├── .github ├── CODEOWNERS ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── scorecard.yml │ └── tests.yml ├── .gitignore ├── docs ├── bench_lin.png ├── bench_log.png ├── benchmark_compare │ └── python-sklearn │ │ ├── requirements.txt │ │ ├── Makefile │ │ ├── macbook_2017 │ │ └── bench.py ├── codegen_transform_cpu_profile.png ├── reflect_transform_cpu_profile.png ├── codegen_transform_cpu_profile_selected.png └── benchmarks │ └── macbook_2017 ├── SECURITY.md ├── go.mod ├── transformers ├── common.go ├── discretization.go ├── samplenormalizers.go ├── discretization_test.go ├── categorical.go ├── scalers.go ├── samplenormalizers_test.go ├── categorical_test.go ├── textprocesors.go ├── scalers_test.go └── textprocessors_test.go ├── CITATION.cff ├── cmd └── generate │ ├── tests │ ├── readme.go │ ├── examplefile.go │ ├── weirdtagsfp.go │ ├── largememorytransformerfp.go │ ├── employeefp.go │ ├── alltransformersfp.go │ ├── readme_test.go │ ├── with32fieldsfp_test.go │ ├── employeefp_test.go │ ├── weirdtagsfp_test.go │ ├── alltransformersfp_test.go │ ├── with32fieldsfp.go │ └── largememorytransformerfp_test.go │ ├── main.go │ ├── templatecode.go │ ├── parser.go │ └── templatetests.go ├── LICENSE ├── go.sum └── structtransformer ├── structtransformer.go └── structtransformer_test.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @nikolaydubina 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: nikolaydubina 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | docs/benchmark_profiles/* 3 | -------------------------------------------------------------------------------- /docs/bench_lin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikolaydubina/go-featureprocessing/HEAD/docs/bench_lin.png -------------------------------------------------------------------------------- /docs/bench_log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikolaydubina/go-featureprocessing/HEAD/docs/bench_log.png -------------------------------------------------------------------------------- /docs/benchmark_compare/python-sklearn/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | pandas 3 | numpy 4 | sklearn 5 | argparse -------------------------------------------------------------------------------- /docs/codegen_transform_cpu_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikolaydubina/go-featureprocessing/HEAD/docs/codegen_transform_cpu_profile.png -------------------------------------------------------------------------------- /docs/reflect_transform_cpu_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikolaydubina/go-featureprocessing/HEAD/docs/reflect_transform_cpu_profile.png -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 
4 | 5 | Contact [@nikolaydubina](https://github.com/nikolaydubina) over email or linkedin. 6 | -------------------------------------------------------------------------------- /docs/codegen_transform_cpu_profile_selected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nikolaydubina/go-featureprocessing/HEAD/docs/codegen_transform_cpu_profile_selected.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/nikolaydubina/go-featureprocessing 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/google/gofuzz v1.2.0 7 | github.com/stretchr/testify v1.10.0 8 | go.uber.org/multierr v1.9.0 9 | ) 10 | -------------------------------------------------------------------------------- /transformers/common.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import "math" 4 | 5 | func std(vals []float64, mean float64) float64 { 6 | sum := 0. 7 | for _, v := range vals { 8 | sum += math.Abs(v-mean) * math.Abs(v-mean) 9 | } 10 | return math.Sqrt(sum / (float64(len(vals)) - 1)) 11 | } 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: If you reference this library in publication, please cite it as below. 
3 | title: Feature Pre-processing in Go 4 | abstract: High-performance machine learning feature preprocessing in Go 5 | authors: 6 | - family-names: Dubina 7 | given-names: Nikolay 8 | version: 2.1 9 | date-released: 2020-12-21 10 | license: MIT 11 | repository-code: https://github.com/nikolaydubina/go-featureprocessing 12 | url: https://github.com/nikolaydubina/go-featureprocessing 13 | -------------------------------------------------------------------------------- /cmd/generate/tests/readme.go: -------------------------------------------------------------------------------- 1 | package examplemodule 2 | 3 | //go:generate go run github.com/nikolaydubina/go-featureprocessing/cmd/generate -struct=Employee 4 | 5 | // Employee is example from readme 6 | type Employee struct { 7 | Age int `feature:"identity"` 8 | Salary float64 `feature:"minmax"` 9 | Kids int `feature:"maxabs"` 10 | Weight float64 `feature:"standard"` 11 | Height float64 `feature:"quantile"` 12 | City string `feature:"onehot"` 13 | Car string `feature:"ordinal"` 14 | Income float64 `feature:"kbins"` 15 | Description string `feature:"tfidf"` 16 | SecretValue float64 17 | } 18 | -------------------------------------------------------------------------------- /transformers/discretization.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import "sort" 4 | 5 | // KBinsDiscretizer based on quantile strategy 6 | type KBinsDiscretizer struct { 7 | QuantileScaler 8 | } 9 | 10 | // Fit fits quantile scaler 11 | func (t *KBinsDiscretizer) Fit(vals []float64) { 12 | t.QuantileScaler.Fit(vals) 13 | } 14 | 15 | // Transform finds index of matched quantile for input 16 | func (t *KBinsDiscretizer) Transform(v float64) float64 { 17 | if len(t.QuantileScaler.Quantiles) == 0 { 18 | return 0 19 | } 20 | i := sort.SearchFloat64s(t.Quantiles[:], v) 21 | if i >= len(t.Quantiles) { 22 | return float64(len(t.Quantiles)) + 1 23 | } 24 | return float64(i) + 1 25 | } 26 | -------------------------------------------------------------------------------- /docs/benchmark_compare/python-sklearn/Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | pip3 install -r requirements.txt 3 | 4 | clean: 5 | rm -rf macbook_2017 6 | 7 | bench: install clean 8 | python3 bench.py --nsamples=1 --ntrials=10 --ntrialsgroup=100 >> macbook_2017 9 | python3 bench.py --nsamples=10 --ntrials=10 --ntrialsgroup=100 >> macbook_2017 10 | python3 bench.py --nsamples=100 --ntrials=10 --ntrialsgroup=100 >> macbook_2017 11 | python3 bench.py --nsamples=1000 --ntrials=10 --ntrialsgroup=100 >> macbook_2017 12 | python3 bench.py --nsamples=10000 --ntrials=10 --ntrialsgroup=10 >> macbook_2017 13 | python3 bench.py --nsamples=100000 --ntrials=10 --ntrialsgroup=10 >> macbook_2017 14 | python3 bench.py --nsamples=1000000 --ntrials=10 --ntrialsgroup=1 >> macbook_2017 15 | python3 bench.py --nsamples=5000000 --ntrials=10 --ntrialsgroup=1 >> macbook_2017 16 | python3 bench.py --nsamples=15000000 --ntrials=10 --ntrialsgroup=1 >> macbook_2017 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nikolay Dubina 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | name: Scorecard supply-chain security 2 | on: 3 | branch_protection_rule: 4 | schedule: 5 | - cron: '42 6 * * 2' 6 | push: 7 | branches: [ "main" ] 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | analysis: 13 | name: Scorecard analysis 14 | runs-on: ubuntu-latest 15 | permissions: 16 | security-events: write 17 | id-token: write 18 | 19 | steps: 20 | - name: "Checkout code" 21 | uses: actions/checkout@v3.1.0 22 | with: 23 | persist-credentials: false 24 | 25 | - name: "Run analysis" 26 | uses: ossf/scorecard-action@v2.3.1 27 | with: 28 | results_file: results.sarif 29 | results_format: sarif 30 | publish_results: true 31 | 32 | - name: "Upload artifact" 33 | uses: actions/upload-artifact@v3.1.0 34 | with: 35 | name: SARIF file 36 | path: results.sarif 37 | retention-days: 5 38 | 39 | - name: "Upload to code-scanning" 40 | uses: github/codeql-action/upload-sarif@v2.2.4 41 | with: 42 | sarif_file: results.sarif 43 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | build: 13 | name: Tests 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Go 1.x 20 | uses: actions/setup-go@v5 21 | with: 22 | go-version: ^1.15 23 | 24 | - name: Test 25 | run: | 26 | go generate ./... 27 | go get -v -t -d ./... 28 | go install github.com/jstemmer/go-junit-report/v2@latest 29 | go test -coverprofile=coverage.out -covermode=atomic -cover -json -v ./... 
2>&1 | go-junit-report -set-exit-code > tests.xml 30 | 31 | - name: Upload test results to Codecov 32 | uses: codecov/test-results-action@v1 33 | with: 34 | token: ${{ secrets.CODECOV_TOKEN }} 35 | files: tests.xml 36 | 37 | - name: Upload coverage to Codecov 38 | uses: codecov/codecov-action@v4.1.1 39 | with: 40 | token: ${{ secrets.CODECOV_TOKEN }} 41 | files: coverage.out 42 | -------------------------------------------------------------------------------- /docs/benchmark_compare/python-sklearn/macbook_2017: -------------------------------------------------------------------------------- 1 | nsamples=1 ntrials=10 ntrialsgroup=100 avg=12824253 ns min=12219147 ns max=13943499 ns samples_dataframe_size=8 B setuptook=27610184 ns 2 | nsamples=10 ntrials=10 ntrialsgroup=100 avg=13809201 ns min=12688076 ns max=14746466 ns samples_dataframe_size=80 B setuptook=27280819 ns 3 | nsamples=100 ntrials=10 ntrialsgroup=100 avg=14324627 ns min=13311803 ns max=15129684 ns samples_dataframe_size=800 B setuptook=25503670 ns 4 | nsamples=1000 ntrials=10 ntrialsgroup=100 avg=15042673 ns min=13605346 ns max=17810513 ns samples_dataframe_size=8000 B setuptook=32386977 ns 5 | nsamples=10000 ntrials=10 ntrialsgroup=10 avg=20092639 ns min=18415227 ns max=22949523 ns samples_dataframe_size=80000 B setuptook=98518650 ns 6 | nsamples=100000 ntrials=10 ntrialsgroup=10 avg=73354263 ns min=71922718 ns max=75853612 ns samples_dataframe_size=800000 B setuptook=758389751 ns 7 | nsamples=1000000 ntrials=10 ntrialsgroup=1 avg=660746274 ns min=645929252 ns max=697522591 ns samples_dataframe_size=8000000 B setuptook=6992992088 ns 8 | nsamples=5000000 ntrials=10 ntrialsgroup=1 avg=3839594987 ns min=3557765533 ns max=4383723914 ns samples_dataframe_size=40000000 B setuptook=37772393178 ns 9 | nsamples=15000000 ntrials=10 ntrialsgroup=1 avg=19546411996 ns min=15810273557 ns max=21872775279 ns samples_dataframe_size=120000000 B setuptook=197243552642 ns 10 | -------------------------------------------------------------------------------- /transformers/samplenormalizers.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import "math" 4 | 5 | // SampleNormalizerL1 transforms features for single sample to have norm L1=1 6 | type SampleNormalizerL1 struct{} 7 | 8 | // Fit is empty, kept only to keep same interface 9 | func (t *SampleNormalizerL1) Fit(_ []float64) {} 10 | 11 | // Transform returns L1 normalized vector 12 | func (t *SampleNormalizerL1) Transform(vs []float64) []float64 { 13 | if t == nil || vs == nil { 14 | return nil 15 | } 16 | vsnorm := make([]float64, len(vs)) 17 | t.TransformInplace(vsnorm, vs) 18 | return vsnorm 19 | } 20 | 21 | // TransformInplace returns L1 normalized vector, inplace 22 | func (t *SampleNormalizerL1) TransformInplace(dest []float64, vs []float64) { 23 | if t == nil || vs == nil || dest == nil || len(dest) != len(vs) { 24 | return 25 | } 26 | 27 | sum := 0. 
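// sum accumulates the L1 norm of the sample (the sum of absolute values), used as the divisor below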
28 | for _, v := range vs { 29 | sum += math.Abs(v) 30 | } 31 | 32 | for i := range dest { 33 | if sum == 0 { 34 | dest[i] = 0 35 | } else { 36 | dest[i] = vs[i] / sum 37 | } 38 | } 39 | } 40 | 41 | // SampleNormalizerL2 transforms features for single sample to have norm L2=1 42 | type SampleNormalizerL2 struct{} 43 | 44 | // Fit is empty, kept only to keep same interface 45 | func (t *SampleNormalizerL2) Fit(_ []float64) {} 46 | 47 | // Transform returns L2 normalized vector 48 | func (t *SampleNormalizerL2) Transform(vs []float64) []float64 { 49 | if t == nil || vs == nil { 50 | return nil 51 | } 52 | vsnorm := make([]float64, len(vs)) 53 | t.TransformInplace(vsnorm, vs) 54 | return vsnorm 55 | } 56 | 57 | // TransformInplace returns L2 normalized vector, inplace 58 | func (t *SampleNormalizerL2) TransformInplace(dest []float64, vs []float64) { 59 | if t == nil || vs == nil || dest == nil || len(dest) != len(vs) { 60 | return 61 | } 62 | 63 | sum := 0. 64 | for _, v := range vs { 65 | sum += v * v 66 | } 67 | sum = math.Sqrt(sum) 68 | 69 | for i := range dest { 70 | if sum == 0 { 71 | dest[i] = 0 72 | } else { 73 | dest[i] = vs[i] / sum 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 5 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 6 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 7 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 8 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 9 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 10 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 11 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 12 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 13 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 14 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 15 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 16 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 17 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 18 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 19 | go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= 20 | go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= 21 | go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= 22 | go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= 23 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 24 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 25 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 26 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 27 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 28 | -------------------------------------------------------------------------------- /transformers/discretization_test.go: -------------------------------------------------------------------------------- 1 | package transformers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/nikolaydubina/go-featureprocessing/transformers" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestKBinsDiscretizerTransform(t *testing.T) { 11 | samples := []struct { 12 | name string 13 | quantiles []float64 14 | input float64 15 | output float64 16 | }{ 17 | {"basic1", []float64{25, 50, 75, 100}, 0, 1}, 18 | {"basic2", []float64{25, 50, 75, 100}, 11, 1}, 19 | {"basic3", []float64{25, 50, 75, 100}, 25, 1}, 20 | {"basic4", []float64{25, 50, 75, 100}, 40, 2}, 21 | {"basic5", []float64{25, 50, 75, 100}, 50, 2}, 22 | {"basic6", []float64{25, 50, 75, 100}, 80, 4}, 23 | {"above_max", []float64{25, 50, 75, 100}, 101, 5}, 24 | {"empty", nil, 10, 0}, 25 | } 26 | for _, s := range samples { 27 | t.Run(s.name, func(t *testing.T) { 28 | encoder := KBinsDiscretizer{QuantileScaler{Quantiles: s.quantiles}} 29 | features := encoder.Transform((s.input)) 30 | assert.Equal(t, s.output, features) 31 | }) 32 | } 33 | } 34 | 35 | func TestKBinsDiscretizerTransformFit(t *testing.T) { 36 | samples := []struct { 37 | name string 38 | quantiles []float64 39 | vals []float64 40 | }{ 41 | {"noinput", nil, nil}, 42 | {"basic", []float64{25, 50, 75, 100}, []float64{25, 50, 75, 100}}, 43 | {"reverse_order", []float64{25, 50, 75, 100}, []float64{100, 75, 50, 25}}, 44 | {"negative", []float64{-100, -75, -50, -25}, []float64{-25, -50, -75, -100}}, 45 | {"one_element", []float64{10}, []float64{10}}, 46 | {"less_elements_than_quantiles", []float64{1, 2, 3}, []float64{1, 2, 3}}, 47 | {"less_elements_than_quantiles_negative", []float64{-3, -2, -1}, []float64{-1, -3, -2}}, 48 | } 49 | for _, s := range samples { 50 | t.Run(s.name, func(t *testing.T) { 51 | encoder := KBinsDiscretizer{QuantileScaler{}} 52 | encoder.Fit(s.vals) 53 | assert.Equal(t, KBinsDiscretizer{QuantileScaler{Quantiles: s.quantiles}}, encoder) 54 | }) 55 | } 56 | 57 | t.Run("number of quantiles is larger than num input vals", func(t *testing.T) { 58 | encoder := KBinsDiscretizer{QuantileScaler{Quantiles: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}}} 59 | encoder.Fit([]float64{1, 2, 3}) 60 | assert.Equal(t, KBinsDiscretizer{QuantileScaler{Quantiles: []float64{1, 2, 3}}}, encoder) 61 | }) 62 | 63 | t.Run("when fit on nil data not zero value", func(t *testing.T) { 64 | encoder := KBinsDiscretizer{} 65 | encoder.Fit(nil) 66 | assert.Equal(t, KBinsDiscretizer{}, encoder) 67 | }) 68 | } 69 | -------------------------------------------------------------------------------- /cmd/generate/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "flag" 6 | "fmt" 7 | "go/format" 8 | "io/ioutil" 9 | "log" 10 | "os" 11 | "path/filepath" 12 | "strings" 13 | "text/template" 14 | 15 | "go.uber.org/multierr" 16 | ) 17 | 18 | func run() error { 19 | structName := "" 20 | fileName := os.Getenv("GOFILE") 21 | packageName := os.Getenv("GOPACKAGE") 22 | 23 | flag.StringVar(&structName, 
"struct", "", "struct to be generated for") 24 | flag.Parse() 25 | 26 | if structName == "" || fileName == "" || packageName == "" { 27 | return fmt.Errorf("missing arguments or environment variables") 28 | } 29 | 30 | log.Printf("go-featureprocessing is writing struct transfomer for struct '%s' $GOFILE=%s $GOPACKAGE=%s ", structName, fileName, packageName) 31 | 32 | inputCode, err := ioutil.ReadFile(fileName) 33 | if err != nil { 34 | return fmt.Errorf("can not open input file: %w", err) 35 | } 36 | 37 | params, err := parseCode(fileName, inputCode, structName, packageName) 38 | if err != nil { 39 | return fmt.Errorf("can not parse code: %w", err) 40 | } 41 | 42 | codeFilePath := fmt.Sprintf("%sfp.go", strings.ToLower(structName)) 43 | testFilePath := fmt.Sprintf("%sfp_test.go", strings.ToLower(structName)) 44 | 45 | if err := generate(params, codeFilePath, "templateCode", templateCode); err != nil { 46 | return fmt.Errorf("can not make code: %w", err) 47 | } 48 | if err := generate(params, testFilePath, "templateTests", templateTests); err != nil { 49 | return fmt.Errorf("can not make tests: %w", err) 50 | } 51 | 52 | return nil 53 | } 54 | 55 | func generate(params *TemplateParams, outfilepath string, templateName string, templateVal string) error { 56 | code := bytes.NewBufferString("") 57 | parsedTemplate, err := template.New(templateName).Parse(templateVal) 58 | if err != nil { 59 | return fmt.Errorf("can not initialize template: %w", err) 60 | } 61 | if err := parsedTemplate.Execute(code, params); err != nil { 62 | return fmt.Errorf("can not execute template: %w", err) 63 | } 64 | 65 | if err := writeCodeToFile(code.Bytes(), outfilepath); err != nil { 66 | return fmt.Errorf("can not write code: %w", err) 67 | } 68 | return nil 69 | } 70 | 71 | func writeCodeToFile(code []byte, outfilepath string) (err error) { 72 | formattedCode, err := format.Source(code) 73 | if err != nil { 74 | return fmt.Errorf("can not format code: %w, code: %s", err, code) 75 | } 76 | 77 | if err := os.MkdirAll(filepath.Dir(outfilepath), 0700); err != nil { 78 | return fmt.Errorf("can not make dir for output file: %w", err) 79 | } 80 | 81 | file, err := os.Create(outfilepath) 82 | if err != nil { 83 | return fmt.Errorf("can not create file: %w", err) 84 | } 85 | defer func() { err = multierr.Combine(err, file.Close()) }() 86 | 87 | if _, err := file.Write(formattedCode); err != nil { 88 | return fmt.Errorf("can not write code to file: %w", err) 89 | } 90 | return nil 91 | } 92 | 93 | func main() { 94 | if err := run(); err != nil { 95 | log.Fatalf(fmt.Errorf("go-featureprocessing encountered error: %w", err).Error()) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /structtransformer/structtransformer.go: -------------------------------------------------------------------------------- 1 | package structtransformer 2 | 3 | import ( 4 | "reflect" 5 | ) 6 | 7 | type numericalTransformer interface { 8 | Fit(vals []float64) 9 | Transform(val float64) float64 10 | } 11 | 12 | type stringTransformer interface { 13 | Fit(vals []string) 14 | Transform(val string) float64 15 | } 16 | 17 | type stringExpandingTransformer interface { 18 | Fit(vals []string) 19 | NumFeatures() int 20 | Transform(val string) []float64 21 | } 22 | 23 | // StructTransformer uses reflection to encode struct into feature vector. 24 | // It uses struct tags to create feature transformers for each field. 
25 | // Since it is using reflection, there is a slight overhead for large structs, which can be seen in benchmarks. 26 | // For better performance, use codegen version for your struct, refer to README of this repo. 27 | type StructTransformer struct { 28 | Transformers []interface{} 29 | } 30 | 31 | // Fit will fit all field transformers 32 | func (s *StructTransformer) Fit(_ []interface{}) { 33 | // TODO: go through encoders, make slice for each with data, call fit on that data 34 | panic("not implemented") 35 | } 36 | 37 | // Transform applies all field transformers 38 | func (s *StructTransformer) Transform(v interface{}) []float64 { 39 | if v == nil || s == nil { 40 | return nil 41 | } 42 | 43 | if s.getNumFeatures() == 0 { 44 | return nil 45 | } 46 | 47 | features := make([]float64, 0, s.getNumFeatures()) 48 | 49 | val := reflect.ValueOf(v) 50 | for i := 0; i < val.NumField() && i < len(s.Transformers); i++ { 51 | transformer := s.Transformers[i] 52 | if transformer == nil || reflect.ValueOf(transformer).IsNil() { 53 | continue 54 | } 55 | 56 | field := val.Field(i) 57 | switch field.Type().Kind() { 58 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 59 | features = append(features, s.transformNumerical(transformer, float64(field.Int()))...) 60 | case reflect.Float32, reflect.Float64: 61 | features = append(features, s.transformNumerical(transformer, field.Float())...) 62 | case reflect.String: 63 | features = append(features, s.transformString(transformer, field.String())...) 64 | default: 65 | panic("unsupported type in struct") 66 | } 67 | } 68 | 69 | return features 70 | } 71 | 72 | func (s *StructTransformer) getNumFeatures() int { 73 | count := 0 74 | for _, tr := range s.Transformers { 75 | if tr, ok := tr.(stringExpandingTransformer); ok { 76 | count += tr.NumFeatures() 77 | } else { 78 | count++ 79 | } 80 | } 81 | return count 82 | } 83 | 84 | func (s *StructTransformer) transformNumerical(transformer interface{}, val float64) []float64 { 85 | if transformer, ok := transformer.(numericalTransformer); ok { 86 | return []float64{transformer.Transform(val)} 87 | } 88 | return nil 89 | } 90 | 91 | func (s *StructTransformer) transformString(transformer interface{}, val string) []float64 { 92 | if transformer, ok := transformer.(stringTransformer); ok { 93 | return []float64{transformer.Transform(val)} 94 | } 95 | if transformer, ok := transformer.(stringExpandingTransformer); ok { 96 | return transformer.Transform(val) 97 | } 98 | return nil 99 | } 100 | -------------------------------------------------------------------------------- /transformers/categorical.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | // OneHotEncoder encodes string value to corresponding index 4 | // 5 | // Mapping should contain all values from 0 to N where N is len(Mapping). 6 | // Responsibility to ensure this is on caller. 7 | // If some index is higher than N or lower than 0, then code will panic. 8 | // If some index is not set, then that index will be skipped. 9 | // If some index is set twice, then index will have effect of either of words. 10 | type OneHotEncoder struct { 11 | Mapping map[string]uint // word to index 12 | } 13 | 14 | // Fit assigns each value from inputs a number 15 | // based on order of occurrence in input data. 16 | // Ignoring empty strings in input. 
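// For example, Fit([]string{"a", "b", "a"}) yields Mapping = map[string]uint{"a": 0, "b": 1}.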
17 | func (t *OneHotEncoder) Fit(vs []string) { 18 | if t == nil || len(vs) == 0 { 19 | return 20 | } 21 | t.Mapping = make(map[string]uint) 22 | for _, v := range vs { 23 | if v == "" { 24 | continue 25 | } 26 | if _, ok := t.Mapping[v]; !ok { 27 | t.Mapping[v] = uint(len(t.Mapping)) 28 | } 29 | } 30 | } 31 | 32 | // NumFeatures returns number of features one field is expanded 33 | func (t *OneHotEncoder) NumFeatures() int { 34 | return len(t.Mapping) 35 | } 36 | 37 | // Transform assigns 1 to value that is found 38 | func (t *OneHotEncoder) Transform(v string) []float64 { 39 | if t == nil || len(t.Mapping) == 0 { 40 | return nil 41 | } 42 | features := make([]float64, t.NumFeatures()) 43 | t.TransformInplace(features, v) 44 | return features 45 | } 46 | 47 | // TransformInplace assigns 1 to value that is found, inplace. 48 | // It is responsibility of a caller to reset destination to 0. 49 | func (t *OneHotEncoder) TransformInplace(dest []float64, v string) { 50 | if t == nil || len(t.Mapping) == 0 || len(dest) != t.NumFeatures() { 51 | return 52 | } 53 | if idx, ok := t.Mapping[v]; ok { 54 | dest[idx] = 1 55 | } 56 | } 57 | 58 | // FeatureNames returns names of each produced value. 59 | func (t *OneHotEncoder) FeatureNames() []string { 60 | if t == nil || len(t.Mapping) == 0 { 61 | return nil 62 | } 63 | names := make([]string, t.NumFeatures()) 64 | for w, i := range t.Mapping { 65 | names[i] = w 66 | } 67 | return names 68 | } 69 | 70 | // OrdinalEncoder returns 0 for string that is not found, or else a number for that string 71 | // 72 | // Mapping should contain all values from 0 to N where N is len(Mapping). 73 | // Responsibility to ensure this is on caller. 74 | // If some index is higher than N or lower than 0, then code will panic. 75 | // If some index is not set, then that index will be skipped. 76 | // If some index is set twice, then index will have effect of either of words. 77 | type OrdinalEncoder struct { 78 | Mapping map[string]uint 79 | } 80 | 81 | // Fit assigns each word value from 1 to N 82 | // Ignoring empty strings in input. 
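// For example, Fit([]string{"bmw", "tesla", "bmw"}) yields Mapping = map[string]uint{"bmw": 1, "tesla": 2}; values not seen during Fit transform to 0.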
83 | func (t *OrdinalEncoder) Fit(vals []string) { 84 | if t == nil || len(vals) == 0 { 85 | return 86 | } 87 | t.Mapping = make(map[string]uint) 88 | for _, v := range vals { 89 | if v == "" { 90 | continue 91 | } 92 | if _, ok := t.Mapping[v]; !ok { 93 | t.Mapping[v] = uint(len(t.Mapping) + 1) 94 | } 95 | } 96 | } 97 | 98 | // Transform returns number of input, if not found returns zero value which is 0 99 | func (t *OrdinalEncoder) Transform(v string) float64 { 100 | if t == nil { 101 | return 0 102 | } 103 | return float64(t.Mapping[v]) 104 | } 105 | -------------------------------------------------------------------------------- /cmd/generate/tests/examplefile.go: -------------------------------------------------------------------------------- 1 | package examplemodule 2 | 3 | // SomeOther is ignored since there is no gencode command in source file 4 | type SomeOther struct { 5 | Name1 float64 6 | Name2 float64 7 | Name3 string 8 | } 9 | 10 | // SomeOtherWithTags is ignored since there is no gencode command in source file, even though it has correct feature tags 11 | type SomeOtherWithTags struct { 12 | Name1 float64 `feature:"minmax"` 13 | Name2 float64 `feature:"maxabs"` 14 | Name3 string `feature:"onehot"` 15 | Name4 string `feature:""` 16 | } 17 | 18 | //go:generate go run github.com/nikolaydubina/go-featureprocessing/cmd/generate -struct=AllTransformers 19 | 20 | // AllTransformers has all transformer 21 | type AllTransformers struct { 22 | Name0 int `feature:"identity"` 23 | Name1 int32 `feature:"minmax"` 24 | Name2 float32 `feature:"maxabs"` 25 | Name3 float64 `feature:"standard"` 26 | Name4 float64 `feature:"quantile"` 27 | Name5 string `feature:"onehot"` 28 | Name6 string `feature:"ordinal"` 29 | Name7 float64 `feature:"kbins"` 30 | Name8 string `feature:"countvectorizer"` 31 | Name9 string `feature:"tfidf"` 32 | } 33 | 34 | //go:generate go run github.com/nikolaydubina/go-featureprocessing/cmd/generate -struct=With32Fields 35 | 36 | // With32Fields has many fields 37 | type With32Fields struct { 38 | Name1 float64 `feature:"minmax"` 39 | Name2 float64 `feature:"minmax"` 40 | Name3 float64 `feature:"minmax"` 41 | Name4 float64 `feature:"minmax"` 42 | Name5 float64 `feature:"minmax"` 43 | Name6 float64 `feature:"minmax"` 44 | Name7 float64 `feature:"minmax"` 45 | Name8 float64 `feature:"minmax"` 46 | Name9 float64 `feature:"minmax"` 47 | Name10 float64 `feature:"minmax"` 48 | Name11 float64 `feature:"minmax"` 49 | Name12 float64 `feature:"minmax"` 50 | Name13 float64 `feature:"minmax"` 51 | Name14 float64 `feature:"minmax"` 52 | Name15 float64 `feature:"minmax"` 53 | Name16 float64 `feature:"minmax"` 54 | Name17 float64 `feature:"minmax"` 55 | Name18 float64 `feature:"minmax"` 56 | Name19 float64 `feature:"minmax"` 57 | Name21 float64 `feature:"minmax"` 58 | Name22 float64 `feature:"minmax"` 59 | Name23 float64 `feature:"minmax"` 60 | Name24 float64 `feature:"minmax"` 61 | Name25 float64 `feature:"minmax"` 62 | Name26 float64 `feature:"minmax"` 63 | Name27 float64 `feature:"minmax"` 64 | Name28 float64 `feature:"minmax"` 65 | Name29 float64 `feature:"minmax"` 66 | Name30 float64 `feature:"minmax"` 67 | Name31 float64 `feature:"minmax"` 68 | Name32 float64 `feature:"minmax"` 69 | } 70 | 71 | //go:generate go run github.com/nikolaydubina/go-featureprocessing/cmd/generate -struct=LargeMemoryTransformer 72 | 73 | // LargeMemoryTransformer has large memory footprint since each transformer is large 74 | type LargeMemoryTransformer struct { 75 | Name1 string `feature:"onehot"` 76 | 
Name2 string `feature:"onehot"` 77 | Name3 string `feature:"ordinal"` 78 | Name4 string `feature:"ordinal"` 79 | Name5 float64 `feature:"quantile"` 80 | Name6 float64 `feature:"quantile"` 81 | Name7 float64 `feature:"kbins"` 82 | Name8 float64 `feature:"kbins"` 83 | } 84 | 85 | //go:generate go run github.com/nikolaydubina/go-featureprocessing/cmd/generate -struct=WeirdTags 86 | 87 | // WeirdTags has unusual but valid tags 88 | type WeirdTags struct { 89 | OnlyFeature float64 `feature:"minmax"` 90 | FeatureNotFirst float64 `json:"name2" feature:"maxabs"` 91 | FirstFeature string `feature:"onehot" json:"some_json_tag"` 92 | Multiline float64 `json:"multiline" feature:"maxabs"` 93 | WithoutFeatureTag string `json:"with_tag"` 94 | 95 | WithoutTag string 96 | 97 | // UTF-8 is allowed 98 | A안녕하세요 int `feature:"minmax"` 99 | B안녕하세요1 string `feature:"onehot"` 100 | C안녕하세요0 string `feature:"tfidf"` 101 | } 102 | -------------------------------------------------------------------------------- /docs/benchmark_compare/python-sklearn/bench.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import random 3 | import time 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | from sklearn.pipeline import * 9 | from sklearn.compose import * 10 | from sklearn.preprocessing import * 11 | from sklearn.feature_extraction.text import * 12 | 13 | """ 14 | Example from Go: 15 | 16 | // Employee is example from readme 17 | type Employee struct { 18 | Age int `feature:"identity"` 19 | Salary float64 `feature:"minmax"` 20 | Kids int `feature:"maxabs"` 21 | Weight float64 `feature:"standard"` 22 | Height float64 `feature:"quantile"` 23 | City string `feature:"onehot"` 24 | Car string `feature:"ordinal"` 25 | Income float64 `feature:"kbins"` 26 | Description string `feature:"tfidf"` 27 | SecretValue float64 28 | } 29 | """ 30 | 31 | parser = argparse.ArgumentParser(description='Benchmarking feature preprocessing from structs for sklearn') 32 | parser.add_argument('--nsamples', type=int, default=100000, help='Number of samples') 33 | parser.add_argument('--ntrials', type=int, default=20, help='Number of trials') 34 | parser.add_argument('--ntrialsgroup', type=int, default=20, help='Number of trials') 35 | args = parser.parse_args() 36 | 37 | nsamples = args.nsamples 38 | ntrials = args.ntrials 39 | 40 | setupstartt = time.perf_counter_ns() 41 | 42 | samples = [ 43 | { 44 | 'age': int(random.uniform(1, 100)), 45 | 'salary': random.uniform(0, 9000), 46 | 'kids': int(random.uniform(1, 10)), 47 | 'weight': random.uniform(1, 200), 48 | 'height': random.uniform(1, 200), 49 | 'city': random.choice(["seoul", "pangyo", "daejeon", "busan", "something_else"]), 50 | 'car': random.choice(["bmw", "tesla", "volvo", "hyndai", "something_else"]), 51 | 'income': random.uniform(1, 200), 52 | 'description': "some very long description here some very long description here some very long description here some very long description here ", 53 | 'secret': 42.1, 54 | } 55 | for i in range(nsamples) 56 | ] 57 | df = pd.DataFrame.from_records(samples, nrows=nsamples) 58 | 59 | corpus = ['this is the first document', 'this document is the second document', 'and this is the third one', 'is this the first document'] 60 | vocabulary = ['this', 'document', 'first', 'is', 'second', 'the', 'and', 'one'] 61 | pipeTfidf = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)), ('tfid', TfidfTransformer())]) 62 | 63 | preprocessor = ColumnTransformer( 64 | transformers=[ 65 | ('age', 
StandardScaler(), ["age"]), 66 | ('salary', MinMaxScaler(), ["salary"]), 67 | ('kids', MaxAbsScaler(), ["kids"]), 68 | ('weight', StandardScaler(), ["weight"]), 69 | ('height', Normalizer(), ["height"]), 70 | ('city', OneHotEncoder(), ["city"]), 71 | ('car', OrdinalEncoder(), ["car"]), 72 | ('income', KBinsDiscretizer(), ["income"]), 73 | #('description', pipeTfidf, ["description"]), #cant not run it 74 | ], 75 | ) 76 | tr = preprocessor.fit(df) 77 | 78 | setupendt = time.perf_counter_ns() 79 | 80 | def benchmark(): 81 | data = tr.transform(df) 82 | 83 | # evaluate 84 | # perf_counter_ns ~ 83ns precision 85 | # monotonic_ns ~ 83ns precision 86 | # process_time_ns ~ 2ms precision 87 | # https://www.python.org/dev/peps/pep-0564/ 88 | runs = np.zeros(ntrials) 89 | for i in range(ntrials): 90 | tic = time.perf_counter_ns() 91 | for j in range(args.ntrialsgroup): 92 | data = tr.transform(df) 93 | toc = time.perf_counter_ns() 94 | runs[i] = (toc - tic) / args.ntrialsgroup 95 | 96 | print(f"nsamples={nsamples}\t ntrials={ntrials}\t ntrialsgroup={args.ntrialsgroup}\t avg={int(np.mean(runs))} ns\t min={int(np.min(runs))} ns\t max={int(np.max(runs))} ns\t samples_dataframe_size={df.memory_usage(index=False, deep=True)[1].sum()} B setuptook={int(setupendt - setupstartt)} ns ") 97 | -------------------------------------------------------------------------------- /transformers/scalers.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "math" 5 | "sort" 6 | ) 7 | 8 | // Identity is a transformer that returns unmodified input value 9 | type Identity struct{} 10 | 11 | // Fit is not used, it is here only to keep same interface as rest of transformers 12 | func (t *Identity) Fit(_ []float64) {} 13 | 14 | // Transform returns same value as input 15 | func (t *Identity) Transform(v float64) float64 { 16 | return v 17 | } 18 | 19 | // MinMaxScaler is a transformer that rescales value into range between min and max 20 | type MinMaxScaler struct { 21 | Min float64 22 | Max float64 23 | } 24 | 25 | // Fit findx min and max value in range 26 | func (t *MinMaxScaler) Fit(vals []float64) { 27 | for i, v := range vals { 28 | if i == 0 { 29 | t.Min = v 30 | t.Max = v 31 | } 32 | if v < t.Min { 33 | t.Min = v 34 | } 35 | if v > t.Max { 36 | t.Max = v 37 | } 38 | } 39 | } 40 | 41 | // Transform scales value from 0 to 1 linearly 42 | func (t *MinMaxScaler) Transform(v float64) float64 { 43 | if t.Min == t.Max { 44 | return 0 45 | } 46 | if v < t.Min { 47 | return 0. 48 | } 49 | if v > t.Max { 50 | return 1. 51 | } 52 | return (v - t.Min) / (t.Max - t.Min) 53 | } 54 | 55 | // MaxAbsScaler transforms value into -1 to +1 range linearly 56 | type MaxAbsScaler struct { 57 | Max float64 58 | } 59 | 60 | // Fit finds maximum abssolute value 61 | func (t *MaxAbsScaler) Fit(vals []float64) { 62 | for i, v := range vals { 63 | if i == 0 { 64 | t.Max = v 65 | } 66 | if math.Abs(v) > t.Max { 67 | t.Max = math.Abs(v) 68 | } 69 | } 70 | } 71 | 72 | // Transform scales value into -1 to +1 range 73 | func (t *MaxAbsScaler) Transform(v float64) float64 { 74 | if t.Max == 0 { 75 | return 0 76 | } 77 | if v > math.Abs(t.Max) { 78 | return 1. 79 | } 80 | if v < -math.Abs(t.Max) { 81 | return -1. 82 | } 83 | return v / math.Abs(t.Max) 84 | } 85 | 86 | // StandardScaler transforms feature into normal standard distribution. 
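// Transform computes z = (v - Mean) / STD, where Mean and STD are estimated by Fit from the training values.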
87 | type StandardScaler struct { 88 | Mean float64 89 | STD float64 90 | } 91 | 92 | // Fit computes mean and standard deviation 93 | func (t *StandardScaler) Fit(vals []float64) { 94 | sum := 0. 95 | for _, v := range vals { 96 | sum += v 97 | } 98 | if len(vals) > 0 { 99 | t.Mean = sum / float64(len(vals)) 100 | t.STD = std(vals, t.Mean) 101 | } 102 | } 103 | 104 | // Transform centralizes and scales based on standard deviation and mean 105 | func (t *StandardScaler) Transform(v float64) float64 { 106 | return (v - t.Mean) / t.STD 107 | } 108 | 109 | // QuantileScaler transforms any distribution to uniform distribution 110 | // This is done by mapping values to quantiles they belong to. 111 | type QuantileScaler struct { 112 | Quantiles []float64 113 | } 114 | 115 | // Fit sets parameters for quantiles based on input. 116 | // Number of quantiles are specified by size of Quantiles slice. 117 | // If it is empty or nil, then 100 is used as default. 118 | // If input is smaller than number of quantiles, then using length of input. 119 | func (t *QuantileScaler) Fit(vals []float64) { 120 | if len(vals) == 0 { 121 | return 122 | } 123 | if len(t.Quantiles) == 0 { 124 | t.Quantiles = make([]float64, 100) 125 | } 126 | if len(vals) < len(t.Quantiles) { 127 | t.Quantiles = t.Quantiles[:len(vals)] 128 | } 129 | 130 | sorted := make([]float64, len(vals)) 131 | copy(sorted, vals) 132 | sort.Float64s(sorted) 133 | 134 | f := float64(len(sorted)) / float64(len(t.Quantiles)) 135 | for i := range t.Quantiles { 136 | idx := int(float64(i) * f) 137 | t.Quantiles[i] = sorted[idx] 138 | } 139 | } 140 | 141 | // Transform changes distribution into uniform one from 0 to 1 142 | func (t *QuantileScaler) Transform(v float64) float64 { 143 | if t == nil || len(t.Quantiles) == 0 { 144 | return 0 145 | } 146 | i := sort.SearchFloat64s(t.Quantiles[:], v) 147 | if i >= len(t.Quantiles) { 148 | return 1. 149 | } 150 | return float64(i+1) / float64(len(t.Quantiles)) 151 | } 152 | -------------------------------------------------------------------------------- /transformers/samplenormalizers_test.go: -------------------------------------------------------------------------------- 1 | package transformers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . 
"github.com/nikolaydubina/go-featureprocessing/transformers" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestSampleNormalizserL1(t *testing.T) { 11 | samples := []struct { 12 | name string 13 | input []float64 14 | output []float64 15 | }{ 16 | {"basic", []float64{1, 2, 3, 4}, []float64{0.1, 0.2, 0.3, 0.4}}, 17 | {"empty", []float64{}, []float64{}}, 18 | {"nil", nil, nil}, 19 | {"zeros", []float64{0, 0, 0}, []float64{0, 0, 0}}, 20 | {"zeros_single", []float64{0}, []float64{0}}, 21 | {"single", []float64{5}, []float64{1}}, 22 | {"single_negative", []float64{-5}, []float64{-1}}, 23 | {"negative", []float64{1, 2, 3, -4}, []float64{0.1, 0.2, 0.3, -0.4}}, 24 | } 25 | 26 | for _, s := range samples { 27 | t.Run(s.name, func(t *testing.T) { 28 | encoder := SampleNormalizerL1{} 29 | features := encoder.Transform((s.input)) 30 | assert.Equal(t, s.output, features) 31 | 32 | // inplace 33 | if len(s.output) > 0 { 34 | features := make([]float64, len(s.input)) 35 | encoder.TransformInplace(features, s.input) 36 | assert.Equal(t, s.output, features) 37 | 38 | features = make([]float64, len(s.input)+100) 39 | features[0] = 11223344556677 40 | features[1] = 10101010110101 41 | features[99] = 223312112233 42 | copy(features[10:], s.output) 43 | expected := make([]float64, len(features)) 44 | copy(expected, features) 45 | 46 | encoder.TransformInplace(features[10:10+len(s.input)], s.input) 47 | assert.Equal(t, expected, features) 48 | } 49 | }) 50 | } 51 | 52 | t.Run("fit", func(t *testing.T) { 53 | encoder := SampleNormalizerL1{} 54 | encoder.Fit(nil) 55 | assert.Equal(t, SampleNormalizerL1{}, encoder) 56 | }) 57 | 58 | t.Run("inplace does not run when input mismatches", func(t *testing.T) { 59 | encoder := SampleNormalizerL1{} 60 | f := []float64{1, 2} 61 | encoder.TransformInplace(f, []float64{1, 2, 3, 4}) 62 | assert.Equal(t, []float64{1, 2}, f) 63 | }) 64 | } 65 | 66 | func TestSampleNormalizserL2(t *testing.T) { 67 | samples := []struct { 68 | name string 69 | input []float64 70 | output []float64 71 | }{ 72 | {"basic", []float64{1, 1, 3, 5, 8}, []float64{0.1, 0.1, 0.3, 0.5, 0.8}}, 73 | {"empty", []float64{}, []float64{}}, 74 | {"nil", nil, nil}, 75 | {"zeros", []float64{0, 0, 0}, []float64{0, 0, 0}}, 76 | {"zeros_single", []float64{0}, []float64{0}}, 77 | {"single", []float64{5}, []float64{1}}, 78 | {"single_negative", []float64{-5}, []float64{-1}}, 79 | {"basic", []float64{1, 1, -3, 5, -8}, []float64{0.1, 0.1, -0.3, 0.5, -0.8}}, 80 | } 81 | 82 | for _, s := range samples { 83 | t.Run(s.name, func(t *testing.T) { 84 | encoder := SampleNormalizerL2{} 85 | features := encoder.Transform((s.input)) 86 | assert.Equal(t, s.output, features) 87 | }) 88 | 89 | if len(s.output) > 0 { 90 | t.Run(s.name+"_inplace", func(t *testing.T) { 91 | encoder := SampleNormalizerL2{} 92 | 93 | features := make([]float64, len(s.input)) 94 | encoder.TransformInplace(features, s.input) 95 | assert.Equal(t, s.output, features) 96 | 97 | features = make([]float64, len(s.input)+100) 98 | features[0] = 1 99 | features[1] = 2 100 | features[10] = 12312 // has to overwrite this 101 | features[99] = 5 102 | 103 | expected := make([]float64, len(features)) 104 | copy(expected, features) 105 | copy(expected[10:], s.output) 106 | 107 | encoder.TransformInplace(features[10:10+len(s.input)], s.input) 108 | assert.Equal(t, expected, features) 109 | }) 110 | } 111 | } 112 | 113 | t.Run("fit", func(t *testing.T) { 114 | encoder := SampleNormalizerL2{} 115 | encoder.Fit(nil) 116 | assert.Equal(t, SampleNormalizerL2{}, 
encoder) 117 | }) 118 | 119 | t.Run("inplace does not run when input mismatches", func(t *testing.T) { 120 | encoder := SampleNormalizerL2{} 121 | f := []float64{1, 2} 122 | encoder.TransformInplace(f, []float64{1, 2, 3, 4}) 123 | assert.Equal(t, []float64{1, 2}, f) 124 | }) 125 | } 126 | -------------------------------------------------------------------------------- /docs/benchmarks/macbook_2017: -------------------------------------------------------------------------------- 1 | GOMAXPROCS=8 go test -timeout=1h -bench=. -benchtime=10s -benchmem ./... 2 | ? github.com/nikolaydubina/go-featureprocessing/cmd/generate [no test files] 3 | goos: darwin 4 | goarch: amd64 5 | pkg: github.com/nikolaydubina/go-featureprocessing/cmd/generate/tests 6 | BenchmarkAllTransformersFeatureTransformer_Fit_100elements-8 84565 154175 ns/op 129252 B/op 343 allocs/op 7 | BenchmarkAllTransformersFeatureTransformer_Fit_1000elements-8 3060 3842749 ns/op 8362158 B/op 3122 allocs/op 8 | BenchmarkAllTransformersFeatureTransformer_Fit_10000elements-8 58 198200932 ns/op 785001236 B/op 30662 allocs/op 9 | BenchmarkAllTransformersFeatureTransformer_Transform-8 48809920 227 ns/op 288 B/op 1 allocs/op 10 | BenchmarkAllTransformersFeatureTransformer_Transform_Inplace-8 81078682 167 ns/op 0 B/op 0 allocs/op 11 | BenchmarkAllTransformersFeatureTransformer_TransformAll_10elems-8 5265136 2400 ns/op 3072 B/op 1 allocs/op 12 | BenchmarkAllTransformersFeatureTransformer_TransformAll_100elems-8 525598 25025 ns/op 32768 B/op 1 allocs/op 13 | BenchmarkAllTransformersFeatureTransformer_TransformAll_1000elems-8 39865 304406 ns/op 303105 B/op 1 allocs/op 14 | BenchmarkAllTransformersFeatureTransformer_TransformAll_10000elems-8 4161 3034926 ns/op 3047428 B/op 1 allocs/op 15 | BenchmarkAllTransformersFeatureTransformer_TransformAll_100000elems-8 388 31737672 ns/op 29605890 B/op 1 allocs/op 16 | BenchmarkAllTransformersFeatureTransformer_TransformAll_1000000elems-8 34 306209484 ns/op 296001536 B/op 1 allocs/op 17 | BenchmarkAllTransformersFeatureTransformer_TransformAll_10elems_8workers-8 1508948 7915 ns/op 3088 B/op 2 allocs/op 18 | BenchmarkAllTransformersFeatureTransformer_TransformAll_100elems_8workers-8 362482 32626 ns/op 32784 B/op 2 allocs/op 19 | BenchmarkAllTransformersFeatureTransformer_TransformAll_1000elems_8workers-8 58651 205823 ns/op 303121 B/op 2 allocs/op 20 | BenchmarkAllTransformersFeatureTransformer_TransformAll_10000elems_8workers-8 6986 1947879 ns/op 2801686 B/op 2 allocs/op 21 | BenchmarkAllTransformersFeatureTransformer_TransformAll_100000elems_8workers-8 668 17773264 ns/op 30400529 B/op 2 allocs/op 22 | BenchmarkAllTransformersFeatureTransformer_TransformAll_1000000elems_8workers-8 67 174749358 ns/op 280002579 B/op 2 allocs/op 23 | BenchmarkAllTransformersFeatureTransformer_TransformAll_5000000elems_8workers-8 6 2465129104 ns/op 1480007696 B/op 2 allocs/op 24 | BenchmarkAllTransformersFeatureTransformer_TransformAll_15000000elems_8workers-8 1 24361808755 ns/op 4560003088 B/op 2 allocs/op 25 | BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_100elements-8 8935842 1397 ns/op 2688 B/op 1 allocs/op 26 | BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_1000elements-8 960136 11899 ns/op 24576 B/op 1 allocs/op 27 | BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_10000elements-8 139093 74061 ns/op 229376 B/op 1 allocs/op 28 | BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_100000elements-8 13260 809367 ns/op 2252800 B/op 1 allocs/op 29 | 
BenchmarkEmployeeFeatureTransformer_Fit_100elements-8 92524 130821 ns/op 117716 B/op 232 allocs/op 30 | BenchmarkEmployeeFeatureTransformer_Fit_1000elements-8 2898 4025417 ns/op 8296921 B/op 2090 allocs/op 31 | BenchmarkEmployeeFeatureTransformer_Fit_10000elements-8 51 217919326 ns/op 784843594 B/op 20396 allocs/op 32 | BenchmarkEmployeeFeatureTransformer_Transform-8 signal: interrupt 33 | FAIL github.com/nikolaydubina/go-featureprocessing/cmd/generate/tests 668.566s 34 | -------------------------------------------------------------------------------- /cmd/generate/templatecode.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const templateCode = ` 4 | // Code generated by go-featureprocessing DO NOT EDIT 5 | 6 | package {{$.PackageName}} 7 | 8 | import ( 9 | "sync" 10 | 11 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 12 | ) 13 | 14 | // {{$.StructName}}FeatureTransformer is a feature processor for {{$.StructName}}. 15 | // It was automatically generated by go-featureprocessing tool. 16 | type {{$.StructName}}FeatureTransformer struct { 17 | {{range $i, $tr := $.Fields}}{{$tr.Name}} fp.{{$tr.Transformer}} ` + "`" + `json:"{{$tr.Name}}_{{$tr.TransformerTag}}"` + "`" + ` 18 | {{end}} 19 | } 20 | 21 | // Fit fits transformer for each field 22 | func (e *{{$.StructName}}FeatureTransformer) Fit(s []{{$.StructName}}) { 23 | if e == nil || len(s) == 0 { 24 | return 25 | } 26 | 27 | {{if $.HasNumericalTransformers}}dataNum := make([]float64, len(s)){{end}} 28 | {{if $.HasStringTransformers}}dataStr := make([]string, len(s)){{end}} 29 | 30 | {{range $i, $tr := $.Fields}} 31 | 32 | for i, v := range s { 33 | {{if $tr.NumericalInput }}dataNum[i] = float64(v.{{$tr.Name}}){{else}}dataStr[i] = v.{{$tr.Name}}{{end}} 34 | } 35 | 36 | e.{{$tr.Name}}.Fit({{if $tr.NumericalInput }}dataNum{{else}}dataStr{{end}}) 37 | 38 | {{end}} 39 | } 40 | 41 | // Transform transforms struct into feature vector accordingly to transformers 42 | func (e *{{$.StructName}}FeatureTransformer) Transform(s *{{$.StructName}}) []float64 { 43 | if s == nil || e == nil { 44 | return nil 45 | } 46 | features := make([]float64, e.NumFeatures()) 47 | e.TransformInplace(features, s) 48 | return features 49 | } 50 | 51 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 52 | func (e *{{$.StructName}}FeatureTransformer) TransformInplace(dst []float64, s *{{$.StructName}}) { 53 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 54 | return 55 | } 56 | idx := 0 57 | {{range $i, $tr := $.Fields}} 58 | {{if $tr.Expanding }}e.{{$tr.Name}}.TransformInplace(dst[idx:idx + e.{{$tr.Name}}.NumFeatures()], s.{{$tr.Name}}) 59 | idx += e.{{$tr.Name}}.NumFeatures() 60 | {{else}}dst[idx] = e.{{$tr.Name}}.Transform( {{if $tr.NumericalInput }}float64{{end}}( s.{{$tr.Name}} )) 61 | idx++ 62 | {{end}} 63 | {{end}} 64 | } 65 | 66 | // TransformAll transforms a slice of {{$.StructName}} 67 | func (e *{{$.StructName}}FeatureTransformer) TransformAll(s []{{$.StructName}}) []float64 { 68 | if e == nil { 69 | return nil 70 | } 71 | features := make([]float64, len(s) * e.NumFeatures()) 72 | e.TransformAllInplace(features, s) 73 | return features 74 | } 75 | 76 | // TransformAllInplace transforms a slice of {{$.StructName}} inplace 77 | func (e *{{$.StructName}}FeatureTransformer) TransformAllInplace(dst []float64, s []{{$.StructName}}) { 78 | if e == nil { 79 | return 80 | } 81 | n := e.NumFeatures() 82 | if len(dst) != n 
* len(s) { 83 | return 84 | } 85 | for i := range s { 86 | e.TransformInplace(dst[i * n: (i + 1) * n], &s[i]) 87 | } 88 | } 89 | 90 | // TransformAllParallel transforms a slice of {{$.StructName}} in parallel 91 | func (e *{{$.StructName}}FeatureTransformer) TransformAllParallel(s []{{$.StructName}}, nworkers uint) []float64 { 92 | if e == nil { 93 | return nil 94 | } 95 | features := make([]float64, len(s) * e.NumFeatures()) 96 | e.TransformAllInplaceParallel(features, s, nworkers) 97 | return features 98 | } 99 | 100 | // TransformAllInplaceParallel transforms a slice of {{$.StructName}} inplace parallel 101 | // Useful for very large slices. 102 | func (e *{{$.StructName}}FeatureTransformer) TransformAllInplaceParallel(dst []float64, s []{{$.StructName}}, nworkers uint) { 103 | if e == nil || nworkers == 0 { 104 | return 105 | } 106 | ns := uint(len(s)) 107 | nf := uint(e.NumFeatures()) 108 | if uint(len(dst)) != nf * ns { 109 | return 110 | } 111 | 112 | nbatch := ns / nworkers 113 | var wg sync.WaitGroup 114 | 115 | for i := uint(0); i < nworkers; i++ { 116 | wg.Add(1) 117 | go func (i uint) { 118 | defer wg.Done() 119 | iStart := nbatch * i 120 | iEnd := nbatch * (i + 1) 121 | if i == (nworkers - 1) { 122 | iEnd = ns 123 | } 124 | e.TransformAllInplace(dst[iStart * nf: iEnd * nf], s[iStart:iEnd]) 125 | } (i); 126 | } 127 | 128 | wg.Wait() 129 | } 130 | 131 | // NumFeatures returns number of features in output feature vector 132 | func (e *{{$.StructName}}FeatureTransformer) NumFeatures() int { 133 | if e == nil { 134 | return 0 135 | } 136 | 137 | count := {{$.NumFieldsFlat}} 138 | {{range $i, $tr := $.Fields}}{{if $tr.Expanding}}count += e.{{$tr.Name}}.NumFeatures(){{end}} 139 | {{end}} 140 | return count 141 | } 142 | 143 | // FeatureNames provides names of features that match output of transform 144 | func (e *{{$.StructName}}FeatureTransformer) FeatureNames() []string { 145 | if e == nil { 146 | return nil 147 | } 148 | 149 | idx := 0 150 | names := make([]string, e.NumFeatures()) 151 | 152 | {{range $i, $tr := $.Fields}} 153 | {{if $tr.Expanding }} 154 | for _, w := range e.{{$tr.Name}}.FeatureNames() { 155 | names[idx] = "{{$tr.Name}}_" + w 156 | idx++ 157 | } 158 | {{else}} 159 | names[idx] = "{{$tr.Name}}" 160 | idx++ 161 | {{end}} 162 | {{end}} 163 | 164 | return names 165 | } 166 | ` 167 | -------------------------------------------------------------------------------- /transformers/categorical_test.go: -------------------------------------------------------------------------------- 1 | package transformers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . 
"github.com/nikolaydubina/go-featureprocessing/transformers" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestOneHotEncoderFit(t *testing.T) { 11 | samples := []struct { 12 | name string 13 | input []string 14 | output map[string]uint 15 | n int 16 | }{ 17 | {"basic", []string{"a", "b", "a", "a", "a"}, map[string]uint{"a": 0, "b": 1}, 2}, 18 | {"empty", []string{}, nil, 0}, 19 | {"nil", nil, nil, 0}, 20 | {"same_string", []string{"a", "a", "a"}, map[string]uint{"a": 0}, 1}, 21 | {"empty_string", []string{"", "", ""}, map[string]uint{}, 0}, 22 | {"zeros_single", []string{""}, map[string]uint{}, 0}, 23 | {"single", []string{"a"}, map[string]uint{"a": 0}, 1}, 24 | } 25 | for _, s := range samples { 26 | t.Run(s.name, func(t *testing.T) { 27 | encoder := OneHotEncoder{} 28 | encoder.Fit(s.input) 29 | assert.Equal(t, OneHotEncoder{Mapping: s.output}, encoder) 30 | assert.Equal(t, s.n, encoder.NumFeatures()) 31 | }) 32 | } 33 | } 34 | 35 | func TestOneHotEncoderTransform(t *testing.T) { 36 | samples := []struct { 37 | name string 38 | mapping map[string]uint 39 | input string 40 | output []float64 41 | }{ 42 | {"basic", map[string]uint{"a": 0, "b": 1}, "a", []float64{1, 0}}, 43 | {"basic", map[string]uint{"a": 0, "b": 1}, "b", []float64{0, 1}}, 44 | {"none", map[string]uint{"a": 0, "b": 1}, "c", []float64{0, 0}}, 45 | {"empty_input", map[string]uint{"a": 0, "b": 1}, "", []float64{0, 0}}, 46 | {"empty_vals", nil, "a", nil}, 47 | {"nil_vals", nil, "a", nil}, 48 | {"zeros_single", map[string]uint{"": 0}, "", []float64{1}}, 49 | {"single", map[string]uint{"a": 0}, "a", []float64{1}}, 50 | } 51 | 52 | for _, s := range samples { 53 | t.Run(s.name, func(t *testing.T) { 54 | encoder := OneHotEncoder{Mapping: s.mapping} 55 | assert.Equal(t, s.output, encoder.Transform(s.input)) 56 | }) 57 | 58 | if len(s.output) > 0 { 59 | t.Run(s.name+"_inplace", func(t *testing.T) { 60 | encoder := OneHotEncoder{Mapping: s.mapping} 61 | assert.Equal(t, s.output, encoder.Transform(s.input)) 62 | 63 | features := make([]float64, encoder.NumFeatures()) 64 | encoder.TransformInplace(features, s.input) 65 | assert.Equal(t, s.output, features) 66 | 67 | features = make([]float64, encoder.NumFeatures()+100) 68 | features[0] = 11223344556677 69 | features[1] = 10101010110101 70 | features[99] = 12312312312312 71 | 72 | expected := make([]float64, len(features)) 73 | copy(expected, features) 74 | copy(expected[10:], s.output) 75 | 76 | encoder.TransformInplace(features[10:10+encoder.NumFeatures()], s.input) 77 | assert.Equal(t, expected, features) 78 | }) 79 | } 80 | } 81 | 82 | t.Run("inplace does not compute when input is wrong", func(t *testing.T) { 83 | encoder := OneHotEncoder{Mapping: map[string]uint{"a": 0, "b": 1}} 84 | features := []float64{1.1, 2.1, 3.1, 4.1} 85 | encoder.TransformInplace(features, "a") 86 | assert.Equal(t, []float64{1.1, 2.1, 3.1, 4.1}, features) 87 | }) 88 | 89 | t.Run("transform when encoder is nil", func(t *testing.T) { 90 | var encoder *OneHotEncoder 91 | assert.Equal(t, []float64(nil), encoder.Transform("abcd")) 92 | }) 93 | } 94 | 95 | func TestOneHotEncoderFeatureNames(t *testing.T) { 96 | t.Run("feature names on empty transformer", func(t *testing.T) { 97 | var encoder *OneHotEncoder 98 | assert.Equal(t, []string(nil), encoder.FeatureNames()) 99 | }) 100 | 101 | t.Run("feature names", func(t *testing.T) { 102 | encoder := OneHotEncoder{Mapping: map[string]uint{"a": 0, "b": 1}} 103 | assert.Equal(t, []string{"a", "b"}, encoder.FeatureNames()) 104 | }) 105 | } 106 | 107 | func 
TestOrdinalEncoderFit(t *testing.T) { 108 | samples := []struct { 109 | name string 110 | input []string 111 | output map[string]uint 112 | }{ 113 | {"basic", []string{"a", "b", "a", "a", "a"}, map[string]uint{"a": 1, "b": 2}}, 114 | {"empty", []string{}, nil}, 115 | {"nil", nil, nil}, 116 | {"same_string", []string{"a", "a", "a"}, map[string]uint{"a": 1}}, 117 | {"empty_string", []string{"", "", ""}, map[string]uint{}}, 118 | {"zeros_single", []string{""}, map[string]uint{}}, 119 | {"single", []string{"a"}, map[string]uint{"a": 1}}, 120 | } 121 | 122 | for _, s := range samples { 123 | t.Run(s.name, func(t *testing.T) { 124 | encoder := OrdinalEncoder{} 125 | encoder.Fit(s.input) 126 | assert.Equal(t, OrdinalEncoder{Mapping: s.output}, encoder) 127 | }) 128 | } 129 | } 130 | 131 | func TestOrdinalEncoderTransform(t *testing.T) { 132 | samples := []struct { 133 | name string 134 | vals map[string]uint 135 | input string 136 | output float64 137 | }{ 138 | {"basic", map[string]uint{"a": 1, "b": 3}, "a", 1}, 139 | {"basic", map[string]uint{"a": 1, "b": 3}, "b", 3}, 140 | {"none", map[string]uint{"a": 1, "b": 3}, "c", 0}, 141 | {"empty_input", map[string]uint{"a": 1, "b": 3}, "", 0}, 142 | {"empty_vals", map[string]uint{}, "a", 0}, 143 | {"nil_vals", nil, "a", 0}, 144 | {"zero_single", map[string]uint{"": 1}, "", 1}, 145 | {"single", map[string]uint{"a": 1}, "a", 1}, 146 | } 147 | for _, s := range samples { 148 | t.Run(s.name, func(t *testing.T) { 149 | encoder := OrdinalEncoder{Mapping: s.vals} 150 | assert.Equal(t, s.output, encoder.Transform(s.input)) 151 | }) 152 | } 153 | 154 | t.Run("transform when encoder is nil", func(t *testing.T) { 155 | var encoder *OrdinalEncoder 156 | assert.Equal(t, 0., encoder.Transform("abcd")) 157 | }) 158 | } 159 | -------------------------------------------------------------------------------- /transformers/textprocesors.go: -------------------------------------------------------------------------------- 1 | package transformers 2 | 3 | import ( 4 | "math" 5 | "strings" 6 | ) 7 | 8 | // CountVectorizer performs bag of words encoding of text. 9 | // 10 | // Separator should not be a part of any word. 11 | // Responsibility to ensure this is on the caller. 12 | // Words that have the separator as a substring will be omitted. 13 | // 14 | // Mapping should contain all values from 0 to N-1, where N is len(Mapping). 15 | // Responsibility to ensure this is on the caller. 16 | // If some index is N or larger, or lower than 0, then the code will panic. 17 | // If some index is not set, then that index will be skipped. 18 | // If some index is set twice, then that index will hold the sum of counts for both words.
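// Example (illustrative sketch; the mapping and counts follow from Fit and Transform below):
//
//	cv := CountVectorizer{}              // Separator defaults to " "
//	cv.Fit([]string{"a b", "b a c"})     // Mapping: {"a": 0, "b": 1, "c": 2}
//	counts := cv.Transform("a a c")      // -> []float64{2, 0, 1}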
19 | type CountVectorizer struct { 20 | Mapping map[string]uint // word to index 21 | Separator string // default space 22 | } 23 | 24 | // Fit assigns a number from 0 to N-1 to each distinct word in input, where N is the number of distinct words 25 | func (t *CountVectorizer) Fit(vals []string) { 26 | if t.Separator == "" { 27 | t.Separator = " " 28 | } 29 | if len(vals) == 0 { 30 | return 31 | } 32 | t.Mapping = make(map[string]uint) 33 | var count uint = 0 34 | for _, v := range vals { 35 | ws := strings.Split(v, t.Separator) 36 | for _, w := range ws { 37 | if w == "" { 38 | continue 39 | } 40 | if _, ok := t.Mapping[w]; !ok { 41 | t.Mapping[w] = count 42 | count++ 43 | } 44 | } 45 | } 46 | } 47 | 48 | // NumFeatures returns the number of features produced for a single input field 49 | func (t *CountVectorizer) NumFeatures() int { 50 | if t == nil { 51 | return 0 52 | } 53 | return len(t.Mapping) 54 | } 55 | 56 | // Transform counts how many times each word appeared in input 57 | func (t *CountVectorizer) Transform(v string) []float64 { 58 | if t == nil || v == "" || len(t.Mapping) == 0 { 59 | return nil 60 | } 61 | counts := make([]float64, t.NumFeatures()) 62 | t.TransformInplace(counts, v) 63 | return counts 64 | } 65 | 66 | // FeatureNames returns a slice with the produced feature names 67 | func (t *CountVectorizer) FeatureNames() []string { 68 | if t == nil || len(t.Mapping) == 0 { 69 | return nil 70 | } 71 | 72 | names := make([]string, t.NumFeatures()) 73 | for w, i := range t.Mapping { 74 | names[i] = w 75 | } 76 | return names 77 | } 78 | 79 | // TransformInplace counts how many times each word appeared in input, inplace version. 80 | // It is the responsibility of the caller to zero out the destination. 81 | // Uses a zero-allocation scanning algorithm instead of `strings.Split`. 82 | // Relies on the fact that a string is a slice of bytes. 83 | // Works fine with UTF-8. 84 | func (t *CountVectorizer) TransformInplace(dest []float64, v string) { 85 | if t == nil || t.Separator == "" || len(t.Mapping) == 0 || len(dest) != t.NumFeatures() { 86 | return 87 | } 88 | sep := t.Separator 89 | 90 | n := strings.Count(v, sep) 91 | if n == 0 { 92 | // no separators, try to match whole string 93 | if idx, ok := t.Mapping[v]; ok { 94 | dest[idx] = 1 95 | } 96 | return 97 | } 98 | 99 | j := 0 // looking for position of separator in v starting from here 100 | for i := 0; i < n; i++ { 101 | // we are guaranteed to find next separator, m >= 0 102 | m := strings.Index(v[j:], sep) 103 | 104 | // word between separators 105 | if idx, ok := t.Mapping[v[j:j+m]]; ok { 106 | dest[idx]++ 107 | } 108 | 109 | // increment by current word length and separator length 110 | j += m + len(sep) 111 | } 112 | if j != len(v) { 113 | // if string did not end with separator, it ended with word 114 | if idx, ok := t.Mapping[v[j:]]; ok { 115 | dest[idx]++ 116 | } 117 | } 118 | } 119 | 120 | // TFIDFVectorizer performs tf-idf vectorization on top of count vectorization. 121 | // Based on: https://scikit-learn.org/stable/modules/feature_extraction.html 122 | // Uses the non-smooth version, adding 1 to the log instead of to the denominator in idf. 123 | // 124 | // DocCount should have length equal to len(CountVectorizer.Mapping). 125 | // It is the responsibility of the caller to ensure this is so.
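// In other words, TransformInplace below computes, for the i-th word with term count tf_i:
//
//	tfidf_i = tf_i * (ln(NumDocuments/DocCount[i]) + 1)   if tf_i > 0 and DocCount[i] > 0
//	tfidf_i = 0                                           otherwise
//
// and the resulting vector is then L2-normalized by Normalizer.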
126 | type TFIDFVectorizer struct { 127 | CountVectorizer 128 | DocCount []uint // number of documents in which the i-th word from CountVectorizer appeared 129 | NumDocuments int 130 | Normalizer SampleNormalizerL2 131 | } 132 | 133 | // Fit fits CountVectorizer and gathers extra information for tf-idf computation 134 | func (t *TFIDFVectorizer) Fit(vals []string) { 135 | t.CountVectorizer.Fit(vals) 136 | if len(vals) == 0 { 137 | return 138 | } 139 | 140 | t.NumDocuments = len(vals) 141 | t.DocCount = make([]uint, t.NumFeatures()) 142 | 143 | // second pass over the whole input to count how many documents each word appeared in 144 | for _, v := range vals { 145 | counts := t.CountVectorizer.Transform(v) 146 | for i, v := range counts { 147 | if v > 0 { 148 | t.DocCount[i]++ 149 | } 150 | } 151 | } 152 | } 153 | 154 | // NumFeatures returns the number of features for a single field 155 | func (t *TFIDFVectorizer) NumFeatures() int { 156 | if t == nil { 157 | return 0 158 | } 159 | return len(t.CountVectorizer.Mapping) 160 | } 161 | 162 | // Transform performs tf-idf computation 163 | func (t *TFIDFVectorizer) Transform(v string) []float64 { 164 | if t == nil { 165 | return nil 166 | } 167 | features := make([]float64, t.NumFeatures()) 168 | t.TransformInplace(features, v) 169 | return features 170 | } 171 | 172 | // TransformInplace performs tf-idf computation, inplace. 173 | // It is the responsibility of the caller to zero out the destination. 174 | func (t *TFIDFVectorizer) TransformInplace(dest []float64, v string) { 175 | if t == nil || dest == nil || len(dest) != t.NumFeatures() { 176 | return 177 | } 178 | t.CountVectorizer.TransformInplace(dest, v) 179 | 180 | for i, tf := range dest { 181 | if tf > 0 && t.DocCount[i] > 0 { 182 | dest[i] = tf * (math.Log(float64(t.NumDocuments)/float64(t.DocCount[i])) + 1) 183 | } else { 184 | dest[i] = 0 185 | } 186 | } 187 | 188 | t.Normalizer.TransformInplace(dest, dest) 189 | } 190 | 191 | // FeatureNames returns a slice with the produced feature names.
192 | func (t *TFIDFVectorizer) FeatureNames() []string { 193 | if t == nil { 194 | return nil 195 | } 196 | return t.CountVectorizer.FeatureNames() 197 | } 198 | -------------------------------------------------------------------------------- /cmd/generate/parser.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "go/ast" 6 | "go/parser" 7 | "go/token" 8 | "strings" 9 | "unicode" 10 | "unicode/utf8" 11 | ) 12 | 13 | // Field represents a single transformer and the field it transforms, for internal use only 14 | type Field struct { 15 | Name string 16 | Transformer string 17 | Expanding bool 18 | NumericalInput bool 19 | TransformerTag string 20 | } 21 | 22 | // TemplateParams represents all parameters for the template, for internal use only 23 | type TemplateParams struct { 24 | PackageName string 25 | StructName string 26 | NumFieldsFlat int 27 | Fields []Field 28 | HasLargeTransformers bool 29 | HasNumericalTransformers bool 30 | HasStringTransformers bool 31 | } 32 | 33 | var tagToTransformer = map[string]string{ 34 | "identity": "Identity", 35 | "minmax": "MinMaxScaler", 36 | "maxabs": "MaxAbsScaler", 37 | "standard": "StandardScaler", 38 | "quantile": "QuantileScaler", 39 | "onehot": "OneHotEncoder", 40 | "ordinal": "OrdinalEncoder", 41 | "kbins": "KBinsDiscretizer", 42 | "countvectorizer": "CountVectorizer", 43 | "tfidf": "TFIDFVectorizer", 44 | } 45 | 46 | var isTransformerExpanding = map[string]bool{ 47 | "onehot": true, 48 | "countvectorizer": true, 49 | "tfidf": true, 50 | } 51 | 52 | var isTransformerLarge = map[string]bool{ 53 | "quantile": true, 54 | "onehot": true, 55 | "ordinal": true, 56 | "kbins": true, 57 | "countvectorizer": true, 58 | "tfidf": true, 59 | } 60 | 61 | var isTypeSupported = map[string]bool{ 62 | "int": true, 63 | "int8": true, 64 | "int16": true, 65 | "int32": true, 66 | "float32": true, 67 | "float64": true, 68 | "string": true, 69 | } 70 | 71 | var isTypeNumerical = map[string]bool{ 72 | "int": true, 73 | "int8": true, 74 | "int16": true, 75 | "int32": true, 76 | "float32": true, 77 | "float64": true, 78 | } 79 | 80 | // parseCode parses the source provided via filename or code into an AST. 81 | // It looks for struct declarations matching structName and collects field information 82 | // that is then used to fill in all details necessary for constructing the StructTransformer.
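// For example (hypothetical input; the struct and field names are illustrative only), given
//
//	type Employee struct {
//		Salary float64 `feature:"minmax"`
//		City   string  `feature:"onehot"`
//		Note   string  // no feature tag, skipped
//	}
//
// parseCode collects two Fields, {Name: "Salary", Transformer: "MinMaxScaler", NumericalInput: true, TransformerTag: "minmax"}
// and {Name: "City", Transformer: "OneHotEncoder", Expanding: true, TransformerTag: "onehot"},
// and fills TemplateParams with NumFieldsFlat: 1, HasLargeTransformers: true,
// HasNumericalTransformers: true, HasStringTransformers: true.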
83 | func parseCode(filename string, code []byte, structName string, packageName string) (*TemplateParams, error) { 84 | var err error 85 | var fields []Field 86 | numFieldsFlat := 0 87 | numLargeTransformers := 0 88 | numNumericalTransformers := 0 89 | numStringTransformers := 0 90 | 91 | f, err := parser.ParseFile(token.NewFileSet(), filename, code, parser.ParseComments) 92 | if err != nil { 93 | return nil, fmt.Errorf("can not parse input file: %w", err) 94 | } 95 | 96 | ast.Inspect(f, func(node ast.Node) bool { 97 | decl, ok := node.(*ast.GenDecl) 98 | if !ok { 99 | return true 100 | } 101 | 102 | for _, spec := range decl.Specs { 103 | typeSpec, ok := spec.(*ast.TypeSpec) 104 | if !ok { 105 | continue 106 | } 107 | 108 | if typeSpec.Name == nil { 109 | continue 110 | } 111 | 112 | if typeSpec.Name.Name != structName { 113 | continue 114 | } 115 | 116 | structSpec, ok := typeSpec.Type.(*ast.StructType) 117 | if !ok { 118 | continue 119 | } 120 | 121 | for _, field := range structSpec.Fields.List { 122 | if field == nil { 123 | continue 124 | } 125 | 126 | // name 127 | if len(field.Names) == 0 { 128 | continue 129 | } 130 | name := field.Names[0].Name 131 | 132 | // Field name has to start with a UTF-8 letter. 133 | // This is a constraint of the Go language spec. 134 | firstRune, _ := utf8.DecodeRuneInString(name) 135 | if !unicode.IsLetter(firstRune) { 136 | continue 137 | } 138 | 139 | // Should start with a Latin letter, 140 | // otherwise field inclusion misbehaves. 141 | if !unicode.In(firstRune, unicode.Scripts["Latin"]) { 142 | continue 143 | } 144 | 145 | // type 146 | fieldType := field.Type 147 | if fieldType == nil { 148 | continue 149 | } 150 | fieldTypeIdent, ok := fieldType.(*ast.Ident) 151 | if !ok { 152 | continue 153 | } 154 | fieldTypeVal := fieldTypeIdent.Name 155 | 156 | // tag 157 | tagsLit := field.Tag 158 | if tagsLit == nil { 159 | continue 160 | } 161 | tags := tagsLit.Value 162 | 163 | var tag string 164 | for _, t := range strings.Fields(strings.Trim(tags, "`")) { 165 | if strings.HasPrefix(t, "feature:") { 166 | tag = t 167 | } 168 | } 169 | if tag == "" { 170 | continue 171 | } 172 | tag = strings.Trim(strings.TrimPrefix(tag, "feature:"), "\"") 173 | 174 | if _, ok := tagToTransformer[tag]; !ok { 175 | err = fmt.Errorf("unexpected value of struct tag \"%s\"", tag) 176 | return false 177 | } 178 | 179 | if !isTypeSupported[fieldTypeVal] { 180 | err = fmt.Errorf("unsupported type %s, supported field types: %#v, note it has to be raw", fieldTypeVal, isTypeSupported) 181 | return false 182 | } 183 | 184 | field := Field{ 185 | Name: name, 186 | Transformer: tagToTransformer[tag], 187 | Expanding: isTransformerExpanding[tag], 188 | NumericalInput: isTypeNumerical[fieldTypeVal], 189 | TransformerTag: tag, 190 | } 191 | if !isTransformerExpanding[tag] { 192 | numFieldsFlat++ 193 | } 194 | if isTransformerLarge[tag] { 195 | numLargeTransformers++ 196 | } 197 | fields = append(fields, field) 198 | 199 | if isTypeNumerical[fieldTypeVal] { 200 | numNumericalTransformers++ 201 | } else { 202 | numStringTransformers++ 203 | } 204 | } 205 | 206 | } 207 | return true 208 | }) 209 | 210 | params := TemplateParams{ 211 | PackageName: packageName, 212 | StructName: structName, 213 | NumFieldsFlat: numFieldsFlat, 214 | HasLargeTransformers: numLargeTransformers > 0, 215 | Fields: fields, 216 | HasNumericalTransformers: numNumericalTransformers > 0, 217 | HasStringTransformers: numStringTransformers > 0, 218 | } 219 | 220 | return &params, err 221 | }
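// A typical call might look like this (illustrative only; the file name, struct name,
// and package name are examples, and the caller reads the source file itself):
//
//	src, _ := os.ReadFile("employee.go")
//	params, err := parseCode("employee.go", src, "Employee", "examplemodule")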
222 | -------------------------------------------------------------------------------- /cmd/generate/tests/weirdtagsfp.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "sync" 7 | 8 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 9 | ) 10 | 11 | // WeirdTagsFeatureTransformer is a feature processor for WeirdTags. 12 | // It was automatically generated by go-featureprocessing tool. 13 | type WeirdTagsFeatureTransformer struct { 14 | OnlyFeature fp.MinMaxScaler `json:"OnlyFeature_minmax"` 15 | FeatureNotFirst fp.MaxAbsScaler `json:"FeatureNotFirst_maxabs"` 16 | FirstFeature fp.OneHotEncoder `json:"FirstFeature_onehot"` 17 | Multiline fp.MaxAbsScaler `json:"Multiline_maxabs"` 18 | A안녕하세요 fp.MinMaxScaler `json:"A안녕하세요_minmax"` 19 | B안녕하세요1 fp.OneHotEncoder `json:"B안녕하세요1_onehot"` 20 | C안녕하세요0 fp.TFIDFVectorizer `json:"C안녕하세요0_tfidf"` 21 | } 22 | 23 | // Fit fits transformer for each field 24 | func (e *WeirdTagsFeatureTransformer) Fit(s []WeirdTags) { 25 | if e == nil || len(s) == 0 { 26 | return 27 | } 28 | 29 | dataNum := make([]float64, len(s)) 30 | dataStr := make([]string, len(s)) 31 | 32 | for i, v := range s { 33 | dataNum[i] = float64(v.OnlyFeature) 34 | } 35 | 36 | e.OnlyFeature.Fit(dataNum) 37 | 38 | for i, v := range s { 39 | dataNum[i] = float64(v.FeatureNotFirst) 40 | } 41 | 42 | e.FeatureNotFirst.Fit(dataNum) 43 | 44 | for i, v := range s { 45 | dataStr[i] = v.FirstFeature 46 | } 47 | 48 | e.FirstFeature.Fit(dataStr) 49 | 50 | for i, v := range s { 51 | dataNum[i] = float64(v.Multiline) 52 | } 53 | 54 | e.Multiline.Fit(dataNum) 55 | 56 | for i, v := range s { 57 | dataNum[i] = float64(v.A안녕하세요) 58 | } 59 | 60 | e.A안녕하세요.Fit(dataNum) 61 | 62 | for i, v := range s { 63 | dataStr[i] = v.B안녕하세요1 64 | } 65 | 66 | e.B안녕하세요1.Fit(dataStr) 67 | 68 | for i, v := range s { 69 | dataStr[i] = v.C안녕하세요0 70 | } 71 | 72 | e.C안녕하세요0.Fit(dataStr) 73 | 74 | } 75 | 76 | // Transform transforms struct into feature vector accordingly to transformers 77 | func (e *WeirdTagsFeatureTransformer) Transform(s *WeirdTags) []float64 { 78 | if s == nil || e == nil { 79 | return nil 80 | } 81 | features := make([]float64, e.NumFeatures()) 82 | e.TransformInplace(features, s) 83 | return features 84 | } 85 | 86 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 87 | func (e *WeirdTagsFeatureTransformer) TransformInplace(dst []float64, s *WeirdTags) { 88 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 89 | return 90 | } 91 | idx := 0 92 | 93 | dst[idx] = e.OnlyFeature.Transform(float64(s.OnlyFeature)) 94 | idx++ 95 | 96 | dst[idx] = e.FeatureNotFirst.Transform(float64(s.FeatureNotFirst)) 97 | idx++ 98 | 99 | e.FirstFeature.TransformInplace(dst[idx:idx+e.FirstFeature.NumFeatures()], s.FirstFeature) 100 | idx += e.FirstFeature.NumFeatures() 101 | 102 | dst[idx] = e.Multiline.Transform(float64(s.Multiline)) 103 | idx++ 104 | 105 | dst[idx] = e.A안녕하세요.Transform(float64(s.A안녕하세요)) 106 | idx++ 107 | 108 | e.B안녕하세요1.TransformInplace(dst[idx:idx+e.B안녕하세요1.NumFeatures()], s.B안녕하세요1) 109 | idx += e.B안녕하세요1.NumFeatures() 110 | 111 | e.C안녕하세요0.TransformInplace(dst[idx:idx+e.C안녕하세요0.NumFeatures()], s.C안녕하세요0) 112 | idx += e.C안녕하세요0.NumFeatures() 113 | 114 | } 115 | 116 | // TransformAll transforms a slice of WeirdTags 117 | func (e *WeirdTagsFeatureTransformer) TransformAll(s []WeirdTags) []float64 
{ 118 | if e == nil { 119 | return nil 120 | } 121 | features := make([]float64, len(s)*e.NumFeatures()) 122 | e.TransformAllInplace(features, s) 123 | return features 124 | } 125 | 126 | // TransformAllInplace transforms a slice of WeirdTags inplace 127 | func (e *WeirdTagsFeatureTransformer) TransformAllInplace(dst []float64, s []WeirdTags) { 128 | if e == nil { 129 | return 130 | } 131 | n := e.NumFeatures() 132 | if len(dst) != n*len(s) { 133 | return 134 | } 135 | for i := range s { 136 | e.TransformInplace(dst[i*n:(i+1)*n], &s[i]) 137 | } 138 | } 139 | 140 | // TransformAllParallel transforms a slice of WeirdTags in parallel 141 | func (e *WeirdTagsFeatureTransformer) TransformAllParallel(s []WeirdTags, nworkers uint) []float64 { 142 | if e == nil { 143 | return nil 144 | } 145 | features := make([]float64, len(s)*e.NumFeatures()) 146 | e.TransformAllInplaceParallel(features, s, nworkers) 147 | return features 148 | } 149 | 150 | // TransformAllInplaceParallel transforms a slice of WeirdTags inplace parallel 151 | // Useful for very large slices. 152 | func (e *WeirdTagsFeatureTransformer) TransformAllInplaceParallel(dst []float64, s []WeirdTags, nworkers uint) { 153 | if e == nil || nworkers == 0 { 154 | return 155 | } 156 | ns := uint(len(s)) 157 | nf := uint(e.NumFeatures()) 158 | if uint(len(dst)) != nf*ns { 159 | return 160 | } 161 | 162 | nbatch := ns / nworkers 163 | var wg sync.WaitGroup 164 | 165 | for i := uint(0); i < nworkers; i++ { 166 | wg.Add(1) 167 | go func(i uint) { 168 | defer wg.Done() 169 | iStart := nbatch * i 170 | iEnd := nbatch * (i + 1) 171 | if i == (nworkers - 1) { 172 | iEnd = ns 173 | } 174 | e.TransformAllInplace(dst[iStart*nf:iEnd*nf], s[iStart:iEnd]) 175 | }(i) 176 | } 177 | 178 | wg.Wait() 179 | } 180 | 181 | // NumFeatures returns number of features in output feature vector 182 | func (e *WeirdTagsFeatureTransformer) NumFeatures() int { 183 | if e == nil { 184 | return 0 185 | } 186 | 187 | count := 4 188 | 189 | count += e.FirstFeature.NumFeatures() 190 | 191 | count += e.B안녕하세요1.NumFeatures() 192 | count += e.C안녕하세요0.NumFeatures() 193 | 194 | return count 195 | } 196 | 197 | // FeatureNames provides names of features that match output of transform 198 | func (e *WeirdTagsFeatureTransformer) FeatureNames() []string { 199 | if e == nil { 200 | return nil 201 | } 202 | 203 | idx := 0 204 | names := make([]string, e.NumFeatures()) 205 | 206 | names[idx] = "OnlyFeature" 207 | idx++ 208 | 209 | names[idx] = "FeatureNotFirst" 210 | idx++ 211 | 212 | for _, w := range e.FirstFeature.FeatureNames() { 213 | names[idx] = "FirstFeature_" + w 214 | idx++ 215 | } 216 | 217 | names[idx] = "Multiline" 218 | idx++ 219 | 220 | names[idx] = "A안녕하세요" 221 | idx++ 222 | 223 | for _, w := range e.B안녕하세요1.FeatureNames() { 224 | names[idx] = "B안녕하세요1_" + w 225 | idx++ 226 | } 227 | 228 | for _, w := range e.C안녕하세요0.FeatureNames() { 229 | names[idx] = "C안녕하세요0_" + w 230 | idx++ 231 | } 232 | 233 | return names 234 | } 235 | -------------------------------------------------------------------------------- /cmd/generate/tests/largememorytransformerfp.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "sync" 7 | 8 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 9 | ) 10 | 11 | // LargeMemoryTransformerFeatureTransformer is a feature processor for LargeMemoryTransformer. 
12 | // It was automatically generated by go-featureprocessing tool. 13 | type LargeMemoryTransformerFeatureTransformer struct { 14 | Name1 fp.OneHotEncoder `json:"Name1_onehot"` 15 | Name2 fp.OneHotEncoder `json:"Name2_onehot"` 16 | Name3 fp.OrdinalEncoder `json:"Name3_ordinal"` 17 | Name4 fp.OrdinalEncoder `json:"Name4_ordinal"` 18 | Name5 fp.QuantileScaler `json:"Name5_quantile"` 19 | Name6 fp.QuantileScaler `json:"Name6_quantile"` 20 | Name7 fp.KBinsDiscretizer `json:"Name7_kbins"` 21 | Name8 fp.KBinsDiscretizer `json:"Name8_kbins"` 22 | } 23 | 24 | // Fit fits transformer for each field 25 | func (e *LargeMemoryTransformerFeatureTransformer) Fit(s []LargeMemoryTransformer) { 26 | if e == nil || len(s) == 0 { 27 | return 28 | } 29 | 30 | dataNum := make([]float64, len(s)) 31 | dataStr := make([]string, len(s)) 32 | 33 | for i, v := range s { 34 | dataStr[i] = v.Name1 35 | } 36 | 37 | e.Name1.Fit(dataStr) 38 | 39 | for i, v := range s { 40 | dataStr[i] = v.Name2 41 | } 42 | 43 | e.Name2.Fit(dataStr) 44 | 45 | for i, v := range s { 46 | dataStr[i] = v.Name3 47 | } 48 | 49 | e.Name3.Fit(dataStr) 50 | 51 | for i, v := range s { 52 | dataStr[i] = v.Name4 53 | } 54 | 55 | e.Name4.Fit(dataStr) 56 | 57 | for i, v := range s { 58 | dataNum[i] = float64(v.Name5) 59 | } 60 | 61 | e.Name5.Fit(dataNum) 62 | 63 | for i, v := range s { 64 | dataNum[i] = float64(v.Name6) 65 | } 66 | 67 | e.Name6.Fit(dataNum) 68 | 69 | for i, v := range s { 70 | dataNum[i] = float64(v.Name7) 71 | } 72 | 73 | e.Name7.Fit(dataNum) 74 | 75 | for i, v := range s { 76 | dataNum[i] = float64(v.Name8) 77 | } 78 | 79 | e.Name8.Fit(dataNum) 80 | 81 | } 82 | 83 | // Transform transforms struct into feature vector accordingly to transformers 84 | func (e *LargeMemoryTransformerFeatureTransformer) Transform(s *LargeMemoryTransformer) []float64 { 85 | if s == nil || e == nil { 86 | return nil 87 | } 88 | features := make([]float64, e.NumFeatures()) 89 | e.TransformInplace(features, s) 90 | return features 91 | } 92 | 93 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 94 | func (e *LargeMemoryTransformerFeatureTransformer) TransformInplace(dst []float64, s *LargeMemoryTransformer) { 95 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 96 | return 97 | } 98 | idx := 0 99 | 100 | e.Name1.TransformInplace(dst[idx:idx+e.Name1.NumFeatures()], s.Name1) 101 | idx += e.Name1.NumFeatures() 102 | 103 | e.Name2.TransformInplace(dst[idx:idx+e.Name2.NumFeatures()], s.Name2) 104 | idx += e.Name2.NumFeatures() 105 | 106 | dst[idx] = e.Name3.Transform((s.Name3)) 107 | idx++ 108 | 109 | dst[idx] = e.Name4.Transform((s.Name4)) 110 | idx++ 111 | 112 | dst[idx] = e.Name5.Transform(float64(s.Name5)) 113 | idx++ 114 | 115 | dst[idx] = e.Name6.Transform(float64(s.Name6)) 116 | idx++ 117 | 118 | dst[idx] = e.Name7.Transform(float64(s.Name7)) 119 | idx++ 120 | 121 | dst[idx] = e.Name8.Transform(float64(s.Name8)) 122 | idx++ 123 | 124 | } 125 | 126 | // TransformAll transforms a slice of LargeMemoryTransformer 127 | func (e *LargeMemoryTransformerFeatureTransformer) TransformAll(s []LargeMemoryTransformer) []float64 { 128 | if e == nil { 129 | return nil 130 | } 131 | features := make([]float64, len(s)*e.NumFeatures()) 132 | e.TransformAllInplace(features, s) 133 | return features 134 | } 135 | 136 | // TransformAllInplace transforms a slice of LargeMemoryTransformer inplace 137 | func (e *LargeMemoryTransformerFeatureTransformer) TransformAllInplace(dst []float64, s 
[]LargeMemoryTransformer) { 138 | if e == nil { 139 | return 140 | } 141 | n := e.NumFeatures() 142 | if len(dst) != n*len(s) { 143 | return 144 | } 145 | for i := range s { 146 | e.TransformInplace(dst[i*n:(i+1)*n], &s[i]) 147 | } 148 | } 149 | 150 | // TransformAllParallel transforms a slice of LargeMemoryTransformer in parallel 151 | func (e *LargeMemoryTransformerFeatureTransformer) TransformAllParallel(s []LargeMemoryTransformer, nworkers uint) []float64 { 152 | if e == nil { 153 | return nil 154 | } 155 | features := make([]float64, len(s)*e.NumFeatures()) 156 | e.TransformAllInplaceParallel(features, s, nworkers) 157 | return features 158 | } 159 | 160 | // TransformAllInplaceParallel transforms a slice of LargeMemoryTransformer inplace parallel 161 | // Useful for very large slices. 162 | func (e *LargeMemoryTransformerFeatureTransformer) TransformAllInplaceParallel(dst []float64, s []LargeMemoryTransformer, nworkers uint) { 163 | if e == nil || nworkers == 0 { 164 | return 165 | } 166 | ns := uint(len(s)) 167 | nf := uint(e.NumFeatures()) 168 | if uint(len(dst)) != nf*ns { 169 | return 170 | } 171 | 172 | nbatch := ns / nworkers 173 | var wg sync.WaitGroup 174 | 175 | for i := uint(0); i < nworkers; i++ { 176 | wg.Add(1) 177 | go func(i uint) { 178 | defer wg.Done() 179 | iStart := nbatch * i 180 | iEnd := nbatch * (i + 1) 181 | if i == (nworkers - 1) { 182 | iEnd = ns 183 | } 184 | e.TransformAllInplace(dst[iStart*nf:iEnd*nf], s[iStart:iEnd]) 185 | }(i) 186 | } 187 | 188 | wg.Wait() 189 | } 190 | 191 | // NumFeatures returns number of features in output feature vector 192 | func (e *LargeMemoryTransformerFeatureTransformer) NumFeatures() int { 193 | if e == nil { 194 | return 0 195 | } 196 | 197 | count := 6 198 | count += e.Name1.NumFeatures() 199 | count += e.Name2.NumFeatures() 200 | 201 | return count 202 | } 203 | 204 | // FeatureNames provides names of features that match output of transform 205 | func (e *LargeMemoryTransformerFeatureTransformer) FeatureNames() []string { 206 | if e == nil { 207 | return nil 208 | } 209 | 210 | idx := 0 211 | names := make([]string, e.NumFeatures()) 212 | 213 | for _, w := range e.Name1.FeatureNames() { 214 | names[idx] = "Name1_" + w 215 | idx++ 216 | } 217 | 218 | for _, w := range e.Name2.FeatureNames() { 219 | names[idx] = "Name2_" + w 220 | idx++ 221 | } 222 | 223 | names[idx] = "Name3" 224 | idx++ 225 | 226 | names[idx] = "Name4" 227 | idx++ 228 | 229 | names[idx] = "Name5" 230 | idx++ 231 | 232 | names[idx] = "Name6" 233 | idx++ 234 | 235 | names[idx] = "Name7" 236 | idx++ 237 | 238 | names[idx] = "Name8" 239 | idx++ 240 | 241 | return names 242 | } 243 | -------------------------------------------------------------------------------- /cmd/generate/tests/employeefp.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "sync" 7 | 8 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 9 | ) 10 | 11 | // EmployeeFeatureTransformer is a feature processor for Employee. 12 | // It was automatically generated by go-featureprocessing tool. 
13 | type EmployeeFeatureTransformer struct { 14 | Age fp.Identity `json:"Age_identity"` 15 | Salary fp.MinMaxScaler `json:"Salary_minmax"` 16 | Kids fp.MaxAbsScaler `json:"Kids_maxabs"` 17 | Weight fp.StandardScaler `json:"Weight_standard"` 18 | Height fp.QuantileScaler `json:"Height_quantile"` 19 | City fp.OneHotEncoder `json:"City_onehot"` 20 | Car fp.OrdinalEncoder `json:"Car_ordinal"` 21 | Income fp.KBinsDiscretizer `json:"Income_kbins"` 22 | Description fp.TFIDFVectorizer `json:"Description_tfidf"` 23 | } 24 | 25 | // Fit fits transformer for each field 26 | func (e *EmployeeFeatureTransformer) Fit(s []Employee) { 27 | if e == nil || len(s) == 0 { 28 | return 29 | } 30 | 31 | dataNum := make([]float64, len(s)) 32 | dataStr := make([]string, len(s)) 33 | 34 | for i, v := range s { 35 | dataNum[i] = float64(v.Age) 36 | } 37 | 38 | e.Age.Fit(dataNum) 39 | 40 | for i, v := range s { 41 | dataNum[i] = float64(v.Salary) 42 | } 43 | 44 | e.Salary.Fit(dataNum) 45 | 46 | for i, v := range s { 47 | dataNum[i] = float64(v.Kids) 48 | } 49 | 50 | e.Kids.Fit(dataNum) 51 | 52 | for i, v := range s { 53 | dataNum[i] = float64(v.Weight) 54 | } 55 | 56 | e.Weight.Fit(dataNum) 57 | 58 | for i, v := range s { 59 | dataNum[i] = float64(v.Height) 60 | } 61 | 62 | e.Height.Fit(dataNum) 63 | 64 | for i, v := range s { 65 | dataStr[i] = v.City 66 | } 67 | 68 | e.City.Fit(dataStr) 69 | 70 | for i, v := range s { 71 | dataStr[i] = v.Car 72 | } 73 | 74 | e.Car.Fit(dataStr) 75 | 76 | for i, v := range s { 77 | dataNum[i] = float64(v.Income) 78 | } 79 | 80 | e.Income.Fit(dataNum) 81 | 82 | for i, v := range s { 83 | dataStr[i] = v.Description 84 | } 85 | 86 | e.Description.Fit(dataStr) 87 | 88 | } 89 | 90 | // Transform transforms struct into feature vector accordingly to transformers 91 | func (e *EmployeeFeatureTransformer) Transform(s *Employee) []float64 { 92 | if s == nil || e == nil { 93 | return nil 94 | } 95 | features := make([]float64, e.NumFeatures()) 96 | e.TransformInplace(features, s) 97 | return features 98 | } 99 | 100 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 101 | func (e *EmployeeFeatureTransformer) TransformInplace(dst []float64, s *Employee) { 102 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 103 | return 104 | } 105 | idx := 0 106 | 107 | dst[idx] = e.Age.Transform(float64(s.Age)) 108 | idx++ 109 | 110 | dst[idx] = e.Salary.Transform(float64(s.Salary)) 111 | idx++ 112 | 113 | dst[idx] = e.Kids.Transform(float64(s.Kids)) 114 | idx++ 115 | 116 | dst[idx] = e.Weight.Transform(float64(s.Weight)) 117 | idx++ 118 | 119 | dst[idx] = e.Height.Transform(float64(s.Height)) 120 | idx++ 121 | 122 | e.City.TransformInplace(dst[idx:idx+e.City.NumFeatures()], s.City) 123 | idx += e.City.NumFeatures() 124 | 125 | dst[idx] = e.Car.Transform((s.Car)) 126 | idx++ 127 | 128 | dst[idx] = e.Income.Transform(float64(s.Income)) 129 | idx++ 130 | 131 | e.Description.TransformInplace(dst[idx:idx+e.Description.NumFeatures()], s.Description) 132 | idx += e.Description.NumFeatures() 133 | 134 | } 135 | 136 | // TransformAll transforms a slice of Employee 137 | func (e *EmployeeFeatureTransformer) TransformAll(s []Employee) []float64 { 138 | if e == nil { 139 | return nil 140 | } 141 | features := make([]float64, len(s)*e.NumFeatures()) 142 | e.TransformAllInplace(features, s) 143 | return features 144 | } 145 | 146 | // TransformAllInplace transforms a slice of Employee inplace 147 | func (e *EmployeeFeatureTransformer) 
TransformAllInplace(dst []float64, s []Employee) { 148 | if e == nil { 149 | return 150 | } 151 | n := e.NumFeatures() 152 | if len(dst) != n*len(s) { 153 | return 154 | } 155 | for i := range s { 156 | e.TransformInplace(dst[i*n:(i+1)*n], &s[i]) 157 | } 158 | } 159 | 160 | // TransformAllParallel transforms a slice of Employee in parallel 161 | func (e *EmployeeFeatureTransformer) TransformAllParallel(s []Employee, nworkers uint) []float64 { 162 | if e == nil { 163 | return nil 164 | } 165 | features := make([]float64, len(s)*e.NumFeatures()) 166 | e.TransformAllInplaceParallel(features, s, nworkers) 167 | return features 168 | } 169 | 170 | // TransformAllInplaceParallel transforms a slice of Employee inplace parallel 171 | // Useful for very large slices. 172 | func (e *EmployeeFeatureTransformer) TransformAllInplaceParallel(dst []float64, s []Employee, nworkers uint) { 173 | if e == nil || nworkers == 0 { 174 | return 175 | } 176 | ns := uint(len(s)) 177 | nf := uint(e.NumFeatures()) 178 | if uint(len(dst)) != nf*ns { 179 | return 180 | } 181 | 182 | nbatch := ns / nworkers 183 | var wg sync.WaitGroup 184 | 185 | for i := uint(0); i < nworkers; i++ { 186 | wg.Add(1) 187 | go func(i uint) { 188 | defer wg.Done() 189 | iStart := nbatch * i 190 | iEnd := nbatch * (i + 1) 191 | if i == (nworkers - 1) { 192 | iEnd = ns 193 | } 194 | e.TransformAllInplace(dst[iStart*nf:iEnd*nf], s[iStart:iEnd]) 195 | }(i) 196 | } 197 | 198 | wg.Wait() 199 | } 200 | 201 | // NumFeatures returns number of features in output feature vector 202 | func (e *EmployeeFeatureTransformer) NumFeatures() int { 203 | if e == nil { 204 | return 0 205 | } 206 | 207 | count := 7 208 | 209 | count += e.City.NumFeatures() 210 | 211 | count += e.Description.NumFeatures() 212 | 213 | return count 214 | } 215 | 216 | // FeatureNames provides names of features that match output of transform 217 | func (e *EmployeeFeatureTransformer) FeatureNames() []string { 218 | if e == nil { 219 | return nil 220 | } 221 | 222 | idx := 0 223 | names := make([]string, e.NumFeatures()) 224 | 225 | names[idx] = "Age" 226 | idx++ 227 | 228 | names[idx] = "Salary" 229 | idx++ 230 | 231 | names[idx] = "Kids" 232 | idx++ 233 | 234 | names[idx] = "Weight" 235 | idx++ 236 | 237 | names[idx] = "Height" 238 | idx++ 239 | 240 | for _, w := range e.City.FeatureNames() { 241 | names[idx] = "City_" + w 242 | idx++ 243 | } 244 | 245 | names[idx] = "Car" 246 | idx++ 247 | 248 | names[idx] = "Income" 249 | idx++ 250 | 251 | for _, w := range e.Description.FeatureNames() { 252 | names[idx] = "Description_" + w 253 | idx++ 254 | } 255 | 256 | return names 257 | } 258 | -------------------------------------------------------------------------------- /cmd/generate/tests/alltransformersfp.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "sync" 7 | 8 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 9 | ) 10 | 11 | // AllTransformersFeatureTransformer is a feature processor for AllTransformers. 12 | // It was automatically generated by go-featureprocessing tool. 
13 | type AllTransformersFeatureTransformer struct { 14 | Name0 fp.Identity `json:"Name0_identity"` 15 | Name1 fp.MinMaxScaler `json:"Name1_minmax"` 16 | Name2 fp.MaxAbsScaler `json:"Name2_maxabs"` 17 | Name3 fp.StandardScaler `json:"Name3_standard"` 18 | Name4 fp.QuantileScaler `json:"Name4_quantile"` 19 | Name5 fp.OneHotEncoder `json:"Name5_onehot"` 20 | Name6 fp.OrdinalEncoder `json:"Name6_ordinal"` 21 | Name7 fp.KBinsDiscretizer `json:"Name7_kbins"` 22 | Name8 fp.CountVectorizer `json:"Name8_countvectorizer"` 23 | Name9 fp.TFIDFVectorizer `json:"Name9_tfidf"` 24 | } 25 | 26 | // Fit fits transformer for each field 27 | func (e *AllTransformersFeatureTransformer) Fit(s []AllTransformers) { 28 | if e == nil || len(s) == 0 { 29 | return 30 | } 31 | 32 | dataNum := make([]float64, len(s)) 33 | dataStr := make([]string, len(s)) 34 | 35 | for i, v := range s { 36 | dataNum[i] = float64(v.Name0) 37 | } 38 | 39 | e.Name0.Fit(dataNum) 40 | 41 | for i, v := range s { 42 | dataNum[i] = float64(v.Name1) 43 | } 44 | 45 | e.Name1.Fit(dataNum) 46 | 47 | for i, v := range s { 48 | dataNum[i] = float64(v.Name2) 49 | } 50 | 51 | e.Name2.Fit(dataNum) 52 | 53 | for i, v := range s { 54 | dataNum[i] = float64(v.Name3) 55 | } 56 | 57 | e.Name3.Fit(dataNum) 58 | 59 | for i, v := range s { 60 | dataNum[i] = float64(v.Name4) 61 | } 62 | 63 | e.Name4.Fit(dataNum) 64 | 65 | for i, v := range s { 66 | dataStr[i] = v.Name5 67 | } 68 | 69 | e.Name5.Fit(dataStr) 70 | 71 | for i, v := range s { 72 | dataStr[i] = v.Name6 73 | } 74 | 75 | e.Name6.Fit(dataStr) 76 | 77 | for i, v := range s { 78 | dataNum[i] = float64(v.Name7) 79 | } 80 | 81 | e.Name7.Fit(dataNum) 82 | 83 | for i, v := range s { 84 | dataStr[i] = v.Name8 85 | } 86 | 87 | e.Name8.Fit(dataStr) 88 | 89 | for i, v := range s { 90 | dataStr[i] = v.Name9 91 | } 92 | 93 | e.Name9.Fit(dataStr) 94 | 95 | } 96 | 97 | // Transform transforms struct into feature vector accordingly to transformers 98 | func (e *AllTransformersFeatureTransformer) Transform(s *AllTransformers) []float64 { 99 | if s == nil || e == nil { 100 | return nil 101 | } 102 | features := make([]float64, e.NumFeatures()) 103 | e.TransformInplace(features, s) 104 | return features 105 | } 106 | 107 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 108 | func (e *AllTransformersFeatureTransformer) TransformInplace(dst []float64, s *AllTransformers) { 109 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 110 | return 111 | } 112 | idx := 0 113 | 114 | dst[idx] = e.Name0.Transform(float64(s.Name0)) 115 | idx++ 116 | 117 | dst[idx] = e.Name1.Transform(float64(s.Name1)) 118 | idx++ 119 | 120 | dst[idx] = e.Name2.Transform(float64(s.Name2)) 121 | idx++ 122 | 123 | dst[idx] = e.Name3.Transform(float64(s.Name3)) 124 | idx++ 125 | 126 | dst[idx] = e.Name4.Transform(float64(s.Name4)) 127 | idx++ 128 | 129 | e.Name5.TransformInplace(dst[idx:idx+e.Name5.NumFeatures()], s.Name5) 130 | idx += e.Name5.NumFeatures() 131 | 132 | dst[idx] = e.Name6.Transform((s.Name6)) 133 | idx++ 134 | 135 | dst[idx] = e.Name7.Transform(float64(s.Name7)) 136 | idx++ 137 | 138 | e.Name8.TransformInplace(dst[idx:idx+e.Name8.NumFeatures()], s.Name8) 139 | idx += e.Name8.NumFeatures() 140 | 141 | e.Name9.TransformInplace(dst[idx:idx+e.Name9.NumFeatures()], s.Name9) 142 | idx += e.Name9.NumFeatures() 143 | 144 | } 145 | 146 | // TransformAll transforms a slice of AllTransformers 147 | func (e *AllTransformersFeatureTransformer) TransformAll(s []AllTransformers) 
[]float64 { 148 | if e == nil { 149 | return nil 150 | } 151 | features := make([]float64, len(s)*e.NumFeatures()) 152 | e.TransformAllInplace(features, s) 153 | return features 154 | } 155 | 156 | // TransformAllInplace transforms a slice of AllTransformers inplace 157 | func (e *AllTransformersFeatureTransformer) TransformAllInplace(dst []float64, s []AllTransformers) { 158 | if e == nil { 159 | return 160 | } 161 | n := e.NumFeatures() 162 | if len(dst) != n*len(s) { 163 | return 164 | } 165 | for i := range s { 166 | e.TransformInplace(dst[i*n:(i+1)*n], &s[i]) 167 | } 168 | } 169 | 170 | // TransformAllParallel transforms a slice of AllTransformers in parallel 171 | func (e *AllTransformersFeatureTransformer) TransformAllParallel(s []AllTransformers, nworkers uint) []float64 { 172 | if e == nil { 173 | return nil 174 | } 175 | features := make([]float64, len(s)*e.NumFeatures()) 176 | e.TransformAllInplaceParallel(features, s, nworkers) 177 | return features 178 | } 179 | 180 | // TransformAllInplaceParallel transforms a slice of AllTransformers inplace parallel 181 | // Useful for very large slices. 182 | func (e *AllTransformersFeatureTransformer) TransformAllInplaceParallel(dst []float64, s []AllTransformers, nworkers uint) { 183 | if e == nil || nworkers == 0 { 184 | return 185 | } 186 | ns := uint(len(s)) 187 | nf := uint(e.NumFeatures()) 188 | if uint(len(dst)) != nf*ns { 189 | return 190 | } 191 | 192 | nbatch := ns / nworkers 193 | var wg sync.WaitGroup 194 | 195 | for i := uint(0); i < nworkers; i++ { 196 | wg.Add(1) 197 | go func(i uint) { 198 | defer wg.Done() 199 | iStart := nbatch * i 200 | iEnd := nbatch * (i + 1) 201 | if i == (nworkers - 1) { 202 | iEnd = ns 203 | } 204 | e.TransformAllInplace(dst[iStart*nf:iEnd*nf], s[iStart:iEnd]) 205 | }(i) 206 | } 207 | 208 | wg.Wait() 209 | } 210 | 211 | // NumFeatures returns number of features in output feature vector 212 | func (e *AllTransformersFeatureTransformer) NumFeatures() int { 213 | if e == nil { 214 | return 0 215 | } 216 | 217 | count := 7 218 | 219 | count += e.Name5.NumFeatures() 220 | 221 | count += e.Name8.NumFeatures() 222 | count += e.Name9.NumFeatures() 223 | 224 | return count 225 | } 226 | 227 | // FeatureNames provides names of features that match output of transform 228 | func (e *AllTransformersFeatureTransformer) FeatureNames() []string { 229 | if e == nil { 230 | return nil 231 | } 232 | 233 | idx := 0 234 | names := make([]string, e.NumFeatures()) 235 | 236 | names[idx] = "Name0" 237 | idx++ 238 | 239 | names[idx] = "Name1" 240 | idx++ 241 | 242 | names[idx] = "Name2" 243 | idx++ 244 | 245 | names[idx] = "Name3" 246 | idx++ 247 | 248 | names[idx] = "Name4" 249 | idx++ 250 | 251 | for _, w := range e.Name5.FeatureNames() { 252 | names[idx] = "Name5_" + w 253 | idx++ 254 | } 255 | 256 | names[idx] = "Name6" 257 | idx++ 258 | 259 | names[idx] = "Name7" 260 | idx++ 261 | 262 | for _, w := range e.Name8.FeatureNames() { 263 | names[idx] = "Name8_" + w 264 | idx++ 265 | } 266 | 267 | for _, w := range e.Name9.FeatureNames() { 268 | names[idx] = "Name9_" + w 269 | idx++ 270 | } 271 | 272 | return names 273 | } 274 | -------------------------------------------------------------------------------- /transformers/scalers_test.go: -------------------------------------------------------------------------------- 1 | package transformers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . 
"github.com/nikolaydubina/go-featureprocessing/transformers" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestIdentity(t *testing.T) { 11 | samples := []struct { 12 | name string 13 | input float64 14 | output float64 15 | }{ 16 | {"basic", 42, 42}, 17 | {"negative", -42, -42}, 18 | {"zero", 0, 0}, 19 | {"fraction", 0.5, 0.5}, 20 | } 21 | for _, s := range samples { 22 | t.Run(s.name, func(t *testing.T) { 23 | encoder := Identity{} 24 | features := encoder.Transform((s.input)) 25 | assert.Equal(t, s.output, features) 26 | }) 27 | } 28 | 29 | t.Run("fit", func(t *testing.T) { 30 | encoder := Identity{} 31 | encoder.Fit(nil) 32 | assert.Equal(t, Identity{}, encoder) 33 | }) 34 | } 35 | 36 | func TestMinMaxScalerTransform(t *testing.T) { 37 | samples := []struct { 38 | name string 39 | min float64 40 | max float64 41 | input float64 42 | output float64 43 | }{ 44 | {"basic", 1, 101, 51, 0.5}, 45 | {"basic", 1, 101, 71, 0.7}, 46 | {"bellow", 1, 101, 0.5, 0}, 47 | {"above", 1, 101, 102, 1}, 48 | {"negative", 1, 101, -1, 0}, 49 | {"zero", 1, 101, 0, 0}, 50 | {"same1", 1, 1, 1, 0}, 51 | {"same2", 1, 1, 0.5, 0}, 52 | {"same2", 1, 1, 2, 0}, 53 | } 54 | for _, s := range samples { 55 | t.Run(s.name, func(t *testing.T) { 56 | encoder := MinMaxScaler{Min: s.min, Max: s.max} 57 | features := encoder.Transform((s.input)) 58 | assert.Equal(t, s.output, features) 59 | }) 60 | } 61 | } 62 | 63 | func TestMinMaxScalerFit(t *testing.T) { 64 | samples := []struct { 65 | name string 66 | min float64 67 | max float64 68 | vals []float64 69 | }{ 70 | {"noinput", 0, 0, nil}, 71 | {"basic", 1, 101, []float64{1, 101}}, 72 | {"negative_1", -1, 101, []float64{-1, 101}}, 73 | {"negative_2", -10, -1, []float64{-10, -1}}, 74 | {"zero", 0, 0, []float64{0, 0}}, 75 | {"same", 1, 1, []float64{1, 1}}, 76 | {"reverse_order", 1, 10, []float64{10, 1}}, 77 | {"reverse_order_negative", -10, -1, []float64{-1, -10}}, 78 | } 79 | for _, s := range samples { 80 | t.Run(s.name, func(t *testing.T) { 81 | encoder := MinMaxScaler{} 82 | encoder.Fit(s.vals) 83 | assert.Equal(t, MinMaxScaler{Min: s.min, Max: s.max}, encoder) 84 | }) 85 | } 86 | } 87 | 88 | func TestMaxAbsScalerTransform(t *testing.T) { 89 | samples := []struct { 90 | name string 91 | max float64 92 | input float64 93 | output float64 94 | }{ 95 | {"basic", 100, 50, 0.5}, 96 | {"basic", 100, 70, 0.7}, 97 | {"above", 100, 102, 1}, 98 | {"above_negative", 100, -102, -1}, 99 | {"negative", 100, -50, -0.5}, 100 | {"zero1", 100, 0, 0}, 101 | {"zero2", 0, 0, 0}, 102 | } 103 | for _, s := range samples { 104 | t.Run(s.name, func(t *testing.T) { 105 | encoder := MaxAbsScaler{Max: s.max} 106 | features := encoder.Transform((s.input)) 107 | assert.Equal(t, s.output, features) 108 | }) 109 | } 110 | } 111 | 112 | func TestMaxAbsScalerFit(t *testing.T) { 113 | samples := []struct { 114 | name string 115 | max float64 116 | vals []float64 117 | }{ 118 | {"noinput", 0, nil}, 119 | {"basic", 100, []float64{1, 100}}, 120 | {"negative", 100, []float64{-1, -100}}, 121 | {"zero", 0, []float64{0, 0}}, 122 | {"same", 1, []float64{1, 1}}, 123 | {"reverse_order", 10, []float64{10, 1}}, 124 | {"reverse_order_negative", 10, []float64{-1, -10}}, 125 | } 126 | for _, s := range samples { 127 | t.Run(s.name, func(t *testing.T) { 128 | encoder := MaxAbsScaler{} 129 | encoder.Fit(s.vals) 130 | assert.Equal(t, MaxAbsScaler{Max: s.max}, encoder) 131 | }) 132 | } 133 | } 134 | 135 | func TestStandardScalerTransform(t *testing.T) { 136 | samples := []struct { 137 | name string 138 | mean 
float64 139 | std float64 140 | input float64 141 | output float64 142 | }{ 143 | {"basic_0", 100, 50, 100, 0}, 144 | {"basic_-0.5", 100, 50, 75, -0.5}, 145 | {"basic_0.5", 100, 50, 125, 0.5}, 146 | {"basic_-1", 100, 50, 50, -1}, 147 | {"basic_+1", 100, 50, 150, 1}, 148 | {"basic_-2", 100, 50, 0, -2}, 149 | {"basic_+2", 100, 50, 200, 2}, 150 | {"basic_-3", 100, 50, -50, -3}, 151 | {"basic_+3", 100, 50, 250, 3}, 152 | } 153 | for _, s := range samples { 154 | t.Run(s.name, func(t *testing.T) { 155 | encoder := StandardScaler{Mean: s.mean, STD: s.std} 156 | assert.Equal(t, s.output, encoder.Transform(s.input)) 157 | }) 158 | } 159 | } 160 | 161 | func TestStandardScalerFit(t *testing.T) { 162 | samples := []struct { 163 | name string 164 | mean float64 165 | std float64 166 | vals []float64 167 | }{ 168 | {"noinput", 0, 0, nil}, 169 | {"basic", 50.5, 70.0035713374682, []float64{1, 100}}, 170 | {"negative", -50.5, 70.0035713374682, []float64{-1, -100}}, 171 | {"zero", 0, 0, []float64{0, 0}}, 172 | {"same", 1, 0, []float64{1, 1, 1, 1}}, 173 | } 174 | for _, s := range samples { 175 | t.Run(s.name, func(t *testing.T) { 176 | encoder := StandardScaler{} 177 | encoder.Fit(s.vals) 178 | assert.Equal(t, StandardScaler{Mean: s.mean, STD: s.std}, encoder) 179 | }) 180 | } 181 | } 182 | 183 | func TestQuantileScalerTransform(t *testing.T) { 184 | samples := []struct { 185 | name string 186 | quantiles []float64 187 | input float64 188 | output float64 189 | }{ 190 | {"basic1", []float64{25, 50, 75, 100}, 0, 0.25}, 191 | {"basic2", []float64{25, 50, 75, 100}, 11, 0.25}, 192 | {"basic3", []float64{25, 50, 75, 100}, 25, 0.25}, 193 | {"basic4", []float64{25, 50, 75, 100}, 40, 0.5}, 194 | {"basic5", []float64{25, 50, 75, 100}, 50, 0.5}, 195 | {"basic6", []float64{25, 50, 75, 100}, 80, 1}, 196 | {"basic7", []float64{25, 50, 75, 100}, 101, 1}, 197 | {"empty", nil, 10, 0}, 198 | } 199 | for _, s := range samples { 200 | t.Run(s.name, func(t *testing.T) { 201 | encoder := QuantileScaler{Quantiles: s.quantiles} 202 | features := encoder.Transform((s.input)) 203 | assert.Equal(t, s.output, features) 204 | }) 205 | } 206 | } 207 | 208 | func TestQuantileScalerFit(t *testing.T) { 209 | samples := []struct { 210 | name string 211 | n int 212 | quantiles []float64 213 | vals []float64 214 | }{ 215 | {"basic", 4, []float64{25, 50, 75, 100}, []float64{25, 50, 75, 100}}, 216 | {"reverse_order", 4, []float64{25, 50, 75, 100}, []float64{100, 75, 50, 25}}, 217 | {"negative", 4, []float64{-100, -75, -50, -25}, []float64{-25, -50, -75, -100}}, 218 | {"one_quantile", 1, []float64{1}, []float64{1, 2, 3, 4, 5}}, 219 | {"one_value", 4, []float64{1}, []float64{1}}, 220 | {"less_elements_than_quantiles", 6, []float64{1, 2, 3}, []float64{1, 2, 3}}, 221 | {"more_inputs_than_quantiles", 3, []float64{1, 6, 11}, []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}, 222 | {"more_inputs_than_quantiles_reverse", 3, []float64{1, 6, 11}, []float64{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}}, 223 | {"more_inputs_than_quantiles_shape", 3, []float64{1, 6, 8}, []float64{1, 1, 1, 1, 1, 6, 6, 7, 8, 7, 8, 12, 6, 8, 15}}, 224 | } 225 | for _, s := range samples { 226 | t.Run(s.name, func(t *testing.T) { 227 | encoder := QuantileScaler{Quantiles: make([]float64, s.n)} 228 | encoder.Fit(s.vals) 229 | assert.Equal(t, QuantileScaler{Quantiles: s.quantiles}, encoder) 230 | }) 231 | } 232 | 233 | t.Run("no input", func(t *testing.T) { 234 | encoder := QuantileScaler{} 235 | encoder.Fit(nil) 236 | assert.Equal(t, QuantileScaler{}, 
encoder) 237 | }) 238 | 239 | t.Run("nquantiles is zero in beginning", func(t *testing.T) { 240 | encoder := QuantileScaler{} 241 | encoder.Fit(nil) 242 | assert.Equal(t, QuantileScaler{}, encoder) 243 | }) 244 | } 245 | -------------------------------------------------------------------------------- /transformers/textprocessors_test.go: -------------------------------------------------------------------------------- 1 | package transformers_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/nikolaydubina/go-featureprocessing/transformers" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCountVectorizer(t *testing.T) { 11 | samplesFit := []struct { 12 | name string 13 | input []string 14 | output map[string]uint 15 | }{ 16 | {"basic", []string{"a b", "b a", "a", "b", ""}, map[string]uint{"a": 0, "b": 1}}, 17 | {"same_string", []string{"a", "a", "a"}, map[string]uint{"a": 0}}, 18 | {"empty_string", []string{"", "", ""}, map[string]uint{}}, 19 | {"zeros_single", []string{""}, map[string]uint{}}, 20 | {"single", []string{"a"}, map[string]uint{"a": 0}}, 21 | {"empty", nil, nil}, 22 | } 23 | 24 | for _, s := range samplesFit { 25 | t.Run(s.name, func(t *testing.T) { 26 | encoder := CountVectorizer{} 27 | encoder.Fit(s.input) 28 | assert.Equal(t, CountVectorizer{Mapping: s.output, Separator: " "}, encoder) 29 | }) 30 | } 31 | 32 | t.Run("num features is zero for nil encoder", func(t *testing.T) { 33 | var encoder *CountVectorizer 34 | assert.Equal(t, 0, encoder.NumFeatures()) 35 | }) 36 | 37 | t.Run("transform returns nil on nil encoder", func(t *testing.T) { 38 | var encoder *CountVectorizer 39 | assert.Equal(t, []float64(nil), encoder.Transform("asdf")) 40 | }) 41 | 42 | t.Run("feature names on empty transformer", func(t *testing.T) { 43 | var encoder *CountVectorizer 44 | assert.Equal(t, []string(nil), encoder.FeatureNames()) 45 | }) 46 | 47 | t.Run("feature names", func(t *testing.T) { 48 | encoder := CountVectorizer{Mapping: map[string]uint{"a": 1, "b": 0}} 49 | assert.Equal(t, []string{"b", "a"}, encoder.FeatureNames()) 50 | }) 51 | 52 | samplesTransform := []struct { 53 | name string 54 | sep string 55 | mapping map[string]uint 56 | input string 57 | output []float64 58 | }{ 59 | {"empty string", "", map[string]uint{"a": 0, "b": 1, "c": 2}, "a b c", []float64{0, 0, 0}}, 60 | {"no separator", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "a", []float64{1, 0, 0}}, 61 | {"no separator repeating not counted", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "aaa", []float64{0, 0, 0}}, 62 | {"no separator utf-8", " ", map[string]uint{"안녕": 0, "b": 1, "c": 2}, "안녕", []float64{1, 0, 0}}, 63 | {"no separator utf-8 repeating not counted", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "안녕안녕안녕", []float64{0, 0, 0}}, 64 | {"basic", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "a b c", []float64{1, 1, 1}}, 65 | {"ending with separator", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "a b c ", []float64{1, 1, 1}}, 66 | {"separators continuosly", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, " a b c ", []float64{1, 1, 1}}, 67 | {"counting", " ", map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a b b c", []float64{3, 2, 1}}, 68 | } 69 | 70 | for _, s := range samplesTransform { 71 | t.Run("transform_inplace_"+s.name, func(t *testing.T) { 72 | tr := CountVectorizer{Separator: s.sep, Mapping: s.mapping} 73 | assert.Equal(t, s.output, tr.Transform(s.input)) 74 | }) 75 | } 76 | } 77 | 78 | func TestTFIDFVectorizerFit(t *testing.T) { 79 | samples := []struct { 80 | name string 81 | ndocs int 
82 | doccount []uint 83 | mapping map[string]uint 84 | input []string 85 | numFeatures int 86 | }{ 87 | {"basic", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, []string{"a a a b b", "a a a c", "a a", "a a a", "a a a a", "a a a c c"}, 3}, 88 | {"empty encoder empty input", 0, []uint(nil), map[string]uint(nil), nil, 0}, 89 | } 90 | 91 | for _, s := range samples { 92 | t.Run(s.name, func(t *testing.T) { 93 | encoder := TFIDFVectorizer{} 94 | expectedEncoder := TFIDFVectorizer{ 95 | CountVectorizer: CountVectorizer{Mapping: s.mapping, Separator: " "}, 96 | NumDocuments: s.ndocs, 97 | DocCount: s.doccount, 98 | } 99 | encoder.Fit(s.input) 100 | assert.Equal(t, expectedEncoder, encoder) 101 | assert.Equal(t, s.numFeatures, encoder.NumFeatures()) 102 | }) 103 | } 104 | 105 | t.Run("transofmer is nil", func(t *testing.T) { 106 | var encoder *TFIDFVectorizer 107 | assert.Equal(t, []float64(nil), encoder.Transform("asdf asdf")) 108 | assert.Equal(t, 0, encoder.NumFeatures()) 109 | }) 110 | } 111 | 112 | // test is based on data from: https://scikit-learn.org/stable/modules/feature_extraction.html 113 | func TestTFIDFVectorizerTransform(t *testing.T) { 114 | samples := []struct { 115 | name string 116 | ndocs int 117 | doccount []uint 118 | mapping map[string]uint 119 | input string 120 | output []float64 121 | }{ 122 | {"basic_1", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a c", []float64{0.8194099510753755, 0, 0.5732079309279058}}, 123 | {"basic_2", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a", []float64{1, 0, 0}}, 124 | {"basic_3", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a", []float64{1, 0, 0}}, 125 | {"basic_4", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a a", []float64{1, 0, 0}}, 126 | {"basic_5", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a b b", []float64{0.47330339145578754, 0.8808994832762984, 0}}, 127 | {"basic_6", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "a a a c c", []float64{0.58149260706886, 0, 0.8135516873095773}}, 128 | {"not found", 6, []uint{6, 1, 2}, map[string]uint{"a": 0, "b": 1, "c": 2}, "dddd", []float64{0, 0, 0}}, 129 | {"empty input", 2, []uint{1, 2}, map[string]uint{"a": 0, "b": 1}, " ", []float64{0, 0}}, 130 | {"empty vals", 2, []uint{1, 2}, map[string]uint{}, " b a ", []float64{}}, 131 | {"nil input", 2, []uint{1, 2}, map[string]uint{}, "", []float64{}}, 132 | } 133 | 134 | for _, s := range samples { 135 | t.Run(s.name, func(t *testing.T) { 136 | encoder := TFIDFVectorizer{ 137 | CountVectorizer: CountVectorizer{Mapping: s.mapping, Separator: " "}, 138 | NumDocuments: s.ndocs, 139 | DocCount: s.doccount, 140 | } 141 | assert.Equal(t, s.output, encoder.Transform(s.input)) 142 | }) 143 | 144 | if len(s.output) > 0 { 145 | t.Run(s.name+"_inplace", func(t *testing.T) { 146 | encoder := TFIDFVectorizer{ 147 | CountVectorizer: CountVectorizer{Mapping: s.mapping, Separator: " "}, 148 | NumDocuments: s.ndocs, 149 | DocCount: s.doccount, 150 | } 151 | 152 | features := make([]float64, encoder.NumFeatures()) 153 | encoder.TransformInplace(features, s.input) 154 | assert.Equal(t, s.output, features) 155 | 156 | // note, values in copied range should be zero 157 | features = make([]float64, encoder.NumFeatures()+100) 158 | features[0] = 11223344556677 159 | features[1] = 10101010110101 160 | features[99] = 1231231231 161 | 162 | expected := make([]float64, len(features)) 163 | copy(expected, features) 164 | copy(expected[10:], 
s.output) 165 | 166 | encoder.TransformInplace(features[10:10+encoder.NumFeatures()], s.input) 167 | assert.Equal(t, expected, features) 168 | }) 169 | } 170 | } 171 | 172 | t.Run("inplace does not run when dest len is not equal num features", func(t *testing.T) { 173 | encoder := TFIDFVectorizer{ 174 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"a": 0, "b": 1}, Separator: " "}, 175 | NumDocuments: 5, 176 | DocCount: []uint{2, 5}, 177 | } 178 | 179 | features := []float64{1, 2, 3, 4} 180 | encoder.TransformInplace(features, "a b c d") 181 | assert.Equal(t, []float64{1, 2, 3, 4}, features) 182 | }) 183 | } 184 | 185 | func TestTFIDFVectorizerFeatureNames(t *testing.T) { 186 | t.Run("feature names on empty transformer", func(t *testing.T) { 187 | var encoder *TFIDFVectorizer 188 | assert.Equal(t, []string(nil), encoder.FeatureNames()) 189 | }) 190 | 191 | t.Run("feature names", func(t *testing.T) { 192 | encoder := TFIDFVectorizer{CountVectorizer: CountVectorizer{Mapping: map[string]uint{"a": 1, "b": 0}}} 193 | assert.Equal(t, []string{"b", "a"}, encoder.FeatureNames()) 194 | }) 195 | } 196 | -------------------------------------------------------------------------------- /cmd/generate/tests/readme_test.go: -------------------------------------------------------------------------------- 1 | package examplemodule 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | . "github.com/nikolaydubina/go-featureprocessing/transformers" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestEmployeeFeatureTransformerReadme(t *testing.T) { 12 | t.Run("transform", func(t *testing.T) { 13 | employee := Employee{ 14 | Age: 22, 15 | Salary: 1000.0, 16 | Kids: 2, 17 | Weight: 85.1, 18 | Height: 160.0, 19 | City: "Pangyo", 20 | Car: "Tesla", 21 | Income: 9000.1, 22 | SecretValue: 42, 23 | Description: "large text fields are not a problem neither, tf-idf can help here too! more advanced NLP will be added later!", 24 | } 25 | 26 | tr := EmployeeFeatureTransformer{ 27 | Salary: MinMaxScaler{Min: 500, Max: 900}, 28 | Kids: MaxAbsScaler{Max: 4}, 29 | Weight: StandardScaler{Mean: 60, STD: 25}, 30 | Height: QuantileScaler{Quantiles: []float64{20, 100, 110, 120, 150}}, 31 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1, "Daejeon": 2, "Busan": 3}}, 32 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 90000}}, 33 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{1000, 1100, 2000, 3000, 10000}}}, 34 | Description: TFIDFVectorizer{ 35 | NumDocuments: 2, 36 | DocCount: []uint{1, 2, 2}, 37 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 38 | }, 39 | } 40 | 41 | features := tr.Transform(&employee) 42 | expected := []float64{22, 1, 0.5, 1.0039999999999998, 1, 1, 0, 0, 0, 1, 5, 0.7674945674619879, 0.4532946552278861, 0.4532946552278861} 43 | assert.Equal(t, expected, features) 44 | }) 45 | 46 | t.Run("transform_all", func(t *testing.T) { 47 | employee := Employee{ 48 | Age: 22, 49 | Salary: 1000.0, 50 | Kids: 2, 51 | Weight: 85.1, 52 | Height: 160.0, 53 | City: "Pangyo", 54 | Car: "Tesla", 55 | Income: 9000.1, 56 | SecretValue: 42, 57 | Description: "large text fields are not a problem neither, tf-idf can help here too! 
more advanced NLP will be added later!", 58 | } 59 | 60 | employees := []Employee{ 61 | employee, 62 | employee, 63 | employee, 64 | } 65 | 66 | tr := EmployeeFeatureTransformer{ 67 | Salary: MinMaxScaler{Min: 500, Max: 900}, 68 | Kids: MaxAbsScaler{Max: 4}, 69 | Weight: StandardScaler{Mean: 60, STD: 25}, 70 | Height: QuantileScaler{Quantiles: []float64{20, 100, 110, 120, 150}}, 71 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1, "Daejeon": 2, "Busan": 3}}, 72 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 90000}}, 73 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{1000, 1100, 2000, 3000, 10000}}}, 74 | Description: TFIDFVectorizer{ 75 | NumDocuments: 2, 76 | DocCount: []uint{1, 2, 2}, 77 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 78 | }, 79 | } 80 | 81 | features := tr.TransformAll(employees) 82 | expectedOne := []float64{22, 1, 0.5, 1.0039999999999998, 1, 1, 0, 0, 0, 1, 5, 0.7674945674619879, 0.4532946552278861, 0.4532946552278861} 83 | var expected []float64 84 | expected = append(expected, expectedOne...) 85 | expected = append(expected, expectedOne...) 86 | expected = append(expected, expectedOne...) 87 | assert.Equal(t, expected, features) 88 | }) 89 | 90 | t.Run("transform_all_parallel", func(t *testing.T) { 91 | employee := Employee{ 92 | Age: 22, 93 | Salary: 1000.0, 94 | Kids: 2, 95 | Weight: 85.1, 96 | Height: 160.0, 97 | City: "Pangyo", 98 | Car: "Tesla", 99 | Income: 9000.1, 100 | SecretValue: 42, 101 | Description: "large text fields are not a problem neither, tf-idf can help here too! more advanced NLP will be added later!", 102 | } 103 | 104 | employees := []Employee{ 105 | employee, 106 | employee, 107 | employee, 108 | employee, 109 | employee, 110 | employee, 111 | } 112 | 113 | tr := EmployeeFeatureTransformer{ 114 | Salary: MinMaxScaler{Min: 500, Max: 900}, 115 | Kids: MaxAbsScaler{Max: 4}, 116 | Weight: StandardScaler{Mean: 60, STD: 25}, 117 | Height: QuantileScaler{Quantiles: []float64{20, 100, 110, 120, 150}}, 118 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1, "Daejeon": 2, "Busan": 3}}, 119 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 90000}}, 120 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{1000, 1100, 2000, 3000, 10000}}}, 121 | Description: TFIDFVectorizer{ 122 | NumDocuments: 2, 123 | DocCount: []uint{1, 2, 2}, 124 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 125 | }, 126 | } 127 | 128 | features := tr.TransformAllParallel(employees, 3) 129 | expectedOne := []float64{22, 1, 0.5, 1.0039999999999998, 1, 1, 0, 0, 0, 1, 5, 0.7674945674619879, 0.4532946552278861, 0.4532946552278861} 130 | var expected []float64 131 | expected = append(expected, expectedOne...) 132 | expected = append(expected, expectedOne...) 133 | expected = append(expected, expectedOne...) 134 | expected = append(expected, expectedOne...) 135 | expected = append(expected, expectedOne...) 136 | expected = append(expected, expectedOne...) 
137 | assert.Equal(t, expected, features) 138 | }) 139 | 140 | t.Run("feature names", func(t *testing.T) { 141 | tr := EmployeeFeatureTransformer{ 142 | Salary: MinMaxScaler{Min: 500, Max: 900}, 143 | Kids: MaxAbsScaler{Max: 4}, 144 | Weight: StandardScaler{Mean: 60, STD: 25}, 145 | Height: QuantileScaler{Quantiles: []float64{20, 100, 110, 120, 150}}, 146 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1, "Daejeon": 2, "Busan": 3}}, 147 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 90000}}, 148 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{1000, 1100, 2000, 3000, 10000}}}, 149 | Description: TFIDFVectorizer{ 150 | NumDocuments: 2, 151 | DocCount: []uint{1, 2, 2}, 152 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 153 | }, 154 | } 155 | names := tr.FeatureNames() 156 | expected := []string{"Age", "Salary", "Kids", "Weight", "Height", "City_Pangyo", "City_Seoul", "City_Daejeon", "City_Busan", "Car", "Income", "Description_text", "Description_problem", "Description_help"} 157 | assert.Equal(t, expected, names) 158 | }) 159 | 160 | t.Run("feature names empty categorical skipped", func(t *testing.T) { 161 | tr := EmployeeFeatureTransformer{} 162 | names := tr.FeatureNames() 163 | expected := []string{"Age", "Salary", "Kids", "Weight", "Height", "Car", "Income"} 164 | assert.Equal(t, expected, names) 165 | }) 166 | 167 | t.Run("fit", func(t *testing.T) { 168 | employee := []Employee{ 169 | { 170 | Age: 22, 171 | Salary: 500.0, 172 | Kids: 2, 173 | Weight: 50, 174 | Height: 160.0, 175 | City: "Pangyo", 176 | Car: "Tesla", 177 | Income: 9000.1, 178 | SecretValue: 42, 179 | Description: "text problem help", 180 | }, 181 | { 182 | Age: 10, 183 | Salary: 900.0, 184 | Kids: 0, 185 | Weight: 10, 186 | Height: 120.0, 187 | City: "Seoul", 188 | Car: "BMW", 189 | Income: 420.1, 190 | Description: "problem help", 191 | }, 192 | } 193 | 194 | tr := EmployeeFeatureTransformer{} 195 | tr.Fit(employee) 196 | 197 | trExpected := EmployeeFeatureTransformer{ 198 | Salary: MinMaxScaler{Min: 500, Max: 900}, 199 | Kids: MaxAbsScaler{Max: 2}, 200 | Weight: StandardScaler{Mean: 30, STD: 28.284271247461902}, 201 | Height: QuantileScaler{Quantiles: []float64{120, 160}}, 202 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1}}, 203 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 2}}, 204 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{420.1, 9000.1}}}, 205 | Description: TFIDFVectorizer{ 206 | NumDocuments: 2, 207 | DocCount: []uint{1, 2, 2}, 208 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 209 | }, 210 | } 211 | 212 | assert.Equal(t, trExpected, tr) 213 | }) 214 | 215 | t.Run("serialize transformer", func(t *testing.T) { 216 | tr := EmployeeFeatureTransformer{ 217 | Salary: MinMaxScaler{Min: 500, Max: 900}, 218 | Kids: MaxAbsScaler{Max: 4}, 219 | Weight: StandardScaler{Mean: 60, STD: 25}, 220 | Height: QuantileScaler{Quantiles: []float64{20, 100, 110, 120, 150}}, 221 | City: OneHotEncoder{Mapping: map[string]uint{"Pangyo": 0, "Seoul": 1, "Daejeon": 2, "Busan": 3}}, 222 | Car: OrdinalEncoder{Mapping: map[string]uint{"Tesla": 1, "BMW": 90000}}, 223 | Income: KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: []float64{1000, 1100, 2000, 3000, 10000}}}, 224 | Description: TFIDFVectorizer{ 225 | NumDocuments: 2, 226 | DocCount: []uint{1, 2, 
2}, 227 | CountVectorizer: CountVectorizer{Mapping: map[string]uint{"text": 0, "problem": 1, "help": 2}, Separator: " "}, 228 | }, 229 | } 230 | 231 | output, err := json.MarshalIndent(tr, "", " ") 232 | outputStr := string(output) 233 | expected := `{ 234 | "Age_identity": {}, 235 | "Salary_minmax": { 236 | "Min": 500, 237 | "Max": 900 238 | }, 239 | "Kids_maxabs": { 240 | "Max": 4 241 | }, 242 | "Weight_standard": { 243 | "Mean": 60, 244 | "STD": 25 245 | }, 246 | "Height_quantile": { 247 | "Quantiles": [ 248 | 20, 249 | 100, 250 | 110, 251 | 120, 252 | 150 253 | ] 254 | }, 255 | "City_onehot": { 256 | "Mapping": { 257 | "Busan": 3, 258 | "Daejeon": 2, 259 | "Pangyo": 0, 260 | "Seoul": 1 261 | } 262 | }, 263 | "Car_ordinal": { 264 | "Mapping": { 265 | "BMW": 90000, 266 | "Tesla": 1 267 | } 268 | }, 269 | "Income_kbins": { 270 | "Quantiles": [ 271 | 1000, 272 | 1100, 273 | 2000, 274 | 3000, 275 | 10000 276 | ] 277 | }, 278 | "Description_tfidf": { 279 | "Mapping": { 280 | "help": 2, 281 | "problem": 1, 282 | "text": 0 283 | }, 284 | "Separator": " ", 285 | "DocCount": [ 286 | 1, 287 | 2, 288 | 2 289 | ], 290 | "NumDocuments": 2, 291 | "Normalizer": {} 292 | } 293 | }` 294 | assert.Nil(t, err) 295 | assert.Equal(t, expected, outputStr) 296 | }) 297 | } 298 | -------------------------------------------------------------------------------- /cmd/generate/tests/with32fieldsfp_test.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "encoding/json" 7 | "testing" 8 | 9 | "github.com/google/gofuzz" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // makeMock creates some valid With32FieldsFeatureTransformer by fitting on fuzzy data. 14 | // This function is handy for tests. 
15 | func makeMockWith32FieldsFeatureTransformer() *With32FieldsFeatureTransformer { 16 | s := make([]With32Fields, 10) 17 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 18 | 19 | tr := With32FieldsFeatureTransformer{} 20 | tr.Fit(s) 21 | return &tr 22 | } 23 | 24 | func TestWith32FieldsFeatureTransformerFeatureNames(t *testing.T) { 25 | tr := makeMockWith32FieldsFeatureTransformer() 26 | 27 | t.Run("feature names", func(t *testing.T) { 28 | names := tr.FeatureNames() 29 | assert.True(t, len(names) > 0) 30 | assert.Equal(t, len(names), tr.NumFeatures()) 31 | }) 32 | 33 | t.Run("feature name transformer is empty", func(t *testing.T) { 34 | tr := With32FieldsFeatureTransformer{} 35 | names := tr.FeatureNames() 36 | assert.True(t, len(names) > 0) 37 | assert.Equal(t, len(names), tr.NumFeatures()) 38 | }) 39 | 40 | t.Run("feature name transformer is nil", func(t *testing.T) { 41 | var tr *With32FieldsFeatureTransformer 42 | names := tr.FeatureNames() 43 | assert.Nil(t, names) 44 | }) 45 | } 46 | 47 | func TestWith32FieldsFeatureTransformerTransform(t *testing.T) { 48 | tr := makeMockWith32FieldsFeatureTransformer() 49 | 50 | t.Run("empty struct", func(t *testing.T) { 51 | s := With32Fields{} 52 | features := tr.Transform(&s) 53 | 54 | assert.NotNil(t, features) 55 | assert.True(t, len(features) > 0) 56 | assert.Equal(t, tr.NumFeatures(), len(features)) 57 | }) 58 | 59 | t.Run("fuzzy struct", func(t *testing.T) { 60 | var s With32Fields 61 | fuzz.New().Fuzz(&s) 62 | 63 | tr := With32FieldsFeatureTransformer{} 64 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 65 | 66 | features := tr.Transform(&s) 67 | 68 | assert.NotNil(t, features) 69 | assert.True(t, len(features) > 0) 70 | assert.Equal(t, tr.NumFeatures(), len(features)) 71 | }) 72 | 73 | t.Run("struct is nil", func(t *testing.T) { 74 | var s *With32Fields 75 | features := tr.Transform(s) 76 | assert.Nil(t, features) 77 | assert.True(t, tr.NumFeatures() > 0) 78 | }) 79 | 80 | t.Run("transformer is nil", func(t *testing.T) { 81 | var s With32Fields 82 | fuzz.New().Fuzz(&s) 83 | 84 | var tr *With32FieldsFeatureTransformer 85 | features := tr.Transform(&s) 86 | 87 | assert.Nil(t, features) 88 | assert.Equal(t, tr.NumFeatures(), 0) 89 | }) 90 | 91 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 92 | output, err := json.Marshal(tr) 93 | assert.Nil(t, err) 94 | assert.NotEmpty(t, output) 95 | 96 | var tr2 With32FieldsFeatureTransformer 97 | err = json.Unmarshal(output, &tr2) 98 | assert.Nil(t, err) 99 | assert.Equal(t, *tr, tr2) 100 | }) 101 | 102 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 103 | var s With32Fields 104 | fuzz.New().Fuzz(&s) 105 | 106 | tr := With32FieldsFeatureTransformer{} 107 | 108 | features := make([]float64, 1000) 109 | features[0] = 123456789.0 110 | tr.TransformInplace(features, &s) 111 | 112 | assert.Equal(t, 123456789.0, features[0]) 113 | }) 114 | } 115 | 116 | func TestWith32FieldsFeatureTransformerTransformAll(t *testing.T) { 117 | t.Run("when transformer is nil", func(t *testing.T) { 118 | s := make([]With32Fields, 100) 119 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 120 | 121 | dst := make([]float64, 100*100) 122 | 123 | var tr *With32FieldsFeatureTransformer 124 | assert.Nil(t, tr.TransformAll(s)) 125 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 126 | 127 | // does not panic 128 | tr.TransformAllInplace(dst, s) 129 | tr.TransformAllInplaceParallel(dst, s, 4) 130 | }) 131 | 132 | t.Run("inplace with 
wrong output dimensions, output is smaller", func(t *testing.T) { 133 | s := make([]With32Fields, 100) 134 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 135 | 136 | dst := make([]float64, 100) 137 | 138 | tr := makeMockWith32FieldsFeatureTransformer() 139 | 140 | // does not panic 141 | tr.TransformAllInplace(dst, s) 142 | tr.TransformAllInplaceParallel(dst, s, 4) 143 | }) 144 | 145 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 146 | s := make([]With32Fields, 100) 147 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 148 | 149 | dst := make([]float64, 100*120) 150 | 151 | tr := makeMockWith32FieldsFeatureTransformer() 152 | 153 | // does not panic 154 | tr.TransformAllInplace(dst, s) 155 | tr.TransformAllInplaceParallel(dst, s, 4) 156 | }) 157 | 158 | t.Run("transform all", func(t *testing.T) { 159 | s := make([]With32Fields, 100) 160 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 161 | 162 | tr := makeMockWith32FieldsFeatureTransformer() 163 | 164 | features := tr.TransformAll(s) 165 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 166 | }) 167 | 168 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 169 | s := make([]With32Fields, 100) 170 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 171 | 172 | tr := makeMockWith32FieldsFeatureTransformer() 173 | 174 | features := tr.TransformAllParallel(s, 1) 175 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 176 | }) 177 | 178 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 179 | s := make([]With32Fields, 100) 180 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 181 | 182 | tr := makeMockWith32FieldsFeatureTransformer() 183 | 184 | features := tr.TransformAllParallel(s, 4) 185 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 186 | }) 187 | } 188 | 189 | func TestWith32FieldsFeatureTransformerFit(t *testing.T) { 190 | t.Run("fuzzy input", func(t *testing.T) { 191 | s := make([]With32Fields, 10) 192 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 193 | 194 | trEmpty := With32FieldsFeatureTransformer{} 195 | tr := With32FieldsFeatureTransformer{} 196 | tr.Fit(s) 197 | 198 | assert.NotNil(t, tr) 199 | assert.NotEqual(t, tr, trEmpty) 200 | }) 201 | 202 | t.Run("not nil transformer nil input", func(t *testing.T) { 203 | trEmpty := With32FieldsFeatureTransformer{} 204 | tr := With32FieldsFeatureTransformer{} 205 | tr.Fit(nil) 206 | 207 | assert.Equal(t, trEmpty, tr) 208 | }) 209 | 210 | t.Run("nil transformer not nil input", func(t *testing.T) { 211 | s := make([]With32Fields, 10) 212 | 213 | var tr *With32FieldsFeatureTransformer 214 | tr.Fit(s) 215 | 216 | assert.Nil(t, tr) 217 | }) 218 | } 219 | 220 | func fitTransformerWith32Fields(b *testing.B, numelem int) { 221 | s := make([]With32Fields, numelem) 222 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 223 | 224 | var tr With32FieldsFeatureTransformer 225 | 226 | b.ResetTimer() 227 | for n := 0; n < b.N; n++ { 228 | tr.Fit(s) 229 | } 230 | } 231 | 232 | func BenchmarkWith32FieldsFeatureTransformer_Fit_100elements(b *testing.B) { 233 | fitTransformerWith32Fields(b, 100) 234 | } 235 | 236 | func BenchmarkWith32FieldsFeatureTransformer_Fit_1000elements(b *testing.B) { 237 | fitTransformerWith32Fields(b, 1000) 238 | } 239 | 240 | func BenchmarkWith32FieldsFeatureTransformer_Fit_10000elements(b *testing.B) { 241 | fitTransformerWith32Fields(b, 10000) 242 | } 243 | 244 | func BenchmarkWith32FieldsFeatureTransformer_Transform(b *testing.B) { 245 
| var s With32Fields 246 | fuzz.New().Fuzz(&s) 247 | 248 | tr := makeMockWith32FieldsFeatureTransformer() 249 | 250 | b.ResetTimer() 251 | for n := 0; n < b.N; n++ { 252 | tr.Transform(&s) 253 | } 254 | } 255 | 256 | func BenchmarkWith32FieldsFeatureTransformer_Transform_Inplace(b *testing.B) { 257 | var s With32Fields 258 | fuzz.New().Fuzz(&s) 259 | 260 | tr := makeMockWith32FieldsFeatureTransformer() 261 | 262 | features := make([]float64, tr.NumFeatures()) 263 | 264 | b.ResetTimer() 265 | for n := 0; n < b.N; n++ { 266 | tr.TransformInplace(features, &s) 267 | } 268 | } 269 | 270 | func benchTransformAllWith32Fields(b *testing.B, numelem int) { 271 | s := make([]With32Fields, numelem) 272 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 273 | 274 | tr := makeMockWith32FieldsFeatureTransformer() 275 | 276 | b.ResetTimer() 277 | for n := 0; n < b.N; n++ { 278 | tr.TransformAll(s) 279 | } 280 | } 281 | 282 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_10elems(b *testing.B) { 283 | benchTransformAllWith32Fields(b, 10) 284 | } 285 | 286 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_100elems(b *testing.B) { 287 | benchTransformAllWith32Fields(b, 100) 288 | } 289 | 290 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_1000elems(b *testing.B) { 291 | benchTransformAllWith32Fields(b, 1000) 292 | } 293 | 294 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_10000elems(b *testing.B) { 295 | benchTransformAllWith32Fields(b, 10000) 296 | } 297 | 298 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_100000elems(b *testing.B) { 299 | benchTransformAllWith32Fields(b, 100000) 300 | } 301 | 302 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_1000000elems(b *testing.B) { 303 | benchTransformAllWith32Fields(b, 1000000) 304 | } 305 | 306 | func benchTransformAllParallelWith32Fields(b *testing.B, numelem int, nworkers uint) { 307 | s := make([]With32Fields, numelem) 308 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 309 | 310 | tr := makeMockWith32FieldsFeatureTransformer() 311 | 312 | b.ResetTimer() 313 | for n := 0; n < b.N; n++ { 314 | tr.TransformAllParallel(s, nworkers) 315 | } 316 | } 317 | 318 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 319 | benchTransformAllParallelWith32Fields(b, 10, 8) 320 | } 321 | 322 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 323 | benchTransformAllParallelWith32Fields(b, 100, 8) 324 | } 325 | 326 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 327 | benchTransformAllParallelWith32Fields(b, 1000, 8) 328 | } 329 | 330 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 331 | benchTransformAllParallelWith32Fields(b, 10000, 8) 332 | } 333 | 334 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 335 | benchTransformAllParallelWith32Fields(b, 100000, 8) 336 | } 337 | 338 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 339 | benchTransformAllParallelWith32Fields(b, 1000000, 8) 340 | } 341 | 342 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 343 | benchTransformAllParallelWith32Fields(b, 5000000, 8) 344 | } 345 | 346 | func BenchmarkWith32FieldsFeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 347 | 
benchTransformAllParallelWith32Fields(b, 15000000, 8) 348 | } 349 | -------------------------------------------------------------------------------- /cmd/generate/tests/employeefp_test.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "encoding/json" 7 | "testing" 8 | 9 | "github.com/google/gofuzz" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // makeMock creates some valid EmployeeFeatureTransformer by fitting on fuzzy data. 14 | // This function is handy for tests. 15 | func makeMockEmployeeFeatureTransformer() *EmployeeFeatureTransformer { 16 | s := make([]Employee, 10) 17 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 18 | 19 | tr := EmployeeFeatureTransformer{} 20 | tr.Fit(s) 21 | return &tr 22 | } 23 | 24 | func TestEmployeeFeatureTransformerFeatureNames(t *testing.T) { 25 | tr := makeMockEmployeeFeatureTransformer() 26 | 27 | t.Run("feature names", func(t *testing.T) { 28 | names := tr.FeatureNames() 29 | assert.True(t, len(names) > 0) 30 | assert.Equal(t, len(names), tr.NumFeatures()) 31 | }) 32 | 33 | t.Run("feature name transformer is empty", func(t *testing.T) { 34 | tr := EmployeeFeatureTransformer{} 35 | names := tr.FeatureNames() 36 | assert.True(t, len(names) > 0) 37 | assert.Equal(t, len(names), tr.NumFeatures()) 38 | }) 39 | 40 | t.Run("feature name transformer is nil", func(t *testing.T) { 41 | var tr *EmployeeFeatureTransformer 42 | names := tr.FeatureNames() 43 | assert.Nil(t, names) 44 | }) 45 | } 46 | 47 | func TestEmployeeFeatureTransformerTransform(t *testing.T) { 48 | tr := makeMockEmployeeFeatureTransformer() 49 | 50 | t.Run("empty struct", func(t *testing.T) { 51 | s := Employee{} 52 | features := tr.Transform(&s) 53 | 54 | assert.NotNil(t, features) 55 | assert.True(t, len(features) > 0) 56 | assert.Equal(t, tr.NumFeatures(), len(features)) 57 | }) 58 | 59 | t.Run("fuzzy struct", func(t *testing.T) { 60 | var s Employee 61 | fuzz.New().Fuzz(&s) 62 | 63 | tr := EmployeeFeatureTransformer{} 64 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 65 | 66 | features := tr.Transform(&s) 67 | 68 | assert.NotNil(t, features) 69 | assert.True(t, len(features) > 0) 70 | assert.Equal(t, tr.NumFeatures(), len(features)) 71 | }) 72 | 73 | t.Run("struct is nil", func(t *testing.T) { 74 | var s *Employee 75 | features := tr.Transform(s) 76 | assert.Nil(t, features) 77 | assert.True(t, tr.NumFeatures() > 0) 78 | }) 79 | 80 | t.Run("transformer is nil", func(t *testing.T) { 81 | var s Employee 82 | fuzz.New().Fuzz(&s) 83 | 84 | var tr *EmployeeFeatureTransformer 85 | features := tr.Transform(&s) 86 | 87 | assert.Nil(t, features) 88 | assert.Equal(t, tr.NumFeatures(), 0) 89 | }) 90 | 91 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 92 | output, err := json.Marshal(tr) 93 | assert.Nil(t, err) 94 | assert.NotEmpty(t, output) 95 | 96 | var tr2 EmployeeFeatureTransformer 97 | err = json.Unmarshal(output, &tr2) 98 | assert.Nil(t, err) 99 | assert.Equal(t, *tr, tr2) 100 | }) 101 | 102 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 103 | var s Employee 104 | fuzz.New().Fuzz(&s) 105 | 106 | tr := EmployeeFeatureTransformer{} 107 | 108 | features := make([]float64, 1000) 109 | features[0] = 123456789.0 110 | tr.TransformInplace(features, &s) 111 | 112 | assert.Equal(t, 123456789.0, features[0]) 113 | }) 114 | } 115 | 116 | func 
TestEmployeeFeatureTransformerTransformAll(t *testing.T) { 117 | t.Run("when transformer is nil", func(t *testing.T) { 118 | s := make([]Employee, 100) 119 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 120 | 121 | dst := make([]float64, 100*100) 122 | 123 | var tr *EmployeeFeatureTransformer 124 | assert.Nil(t, tr.TransformAll(s)) 125 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 126 | 127 | // does not panic 128 | tr.TransformAllInplace(dst, s) 129 | tr.TransformAllInplaceParallel(dst, s, 4) 130 | }) 131 | 132 | t.Run("inplace with wrong output dimensions, output is smaller", func(t *testing.T) { 133 | s := make([]Employee, 100) 134 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 135 | 136 | dst := make([]float64, 100) 137 | 138 | tr := makeMockEmployeeFeatureTransformer() 139 | 140 | // does not panic 141 | tr.TransformAllInplace(dst, s) 142 | tr.TransformAllInplaceParallel(dst, s, 4) 143 | }) 144 | 145 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 146 | s := make([]Employee, 100) 147 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 148 | 149 | dst := make([]float64, 100*120) 150 | 151 | tr := makeMockEmployeeFeatureTransformer() 152 | 153 | // does not panic 154 | tr.TransformAllInplace(dst, s) 155 | tr.TransformAllInplaceParallel(dst, s, 4) 156 | }) 157 | 158 | t.Run("transform all", func(t *testing.T) { 159 | s := make([]Employee, 100) 160 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 161 | 162 | tr := makeMockEmployeeFeatureTransformer() 163 | 164 | features := tr.TransformAll(s) 165 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 166 | }) 167 | 168 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 169 | s := make([]Employee, 100) 170 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 171 | 172 | tr := makeMockEmployeeFeatureTransformer() 173 | 174 | features := tr.TransformAllParallel(s, 1) 175 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 176 | }) 177 | 178 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 179 | s := make([]Employee, 100) 180 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 181 | 182 | tr := makeMockEmployeeFeatureTransformer() 183 | 184 | features := tr.TransformAllParallel(s, 4) 185 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 186 | }) 187 | } 188 | 189 | func TestEmployeeFeatureTransformerFit(t *testing.T) { 190 | t.Run("fuzzy input", func(t *testing.T) { 191 | s := make([]Employee, 10) 192 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 193 | 194 | trEmpty := EmployeeFeatureTransformer{} 195 | tr := EmployeeFeatureTransformer{} 196 | tr.Fit(s) 197 | 198 | assert.NotNil(t, tr) 199 | assert.NotEqual(t, tr, trEmpty) 200 | }) 201 | 202 | t.Run("not nil transformer nil input", func(t *testing.T) { 203 | trEmpty := EmployeeFeatureTransformer{} 204 | tr := EmployeeFeatureTransformer{} 205 | tr.Fit(nil) 206 | 207 | assert.Equal(t, trEmpty, tr) 208 | }) 209 | 210 | t.Run("nil transformer not nil input", func(t *testing.T) { 211 | s := make([]Employee, 10) 212 | 213 | var tr *EmployeeFeatureTransformer 214 | tr.Fit(s) 215 | 216 | assert.Nil(t, tr) 217 | }) 218 | } 219 | 220 | func fitTransformerEmployee(b *testing.B, numelem int) { 221 | s := make([]Employee, numelem) 222 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 223 | 224 | var tr EmployeeFeatureTransformer 225 | 226 | b.ResetTimer() 227 | for n := 0; n < b.N; n++ { 228 | tr.Fit(s) 229 | } 230 | } 231 | 232 | func 
BenchmarkEmployeeFeatureTransformer_Fit_100elements(b *testing.B) { 233 | fitTransformerEmployee(b, 100) 234 | } 235 | 236 | func BenchmarkEmployeeFeatureTransformer_Fit_1000elements(b *testing.B) { 237 | fitTransformerEmployee(b, 1000) 238 | } 239 | 240 | func BenchmarkEmployeeFeatureTransformer_Fit_10000elements(b *testing.B) { 241 | fitTransformerEmployee(b, 10000) 242 | } 243 | 244 | func BenchmarkEmployeeFeatureTransformer_Transform(b *testing.B) { 245 | var s Employee 246 | fuzz.New().Fuzz(&s) 247 | 248 | tr := makeMockEmployeeFeatureTransformer() 249 | 250 | b.ResetTimer() 251 | for n := 0; n < b.N; n++ { 252 | tr.Transform(&s) 253 | } 254 | } 255 | 256 | func BenchmarkEmployeeFeatureTransformer_Transform_Inplace(b *testing.B) { 257 | var s Employee 258 | fuzz.New().Fuzz(&s) 259 | 260 | tr := makeMockEmployeeFeatureTransformer() 261 | 262 | features := make([]float64, tr.NumFeatures()) 263 | 264 | b.ResetTimer() 265 | for n := 0; n < b.N; n++ { 266 | tr.TransformInplace(features, &s) 267 | } 268 | } 269 | 270 | func benchTransformAllEmployee(b *testing.B, numelem int) { 271 | s := make([]Employee, numelem) 272 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 273 | 274 | tr := makeMockEmployeeFeatureTransformer() 275 | 276 | b.ResetTimer() 277 | for n := 0; n < b.N; n++ { 278 | tr.TransformAll(s) 279 | } 280 | } 281 | 282 | func BenchmarkEmployeeFeatureTransformer_TransformAll_10elems(b *testing.B) { 283 | benchTransformAllEmployee(b, 10) 284 | } 285 | 286 | func BenchmarkEmployeeFeatureTransformer_TransformAll_100elems(b *testing.B) { 287 | benchTransformAllEmployee(b, 100) 288 | } 289 | 290 | func BenchmarkEmployeeFeatureTransformer_TransformAll_1000elems(b *testing.B) { 291 | benchTransformAllEmployee(b, 1000) 292 | } 293 | 294 | func BenchmarkEmployeeFeatureTransformer_TransformAll_10000elems(b *testing.B) { 295 | benchTransformAllEmployee(b, 10000) 296 | } 297 | 298 | func BenchmarkEmployeeFeatureTransformer_TransformAll_100000elems(b *testing.B) { 299 | benchTransformAllEmployee(b, 100000) 300 | } 301 | 302 | func BenchmarkEmployeeFeatureTransformer_TransformAll_1000000elems(b *testing.B) { 303 | benchTransformAllEmployee(b, 1000000) 304 | } 305 | 306 | func benchTransformAllParallelEmployee(b *testing.B, numelem int, nworkers uint) { 307 | s := make([]Employee, numelem) 308 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 309 | 310 | tr := makeMockEmployeeFeatureTransformer() 311 | 312 | b.ResetTimer() 313 | for n := 0; n < b.N; n++ { 314 | tr.TransformAllParallel(s, nworkers) 315 | } 316 | } 317 | 318 | func BenchmarkEmployeeFeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 319 | benchTransformAllParallelEmployee(b, 10, 8) 320 | } 321 | 322 | func BenchmarkEmployeeFeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 323 | benchTransformAllParallelEmployee(b, 100, 8) 324 | } 325 | 326 | func BenchmarkEmployeeFeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 327 | benchTransformAllParallelEmployee(b, 1000, 8) 328 | } 329 | 330 | func BenchmarkEmployeeFeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 331 | benchTransformAllParallelEmployee(b, 10000, 8) 332 | } 333 | 334 | func BenchmarkEmployeeFeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 335 | benchTransformAllParallelEmployee(b, 100000, 8) 336 | } 337 | 338 | func BenchmarkEmployeeFeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 339 | benchTransformAllParallelEmployee(b, 1000000, 8) 340 | } 
341 | 342 | func BenchmarkEmployeeFeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 343 | benchTransformAllParallelEmployee(b, 5000000, 8) 344 | } 345 | 346 | func BenchmarkEmployeeFeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 347 | benchTransformAllParallelEmployee(b, 15000000, 8) 348 | } 349 | 350 | func benchLargeTransformerEmployee(b *testing.B, numelem int) { 351 | var s []Employee 352 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 353 | 354 | tr := EmployeeFeatureTransformer{} 355 | tr.Fit(s) 356 | 357 | b.ResetTimer() 358 | for n := 0; n < b.N; n++ { 359 | tr.Transform(&s[0]) 360 | } 361 | } 362 | 363 | func BenchmarkEmployeeFeatureTransformer_Transform_LargeComposites_100elements(b *testing.B) { 364 | benchLargeTransformerEmployee(b, 100) 365 | } 366 | 367 | func BenchmarkEmployeeFeatureTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 368 | benchLargeTransformerEmployee(b, 1000) 369 | } 370 | 371 | func BenchmarkEmployeeFeatureTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 372 | benchLargeTransformerEmployee(b, 10000) 373 | } 374 | 375 | func BenchmarkEmployeeFeatureTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 376 | benchLargeTransformerEmployee(b, 100000) 377 | } 378 | -------------------------------------------------------------------------------- /cmd/generate/tests/weirdtagsfp_test.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "encoding/json" 7 | "testing" 8 | 9 | "github.com/google/gofuzz" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // makeMock creates some valid WeirdTagsFeatureTransformer by fitting on fuzzy data. 14 | // This function is handy for tests. 
15 | func makeMockWeirdTagsFeatureTransformer() *WeirdTagsFeatureTransformer { 16 | s := make([]WeirdTags, 10) 17 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 18 | 19 | tr := WeirdTagsFeatureTransformer{} 20 | tr.Fit(s) 21 | return &tr 22 | } 23 | 24 | func TestWeirdTagsFeatureTransformerFeatureNames(t *testing.T) { 25 | tr := makeMockWeirdTagsFeatureTransformer() 26 | 27 | t.Run("feature names", func(t *testing.T) { 28 | names := tr.FeatureNames() 29 | assert.True(t, len(names) > 0) 30 | assert.Equal(t, len(names), tr.NumFeatures()) 31 | }) 32 | 33 | t.Run("feature name transformer is empty", func(t *testing.T) { 34 | tr := WeirdTagsFeatureTransformer{} 35 | names := tr.FeatureNames() 36 | assert.True(t, len(names) > 0) 37 | assert.Equal(t, len(names), tr.NumFeatures()) 38 | }) 39 | 40 | t.Run("feature name transformer is nil", func(t *testing.T) { 41 | var tr *WeirdTagsFeatureTransformer 42 | names := tr.FeatureNames() 43 | assert.Nil(t, names) 44 | }) 45 | } 46 | 47 | func TestWeirdTagsFeatureTransformerTransform(t *testing.T) { 48 | tr := makeMockWeirdTagsFeatureTransformer() 49 | 50 | t.Run("empty struct", func(t *testing.T) { 51 | s := WeirdTags{} 52 | features := tr.Transform(&s) 53 | 54 | assert.NotNil(t, features) 55 | assert.True(t, len(features) > 0) 56 | assert.Equal(t, tr.NumFeatures(), len(features)) 57 | }) 58 | 59 | t.Run("fuzzy struct", func(t *testing.T) { 60 | var s WeirdTags 61 | fuzz.New().Fuzz(&s) 62 | 63 | tr := WeirdTagsFeatureTransformer{} 64 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 65 | 66 | features := tr.Transform(&s) 67 | 68 | assert.NotNil(t, features) 69 | assert.True(t, len(features) > 0) 70 | assert.Equal(t, tr.NumFeatures(), len(features)) 71 | }) 72 | 73 | t.Run("struct is nil", func(t *testing.T) { 74 | var s *WeirdTags 75 | features := tr.Transform(s) 76 | assert.Nil(t, features) 77 | assert.True(t, tr.NumFeatures() > 0) 78 | }) 79 | 80 | t.Run("transformer is nil", func(t *testing.T) { 81 | var s WeirdTags 82 | fuzz.New().Fuzz(&s) 83 | 84 | var tr *WeirdTagsFeatureTransformer 85 | features := tr.Transform(&s) 86 | 87 | assert.Nil(t, features) 88 | assert.Equal(t, tr.NumFeatures(), 0) 89 | }) 90 | 91 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 92 | output, err := json.Marshal(tr) 93 | assert.Nil(t, err) 94 | assert.NotEmpty(t, output) 95 | 96 | var tr2 WeirdTagsFeatureTransformer 97 | err = json.Unmarshal(output, &tr2) 98 | assert.Nil(t, err) 99 | assert.Equal(t, *tr, tr2) 100 | }) 101 | 102 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 103 | var s WeirdTags 104 | fuzz.New().Fuzz(&s) 105 | 106 | tr := WeirdTagsFeatureTransformer{} 107 | 108 | features := make([]float64, 1000) 109 | features[0] = 123456789.0 110 | tr.TransformInplace(features, &s) 111 | 112 | assert.Equal(t, 123456789.0, features[0]) 113 | }) 114 | } 115 | 116 | func TestWeirdTagsFeatureTransformerTransformAll(t *testing.T) { 117 | t.Run("when transformer is nil", func(t *testing.T) { 118 | s := make([]WeirdTags, 100) 119 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 120 | 121 | dst := make([]float64, 100*100) 122 | 123 | var tr *WeirdTagsFeatureTransformer 124 | assert.Nil(t, tr.TransformAll(s)) 125 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 126 | 127 | // does not panic 128 | tr.TransformAllInplace(dst, s) 129 | tr.TransformAllInplaceParallel(dst, s, 4) 130 | }) 131 | 132 | t.Run("inplace with wrong output dimensions, output is smaller", func(t *testing.T) { 133 
| s := make([]WeirdTags, 100) 134 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 135 | 136 | dst := make([]float64, 100) 137 | 138 | tr := makeMockWeirdTagsFeatureTransformer() 139 | 140 | // does not panic 141 | tr.TransformAllInplace(dst, s) 142 | tr.TransformAllInplaceParallel(dst, s, 4) 143 | }) 144 | 145 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 146 | s := make([]WeirdTags, 100) 147 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 148 | 149 | dst := make([]float64, 100*120) 150 | 151 | tr := makeMockWeirdTagsFeatureTransformer() 152 | 153 | // does not panic 154 | tr.TransformAllInplace(dst, s) 155 | tr.TransformAllInplaceParallel(dst, s, 4) 156 | }) 157 | 158 | t.Run("transform all", func(t *testing.T) { 159 | s := make([]WeirdTags, 100) 160 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 161 | 162 | tr := makeMockWeirdTagsFeatureTransformer() 163 | 164 | features := tr.TransformAll(s) 165 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 166 | }) 167 | 168 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 169 | s := make([]WeirdTags, 100) 170 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 171 | 172 | tr := makeMockWeirdTagsFeatureTransformer() 173 | 174 | features := tr.TransformAllParallel(s, 1) 175 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 176 | }) 177 | 178 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 179 | s := make([]WeirdTags, 100) 180 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 181 | 182 | tr := makeMockWeirdTagsFeatureTransformer() 183 | 184 | features := tr.TransformAllParallel(s, 4) 185 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 186 | }) 187 | } 188 | 189 | func TestWeirdTagsFeatureTransformerFit(t *testing.T) { 190 | t.Run("fuzzy input", func(t *testing.T) { 191 | s := make([]WeirdTags, 10) 192 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 193 | 194 | trEmpty := WeirdTagsFeatureTransformer{} 195 | tr := WeirdTagsFeatureTransformer{} 196 | tr.Fit(s) 197 | 198 | assert.NotNil(t, tr) 199 | assert.NotEqual(t, tr, trEmpty) 200 | }) 201 | 202 | t.Run("not nil transformer nil input", func(t *testing.T) { 203 | trEmpty := WeirdTagsFeatureTransformer{} 204 | tr := WeirdTagsFeatureTransformer{} 205 | tr.Fit(nil) 206 | 207 | assert.Equal(t, trEmpty, tr) 208 | }) 209 | 210 | t.Run("nil transformer not nil input", func(t *testing.T) { 211 | s := make([]WeirdTags, 10) 212 | 213 | var tr *WeirdTagsFeatureTransformer 214 | tr.Fit(s) 215 | 216 | assert.Nil(t, tr) 217 | }) 218 | } 219 | 220 | func fitTransformerWeirdTags(b *testing.B, numelem int) { 221 | s := make([]WeirdTags, numelem) 222 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 223 | 224 | var tr WeirdTagsFeatureTransformer 225 | 226 | b.ResetTimer() 227 | for n := 0; n < b.N; n++ { 228 | tr.Fit(s) 229 | } 230 | } 231 | 232 | func BenchmarkWeirdTagsFeatureTransformer_Fit_100elements(b *testing.B) { 233 | fitTransformerWeirdTags(b, 100) 234 | } 235 | 236 | func BenchmarkWeirdTagsFeatureTransformer_Fit_1000elements(b *testing.B) { 237 | fitTransformerWeirdTags(b, 1000) 238 | } 239 | 240 | func BenchmarkWeirdTagsFeatureTransformer_Fit_10000elements(b *testing.B) { 241 | fitTransformerWeirdTags(b, 10000) 242 | } 243 | 244 | func BenchmarkWeirdTagsFeatureTransformer_Transform(b *testing.B) { 245 | var s WeirdTags 246 | fuzz.New().Fuzz(&s) 247 | 248 | tr := makeMockWeirdTagsFeatureTransformer() 249 | 250 | b.ResetTimer() 251 | for n := 0; n < b.N; 
n++ { 252 | tr.Transform(&s) 253 | } 254 | } 255 | 256 | func BenchmarkWeirdTagsFeatureTransformer_Transform_Inplace(b *testing.B) { 257 | var s WeirdTags 258 | fuzz.New().Fuzz(&s) 259 | 260 | tr := makeMockWeirdTagsFeatureTransformer() 261 | 262 | features := make([]float64, tr.NumFeatures()) 263 | 264 | b.ResetTimer() 265 | for n := 0; n < b.N; n++ { 266 | tr.TransformInplace(features, &s) 267 | } 268 | } 269 | 270 | func benchTransformAllWeirdTags(b *testing.B, numelem int) { 271 | s := make([]WeirdTags, numelem) 272 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 273 | 274 | tr := makeMockWeirdTagsFeatureTransformer() 275 | 276 | b.ResetTimer() 277 | for n := 0; n < b.N; n++ { 278 | tr.TransformAll(s) 279 | } 280 | } 281 | 282 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_10elems(b *testing.B) { 283 | benchTransformAllWeirdTags(b, 10) 284 | } 285 | 286 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_100elems(b *testing.B) { 287 | benchTransformAllWeirdTags(b, 100) 288 | } 289 | 290 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_1000elems(b *testing.B) { 291 | benchTransformAllWeirdTags(b, 1000) 292 | } 293 | 294 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_10000elems(b *testing.B) { 295 | benchTransformAllWeirdTags(b, 10000) 296 | } 297 | 298 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_100000elems(b *testing.B) { 299 | benchTransformAllWeirdTags(b, 100000) 300 | } 301 | 302 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_1000000elems(b *testing.B) { 303 | benchTransformAllWeirdTags(b, 1000000) 304 | } 305 | 306 | func benchTransformAllParallelWeirdTags(b *testing.B, numelem int, nworkers uint) { 307 | s := make([]WeirdTags, numelem) 308 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 309 | 310 | tr := makeMockWeirdTagsFeatureTransformer() 311 | 312 | b.ResetTimer() 313 | for n := 0; n < b.N; n++ { 314 | tr.TransformAllParallel(s, nworkers) 315 | } 316 | } 317 | 318 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 319 | benchTransformAllParallelWeirdTags(b, 10, 8) 320 | } 321 | 322 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 323 | benchTransformAllParallelWeirdTags(b, 100, 8) 324 | } 325 | 326 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 327 | benchTransformAllParallelWeirdTags(b, 1000, 8) 328 | } 329 | 330 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 331 | benchTransformAllParallelWeirdTags(b, 10000, 8) 332 | } 333 | 334 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 335 | benchTransformAllParallelWeirdTags(b, 100000, 8) 336 | } 337 | 338 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 339 | benchTransformAllParallelWeirdTags(b, 1000000, 8) 340 | } 341 | 342 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 343 | benchTransformAllParallelWeirdTags(b, 5000000, 8) 344 | } 345 | 346 | func BenchmarkWeirdTagsFeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 347 | benchTransformAllParallelWeirdTags(b, 15000000, 8) 348 | } 349 | 350 | func benchLargeTransformerWeirdTags(b *testing.B, numelem int) { 351 | var s []WeirdTags 352 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 353 | 354 | tr := WeirdTagsFeatureTransformer{} 355 | tr.Fit(s) 356 | 
357 | b.ResetTimer() 358 | for n := 0; n < b.N; n++ { 359 | tr.Transform(&s[0]) 360 | } 361 | } 362 | 363 | func BenchmarkWeirdTagsFeatureTransformer_Transform_LargeComposites_100elements(b *testing.B) { 364 | benchLargeTransformerWeirdTags(b, 100) 365 | } 366 | 367 | func BenchmarkWeirdTagsFeatureTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 368 | benchLargeTransformerWeirdTags(b, 1000) 369 | } 370 | 371 | func BenchmarkWeirdTagsFeatureTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 372 | benchLargeTransformerWeirdTags(b, 10000) 373 | } 374 | 375 | func BenchmarkWeirdTagsFeatureTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 376 | benchLargeTransformerWeirdTags(b, 100000) 377 | } 378 | -------------------------------------------------------------------------------- /cmd/generate/tests/alltransformersfp_test.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "encoding/json" 7 | "testing" 8 | 9 | "github.com/google/gofuzz" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // makeMock creates some valid AllTransformersFeatureTransformer by fitting on fuzzy data. 14 | // This function is handy for tests. 15 | func makeMockAllTransformersFeatureTransformer() *AllTransformersFeatureTransformer { 16 | s := make([]AllTransformers, 10) 17 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 18 | 19 | tr := AllTransformersFeatureTransformer{} 20 | tr.Fit(s) 21 | return &tr 22 | } 23 | 24 | func TestAllTransformersFeatureTransformerFeatureNames(t *testing.T) { 25 | tr := makeMockAllTransformersFeatureTransformer() 26 | 27 | t.Run("feature names", func(t *testing.T) { 28 | names := tr.FeatureNames() 29 | assert.True(t, len(names) > 0) 30 | assert.Equal(t, len(names), tr.NumFeatures()) 31 | }) 32 | 33 | t.Run("feature name transformer is empty", func(t *testing.T) { 34 | tr := AllTransformersFeatureTransformer{} 35 | names := tr.FeatureNames() 36 | assert.True(t, len(names) > 0) 37 | assert.Equal(t, len(names), tr.NumFeatures()) 38 | }) 39 | 40 | t.Run("feature name transformer is nil", func(t *testing.T) { 41 | var tr *AllTransformersFeatureTransformer 42 | names := tr.FeatureNames() 43 | assert.Nil(t, names) 44 | }) 45 | } 46 | 47 | func TestAllTransformersFeatureTransformerTransform(t *testing.T) { 48 | tr := makeMockAllTransformersFeatureTransformer() 49 | 50 | t.Run("empty struct", func(t *testing.T) { 51 | s := AllTransformers{} 52 | features := tr.Transform(&s) 53 | 54 | assert.NotNil(t, features) 55 | assert.True(t, len(features) > 0) 56 | assert.Equal(t, tr.NumFeatures(), len(features)) 57 | }) 58 | 59 | t.Run("fuzzy struct", func(t *testing.T) { 60 | var s AllTransformers 61 | fuzz.New().Fuzz(&s) 62 | 63 | tr := AllTransformersFeatureTransformer{} 64 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 65 | 66 | features := tr.Transform(&s) 67 | 68 | assert.NotNil(t, features) 69 | assert.True(t, len(features) > 0) 70 | assert.Equal(t, tr.NumFeatures(), len(features)) 71 | }) 72 | 73 | t.Run("struct is nil", func(t *testing.T) { 74 | var s *AllTransformers 75 | features := tr.Transform(s) 76 | assert.Nil(t, features) 77 | assert.True(t, tr.NumFeatures() > 0) 78 | }) 79 | 80 | t.Run("transformer is nil", func(t *testing.T) { 81 | var s AllTransformers 82 | fuzz.New().Fuzz(&s) 83 | 84 | var tr *AllTransformersFeatureTransformer 85 | features := tr.Transform(&s) 86 | 87 | 
assert.Nil(t, features) 88 | assert.Equal(t, tr.NumFeatures(), 0) 89 | }) 90 | 91 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 92 | output, err := json.Marshal(tr) 93 | assert.Nil(t, err) 94 | assert.NotEmpty(t, output) 95 | 96 | var tr2 AllTransformersFeatureTransformer 97 | err = json.Unmarshal(output, &tr2) 98 | assert.Nil(t, err) 99 | assert.Equal(t, *tr, tr2) 100 | }) 101 | 102 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 103 | var s AllTransformers 104 | fuzz.New().Fuzz(&s) 105 | 106 | tr := AllTransformersFeatureTransformer{} 107 | 108 | features := make([]float64, 1000) 109 | features[0] = 123456789.0 110 | tr.TransformInplace(features, &s) 111 | 112 | assert.Equal(t, 123456789.0, features[0]) 113 | }) 114 | } 115 | 116 | func TestAllTransformersFeatureTransformerTransformAll(t *testing.T) { 117 | t.Run("when transformer is nil", func(t *testing.T) { 118 | s := make([]AllTransformers, 100) 119 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 120 | 121 | dst := make([]float64, 100*100) 122 | 123 | var tr *AllTransformersFeatureTransformer 124 | assert.Nil(t, tr.TransformAll(s)) 125 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 126 | 127 | // does not panic 128 | tr.TransformAllInplace(dst, s) 129 | tr.TransformAllInplaceParallel(dst, s, 4) 130 | }) 131 | 132 | t.Run("inplace with wrong output dimensions, output is smaller", func(t *testing.T) { 133 | s := make([]AllTransformers, 100) 134 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 135 | 136 | dst := make([]float64, 100) 137 | 138 | tr := makeMockAllTransformersFeatureTransformer() 139 | 140 | // does not panic 141 | tr.TransformAllInplace(dst, s) 142 | tr.TransformAllInplaceParallel(dst, s, 4) 143 | }) 144 | 145 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 146 | s := make([]AllTransformers, 100) 147 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 148 | 149 | dst := make([]float64, 100*120) 150 | 151 | tr := makeMockAllTransformersFeatureTransformer() 152 | 153 | // does not panic 154 | tr.TransformAllInplace(dst, s) 155 | tr.TransformAllInplaceParallel(dst, s, 4) 156 | }) 157 | 158 | t.Run("transform all", func(t *testing.T) { 159 | s := make([]AllTransformers, 100) 160 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 161 | 162 | tr := makeMockAllTransformersFeatureTransformer() 163 | 164 | features := tr.TransformAll(s) 165 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 166 | }) 167 | 168 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 169 | s := make([]AllTransformers, 100) 170 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 171 | 172 | tr := makeMockAllTransformersFeatureTransformer() 173 | 174 | features := tr.TransformAllParallel(s, 1) 175 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 176 | }) 177 | 178 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 179 | s := make([]AllTransformers, 100) 180 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 181 | 182 | tr := makeMockAllTransformersFeatureTransformer() 183 | 184 | features := tr.TransformAllParallel(s, 4) 185 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 186 | }) 187 | } 188 | 189 | func TestAllTransformersFeatureTransformerFit(t *testing.T) { 190 | t.Run("fuzzy input", func(t *testing.T) { 191 | s := make([]AllTransformers, 10) 192 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 193 | 194 | trEmpty := 
AllTransformersFeatureTransformer{} 195 | tr := AllTransformersFeatureTransformer{} 196 | tr.Fit(s) 197 | 198 | assert.NotNil(t, tr) 199 | assert.NotEqual(t, tr, trEmpty) 200 | }) 201 | 202 | t.Run("not nil transformer nil input", func(t *testing.T) { 203 | trEmpty := AllTransformersFeatureTransformer{} 204 | tr := AllTransformersFeatureTransformer{} 205 | tr.Fit(nil) 206 | 207 | assert.Equal(t, trEmpty, tr) 208 | }) 209 | 210 | t.Run("nil transformer not nil input", func(t *testing.T) { 211 | s := make([]AllTransformers, 10) 212 | 213 | var tr *AllTransformersFeatureTransformer 214 | tr.Fit(s) 215 | 216 | assert.Nil(t, tr) 217 | }) 218 | } 219 | 220 | func fitTransformerAllTransformers(b *testing.B, numelem int) { 221 | s := make([]AllTransformers, numelem) 222 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 223 | 224 | var tr AllTransformersFeatureTransformer 225 | 226 | b.ResetTimer() 227 | for n := 0; n < b.N; n++ { 228 | tr.Fit(s) 229 | } 230 | } 231 | 232 | func BenchmarkAllTransformersFeatureTransformer_Fit_100elements(b *testing.B) { 233 | fitTransformerAllTransformers(b, 100) 234 | } 235 | 236 | func BenchmarkAllTransformersFeatureTransformer_Fit_1000elements(b *testing.B) { 237 | fitTransformerAllTransformers(b, 1000) 238 | } 239 | 240 | func BenchmarkAllTransformersFeatureTransformer_Fit_10000elements(b *testing.B) { 241 | fitTransformerAllTransformers(b, 10000) 242 | } 243 | 244 | func BenchmarkAllTransformersFeatureTransformer_Transform(b *testing.B) { 245 | var s AllTransformers 246 | fuzz.New().Fuzz(&s) 247 | 248 | tr := makeMockAllTransformersFeatureTransformer() 249 | 250 | b.ResetTimer() 251 | for n := 0; n < b.N; n++ { 252 | tr.Transform(&s) 253 | } 254 | } 255 | 256 | func BenchmarkAllTransformersFeatureTransformer_Transform_Inplace(b *testing.B) { 257 | var s AllTransformers 258 | fuzz.New().Fuzz(&s) 259 | 260 | tr := makeMockAllTransformersFeatureTransformer() 261 | 262 | features := make([]float64, tr.NumFeatures()) 263 | 264 | b.ResetTimer() 265 | for n := 0; n < b.N; n++ { 266 | tr.TransformInplace(features, &s) 267 | } 268 | } 269 | 270 | func benchTransformAllAllTransformers(b *testing.B, numelem int) { 271 | s := make([]AllTransformers, numelem) 272 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 273 | 274 | tr := makeMockAllTransformersFeatureTransformer() 275 | 276 | b.ResetTimer() 277 | for n := 0; n < b.N; n++ { 278 | tr.TransformAll(s) 279 | } 280 | } 281 | 282 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_10elems(b *testing.B) { 283 | benchTransformAllAllTransformers(b, 10) 284 | } 285 | 286 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_100elems(b *testing.B) { 287 | benchTransformAllAllTransformers(b, 100) 288 | } 289 | 290 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_1000elems(b *testing.B) { 291 | benchTransformAllAllTransformers(b, 1000) 292 | } 293 | 294 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_10000elems(b *testing.B) { 295 | benchTransformAllAllTransformers(b, 10000) 296 | } 297 | 298 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_100000elems(b *testing.B) { 299 | benchTransformAllAllTransformers(b, 100000) 300 | } 301 | 302 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_1000000elems(b *testing.B) { 303 | benchTransformAllAllTransformers(b, 1000000) 304 | } 305 | 306 | func benchTransformAllParallelAllTransformers(b *testing.B, numelem int, nworkers uint) { 307 | s := make([]AllTransformers, numelem) 308 | 
fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 309 | 310 | tr := makeMockAllTransformersFeatureTransformer() 311 | 312 | b.ResetTimer() 313 | for n := 0; n < b.N; n++ { 314 | tr.TransformAllParallel(s, nworkers) 315 | } 316 | } 317 | 318 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 319 | benchTransformAllParallelAllTransformers(b, 10, 8) 320 | } 321 | 322 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 323 | benchTransformAllParallelAllTransformers(b, 100, 8) 324 | } 325 | 326 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 327 | benchTransformAllParallelAllTransformers(b, 1000, 8) 328 | } 329 | 330 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 331 | benchTransformAllParallelAllTransformers(b, 10000, 8) 332 | } 333 | 334 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 335 | benchTransformAllParallelAllTransformers(b, 100000, 8) 336 | } 337 | 338 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 339 | benchTransformAllParallelAllTransformers(b, 1000000, 8) 340 | } 341 | 342 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 343 | benchTransformAllParallelAllTransformers(b, 5000000, 8) 344 | } 345 | 346 | func BenchmarkAllTransformersFeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 347 | benchTransformAllParallelAllTransformers(b, 15000000, 8) 348 | } 349 | 350 | func benchLargeTransformerAllTransformers(b *testing.B, numelem int) { 351 | var s []AllTransformers 352 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 353 | 354 | tr := AllTransformersFeatureTransformer{} 355 | tr.Fit(s) 356 | 357 | b.ResetTimer() 358 | for n := 0; n < b.N; n++ { 359 | tr.Transform(&s[0]) 360 | } 361 | } 362 | 363 | func BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_100elements(b *testing.B) { 364 | benchLargeTransformerAllTransformers(b, 100) 365 | } 366 | 367 | func BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 368 | benchLargeTransformerAllTransformers(b, 1000) 369 | } 370 | 371 | func BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 372 | benchLargeTransformerAllTransformers(b, 10000) 373 | } 374 | 375 | func BenchmarkAllTransformersFeatureTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 376 | benchLargeTransformerAllTransformers(b, 100000) 377 | } 378 | -------------------------------------------------------------------------------- /cmd/generate/tests/with32fieldsfp.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "sync" 7 | 8 | fp "github.com/nikolaydubina/go-featureprocessing/transformers" 9 | ) 10 | 11 | // With32FieldsFeatureTransformer is a feature processor for With32Fields. 12 | // It was automatically generated by go-featureprocessing tool. 
13 | type With32FieldsFeatureTransformer struct { 14 | Name1 fp.MinMaxScaler `json:"Name1_minmax"` 15 | Name2 fp.MinMaxScaler `json:"Name2_minmax"` 16 | Name3 fp.MinMaxScaler `json:"Name3_minmax"` 17 | Name4 fp.MinMaxScaler `json:"Name4_minmax"` 18 | Name5 fp.MinMaxScaler `json:"Name5_minmax"` 19 | Name6 fp.MinMaxScaler `json:"Name6_minmax"` 20 | Name7 fp.MinMaxScaler `json:"Name7_minmax"` 21 | Name8 fp.MinMaxScaler `json:"Name8_minmax"` 22 | Name9 fp.MinMaxScaler `json:"Name9_minmax"` 23 | Name10 fp.MinMaxScaler `json:"Name10_minmax"` 24 | Name11 fp.MinMaxScaler `json:"Name11_minmax"` 25 | Name12 fp.MinMaxScaler `json:"Name12_minmax"` 26 | Name13 fp.MinMaxScaler `json:"Name13_minmax"` 27 | Name14 fp.MinMaxScaler `json:"Name14_minmax"` 28 | Name15 fp.MinMaxScaler `json:"Name15_minmax"` 29 | Name16 fp.MinMaxScaler `json:"Name16_minmax"` 30 | Name17 fp.MinMaxScaler `json:"Name17_minmax"` 31 | Name18 fp.MinMaxScaler `json:"Name18_minmax"` 32 | Name19 fp.MinMaxScaler `json:"Name19_minmax"` 33 | Name21 fp.MinMaxScaler `json:"Name21_minmax"` 34 | Name22 fp.MinMaxScaler `json:"Name22_minmax"` 35 | Name23 fp.MinMaxScaler `json:"Name23_minmax"` 36 | Name24 fp.MinMaxScaler `json:"Name24_minmax"` 37 | Name25 fp.MinMaxScaler `json:"Name25_minmax"` 38 | Name26 fp.MinMaxScaler `json:"Name26_minmax"` 39 | Name27 fp.MinMaxScaler `json:"Name27_minmax"` 40 | Name28 fp.MinMaxScaler `json:"Name28_minmax"` 41 | Name29 fp.MinMaxScaler `json:"Name29_minmax"` 42 | Name30 fp.MinMaxScaler `json:"Name30_minmax"` 43 | Name31 fp.MinMaxScaler `json:"Name31_minmax"` 44 | Name32 fp.MinMaxScaler `json:"Name32_minmax"` 45 | } 46 | 47 | // Fit fits transformer for each field 48 | func (e *With32FieldsFeatureTransformer) Fit(s []With32Fields) { 49 | if e == nil || len(s) == 0 { 50 | return 51 | } 52 | 53 | dataNum := make([]float64, len(s)) 54 | 55 | for i, v := range s { 56 | dataNum[i] = float64(v.Name1) 57 | } 58 | 59 | e.Name1.Fit(dataNum) 60 | 61 | for i, v := range s { 62 | dataNum[i] = float64(v.Name2) 63 | } 64 | 65 | e.Name2.Fit(dataNum) 66 | 67 | for i, v := range s { 68 | dataNum[i] = float64(v.Name3) 69 | } 70 | 71 | e.Name3.Fit(dataNum) 72 | 73 | for i, v := range s { 74 | dataNum[i] = float64(v.Name4) 75 | } 76 | 77 | e.Name4.Fit(dataNum) 78 | 79 | for i, v := range s { 80 | dataNum[i] = float64(v.Name5) 81 | } 82 | 83 | e.Name5.Fit(dataNum) 84 | 85 | for i, v := range s { 86 | dataNum[i] = float64(v.Name6) 87 | } 88 | 89 | e.Name6.Fit(dataNum) 90 | 91 | for i, v := range s { 92 | dataNum[i] = float64(v.Name7) 93 | } 94 | 95 | e.Name7.Fit(dataNum) 96 | 97 | for i, v := range s { 98 | dataNum[i] = float64(v.Name8) 99 | } 100 | 101 | e.Name8.Fit(dataNum) 102 | 103 | for i, v := range s { 104 | dataNum[i] = float64(v.Name9) 105 | } 106 | 107 | e.Name9.Fit(dataNum) 108 | 109 | for i, v := range s { 110 | dataNum[i] = float64(v.Name10) 111 | } 112 | 113 | e.Name10.Fit(dataNum) 114 | 115 | for i, v := range s { 116 | dataNum[i] = float64(v.Name11) 117 | } 118 | 119 | e.Name11.Fit(dataNum) 120 | 121 | for i, v := range s { 122 | dataNum[i] = float64(v.Name12) 123 | } 124 | 125 | e.Name12.Fit(dataNum) 126 | 127 | for i, v := range s { 128 | dataNum[i] = float64(v.Name13) 129 | } 130 | 131 | e.Name13.Fit(dataNum) 132 | 133 | for i, v := range s { 134 | dataNum[i] = float64(v.Name14) 135 | } 136 | 137 | e.Name14.Fit(dataNum) 138 | 139 | for i, v := range s { 140 | dataNum[i] = float64(v.Name15) 141 | } 142 | 143 | e.Name15.Fit(dataNum) 144 | 145 | for i, v := range s { 146 | dataNum[i] = float64(v.Name16) 147 | } 148 | 
149 | e.Name16.Fit(dataNum) 150 | 151 | for i, v := range s { 152 | dataNum[i] = float64(v.Name17) 153 | } 154 | 155 | e.Name17.Fit(dataNum) 156 | 157 | for i, v := range s { 158 | dataNum[i] = float64(v.Name18) 159 | } 160 | 161 | e.Name18.Fit(dataNum) 162 | 163 | for i, v := range s { 164 | dataNum[i] = float64(v.Name19) 165 | } 166 | 167 | e.Name19.Fit(dataNum) 168 | 169 | for i, v := range s { 170 | dataNum[i] = float64(v.Name21) 171 | } 172 | 173 | e.Name21.Fit(dataNum) 174 | 175 | for i, v := range s { 176 | dataNum[i] = float64(v.Name22) 177 | } 178 | 179 | e.Name22.Fit(dataNum) 180 | 181 | for i, v := range s { 182 | dataNum[i] = float64(v.Name23) 183 | } 184 | 185 | e.Name23.Fit(dataNum) 186 | 187 | for i, v := range s { 188 | dataNum[i] = float64(v.Name24) 189 | } 190 | 191 | e.Name24.Fit(dataNum) 192 | 193 | for i, v := range s { 194 | dataNum[i] = float64(v.Name25) 195 | } 196 | 197 | e.Name25.Fit(dataNum) 198 | 199 | for i, v := range s { 200 | dataNum[i] = float64(v.Name26) 201 | } 202 | 203 | e.Name26.Fit(dataNum) 204 | 205 | for i, v := range s { 206 | dataNum[i] = float64(v.Name27) 207 | } 208 | 209 | e.Name27.Fit(dataNum) 210 | 211 | for i, v := range s { 212 | dataNum[i] = float64(v.Name28) 213 | } 214 | 215 | e.Name28.Fit(dataNum) 216 | 217 | for i, v := range s { 218 | dataNum[i] = float64(v.Name29) 219 | } 220 | 221 | e.Name29.Fit(dataNum) 222 | 223 | for i, v := range s { 224 | dataNum[i] = float64(v.Name30) 225 | } 226 | 227 | e.Name30.Fit(dataNum) 228 | 229 | for i, v := range s { 230 | dataNum[i] = float64(v.Name31) 231 | } 232 | 233 | e.Name31.Fit(dataNum) 234 | 235 | for i, v := range s { 236 | dataNum[i] = float64(v.Name32) 237 | } 238 | 239 | e.Name32.Fit(dataNum) 240 | 241 | } 242 | 243 | // Transform transforms struct into feature vector accordingly to transformers 244 | func (e *With32FieldsFeatureTransformer) Transform(s *With32Fields) []float64 { 245 | if s == nil || e == nil { 246 | return nil 247 | } 248 | features := make([]float64, e.NumFeatures()) 249 | e.TransformInplace(features, s) 250 | return features 251 | } 252 | 253 | // TransformInplace transforms struct into feature vector accordingly to transformers, and does so inplace 254 | func (e *With32FieldsFeatureTransformer) TransformInplace(dst []float64, s *With32Fields) { 255 | if s == nil || e == nil || len(dst) != e.NumFeatures() { 256 | return 257 | } 258 | idx := 0 259 | 260 | dst[idx] = e.Name1.Transform(float64(s.Name1)) 261 | idx++ 262 | 263 | dst[idx] = e.Name2.Transform(float64(s.Name2)) 264 | idx++ 265 | 266 | dst[idx] = e.Name3.Transform(float64(s.Name3)) 267 | idx++ 268 | 269 | dst[idx] = e.Name4.Transform(float64(s.Name4)) 270 | idx++ 271 | 272 | dst[idx] = e.Name5.Transform(float64(s.Name5)) 273 | idx++ 274 | 275 | dst[idx] = e.Name6.Transform(float64(s.Name6)) 276 | idx++ 277 | 278 | dst[idx] = e.Name7.Transform(float64(s.Name7)) 279 | idx++ 280 | 281 | dst[idx] = e.Name8.Transform(float64(s.Name8)) 282 | idx++ 283 | 284 | dst[idx] = e.Name9.Transform(float64(s.Name9)) 285 | idx++ 286 | 287 | dst[idx] = e.Name10.Transform(float64(s.Name10)) 288 | idx++ 289 | 290 | dst[idx] = e.Name11.Transform(float64(s.Name11)) 291 | idx++ 292 | 293 | dst[idx] = e.Name12.Transform(float64(s.Name12)) 294 | idx++ 295 | 296 | dst[idx] = e.Name13.Transform(float64(s.Name13)) 297 | idx++ 298 | 299 | dst[idx] = e.Name14.Transform(float64(s.Name14)) 300 | idx++ 301 | 302 | dst[idx] = e.Name15.Transform(float64(s.Name15)) 303 | idx++ 304 | 305 | dst[idx] = e.Name16.Transform(float64(s.Name16)) 306 | 
idx++ 307 | 308 | dst[idx] = e.Name17.Transform(float64(s.Name17)) 309 | idx++ 310 | 311 | dst[idx] = e.Name18.Transform(float64(s.Name18)) 312 | idx++ 313 | 314 | dst[idx] = e.Name19.Transform(float64(s.Name19)) 315 | idx++ 316 | 317 | dst[idx] = e.Name21.Transform(float64(s.Name21)) 318 | idx++ 319 | 320 | dst[idx] = e.Name22.Transform(float64(s.Name22)) 321 | idx++ 322 | 323 | dst[idx] = e.Name23.Transform(float64(s.Name23)) 324 | idx++ 325 | 326 | dst[idx] = e.Name24.Transform(float64(s.Name24)) 327 | idx++ 328 | 329 | dst[idx] = e.Name25.Transform(float64(s.Name25)) 330 | idx++ 331 | 332 | dst[idx] = e.Name26.Transform(float64(s.Name26)) 333 | idx++ 334 | 335 | dst[idx] = e.Name27.Transform(float64(s.Name27)) 336 | idx++ 337 | 338 | dst[idx] = e.Name28.Transform(float64(s.Name28)) 339 | idx++ 340 | 341 | dst[idx] = e.Name29.Transform(float64(s.Name29)) 342 | idx++ 343 | 344 | dst[idx] = e.Name30.Transform(float64(s.Name30)) 345 | idx++ 346 | 347 | dst[idx] = e.Name31.Transform(float64(s.Name31)) 348 | idx++ 349 | 350 | dst[idx] = e.Name32.Transform(float64(s.Name32)) 351 | idx++ 352 | 353 | } 354 | 355 | // TransformAll transforms a slice of With32Fields 356 | func (e *With32FieldsFeatureTransformer) TransformAll(s []With32Fields) []float64 { 357 | if e == nil { 358 | return nil 359 | } 360 | features := make([]float64, len(s)*e.NumFeatures()) 361 | e.TransformAllInplace(features, s) 362 | return features 363 | } 364 | 365 | // TransformAllInplace transforms a slice of With32Fields inplace 366 | func (e *With32FieldsFeatureTransformer) TransformAllInplace(dst []float64, s []With32Fields) { 367 | if e == nil { 368 | return 369 | } 370 | n := e.NumFeatures() 371 | if len(dst) != n*len(s) { 372 | return 373 | } 374 | for i := range s { 375 | e.TransformInplace(dst[i*n:(i+1)*n], &s[i]) 376 | } 377 | } 378 | 379 | // TransformAllParallel transforms a slice of With32Fields in parallel 380 | func (e *With32FieldsFeatureTransformer) TransformAllParallel(s []With32Fields, nworkers uint) []float64 { 381 | if e == nil { 382 | return nil 383 | } 384 | features := make([]float64, len(s)*e.NumFeatures()) 385 | e.TransformAllInplaceParallel(features, s, nworkers) 386 | return features 387 | } 388 | 389 | // TransformAllInplaceParallel transforms a slice of With32Fields inplace parallel 390 | // Useful for very large slices. 
391 | func (e *With32FieldsFeatureTransformer) TransformAllInplaceParallel(dst []float64, s []With32Fields, nworkers uint) { 392 | if e == nil || nworkers == 0 { 393 | return 394 | } 395 | ns := uint(len(s)) 396 | nf := uint(e.NumFeatures()) 397 | if uint(len(dst)) != nf*ns { 398 | return 399 | } 400 | 401 | nbatch := ns / nworkers 402 | var wg sync.WaitGroup 403 | 404 | for i := uint(0); i < nworkers; i++ { 405 | wg.Add(1) 406 | go func(i uint) { 407 | defer wg.Done() 408 | iStart := nbatch * i 409 | iEnd := nbatch * (i + 1) 410 | if i == (nworkers - 1) { 411 | iEnd = ns 412 | } 413 | e.TransformAllInplace(dst[iStart*nf:iEnd*nf], s[iStart:iEnd]) 414 | }(i) 415 | } 416 | 417 | wg.Wait() 418 | } 419 | 420 | // NumFeatures returns number of features in output feature vector 421 | func (e *With32FieldsFeatureTransformer) NumFeatures() int { 422 | if e == nil { 423 | return 0 424 | } 425 | 426 | count := 31 427 | 428 | return count 429 | } 430 | 431 | // FeatureNames provides names of features that match output of transform 432 | func (e *With32FieldsFeatureTransformer) FeatureNames() []string { 433 | if e == nil { 434 | return nil 435 | } 436 | 437 | idx := 0 438 | names := make([]string, e.NumFeatures()) 439 | 440 | names[idx] = "Name1" 441 | idx++ 442 | 443 | names[idx] = "Name2" 444 | idx++ 445 | 446 | names[idx] = "Name3" 447 | idx++ 448 | 449 | names[idx] = "Name4" 450 | idx++ 451 | 452 | names[idx] = "Name5" 453 | idx++ 454 | 455 | names[idx] = "Name6" 456 | idx++ 457 | 458 | names[idx] = "Name7" 459 | idx++ 460 | 461 | names[idx] = "Name8" 462 | idx++ 463 | 464 | names[idx] = "Name9" 465 | idx++ 466 | 467 | names[idx] = "Name10" 468 | idx++ 469 | 470 | names[idx] = "Name11" 471 | idx++ 472 | 473 | names[idx] = "Name12" 474 | idx++ 475 | 476 | names[idx] = "Name13" 477 | idx++ 478 | 479 | names[idx] = "Name14" 480 | idx++ 481 | 482 | names[idx] = "Name15" 483 | idx++ 484 | 485 | names[idx] = "Name16" 486 | idx++ 487 | 488 | names[idx] = "Name17" 489 | idx++ 490 | 491 | names[idx] = "Name18" 492 | idx++ 493 | 494 | names[idx] = "Name19" 495 | idx++ 496 | 497 | names[idx] = "Name21" 498 | idx++ 499 | 500 | names[idx] = "Name22" 501 | idx++ 502 | 503 | names[idx] = "Name23" 504 | idx++ 505 | 506 | names[idx] = "Name24" 507 | idx++ 508 | 509 | names[idx] = "Name25" 510 | idx++ 511 | 512 | names[idx] = "Name26" 513 | idx++ 514 | 515 | names[idx] = "Name27" 516 | idx++ 517 | 518 | names[idx] = "Name28" 519 | idx++ 520 | 521 | names[idx] = "Name29" 522 | idx++ 523 | 524 | names[idx] = "Name30" 525 | idx++ 526 | 527 | names[idx] = "Name31" 528 | idx++ 529 | 530 | names[idx] = "Name32" 531 | idx++ 532 | 533 | return names 534 | } 535 | -------------------------------------------------------------------------------- /cmd/generate/templatetests.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | const templateTests = ` 4 | // Code generated by go-featureprocessing DO NOT EDIT 5 | 6 | package {{$.PackageName}} 7 | 8 | import ( 9 | "encoding/json" 10 | "testing" 11 | 12 | "github.com/google/gofuzz" 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | // makeMock creates some valid {{$.StructName}}FeatureTransformer by fitting on fuzzy data. 17 | // This function is handy for tests. 
18 | func makeMock{{$.StructName}}FeatureTransformer() *{{$.StructName}}FeatureTransformer { 19 | s := make([]{{$.StructName}}, 10) 20 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 21 | 22 | tr := {{$.StructName}}FeatureTransformer{} 23 | tr.Fit(s) 24 | return &tr 25 | } 26 | 27 | func Test{{$.StructName}}FeatureTransformerFeatureNames(t *testing.T) { 28 | tr := makeMock{{$.StructName}}FeatureTransformer() 29 | 30 | t.Run("feature names", func(t *testing.T) { 31 | names := tr.FeatureNames() 32 | assert.True(t, len(names) > 0) 33 | assert.Equal(t, len(names), tr.NumFeatures()) 34 | }) 35 | 36 | t.Run("feature name transformer is empty", func(t *testing.T) { 37 | tr := {{$.StructName}}FeatureTransformer{} 38 | names := tr.FeatureNames() 39 | assert.True(t, len(names) > 0) 40 | assert.Equal(t, len(names), tr.NumFeatures()) 41 | }) 42 | 43 | t.Run("feature name transformer is nil", func(t *testing.T) { 44 | var tr *{{$.StructName}}FeatureTransformer 45 | names := tr.FeatureNames() 46 | assert.Nil(t, names) 47 | }) 48 | } 49 | 50 | func Test{{$.StructName}}FeatureTransformerTransform(t *testing.T) { 51 | tr := makeMock{{$.StructName}}FeatureTransformer() 52 | 53 | t.Run("empty struct", func(t *testing.T) { 54 | s := {{$.StructName}}{} 55 | features := tr.Transform(&s) 56 | 57 | assert.NotNil(t, features) 58 | assert.True(t, len(features) > 0) 59 | assert.Equal(t, tr.NumFeatures(), len(features)) 60 | }) 61 | 62 | t.Run("fuzzy struct", func(t *testing.T) { 63 | var s {{$.StructName}} 64 | fuzz.New().Fuzz(&s) 65 | 66 | tr := {{$.StructName}}FeatureTransformer{} 67 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 68 | 69 | features := tr.Transform(&s) 70 | 71 | assert.NotNil(t, features) 72 | assert.True(t, len(features) > 0) 73 | assert.Equal(t, tr.NumFeatures(), len(features)) 74 | }) 75 | 76 | t.Run("struct is nil", func(t *testing.T) { 77 | var s *{{$.StructName}} 78 | features := tr.Transform(s) 79 | assert.Nil(t, features) 80 | assert.True(t, tr.NumFeatures() > 0) 81 | }) 82 | 83 | t.Run("transformer is nil", func(t *testing.T) { 84 | var s {{$.StructName}} 85 | fuzz.New().Fuzz(&s) 86 | 87 | var tr *{{$.StructName}}FeatureTransformer 88 | features := tr.Transform(&s) 89 | 90 | assert.Nil(t, features) 91 | assert.Equal(t, tr.NumFeatures(), 0) 92 | }) 93 | 94 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 95 | output, err := json.Marshal(tr) 96 | assert.Nil(t, err) 97 | assert.NotEmpty(t, output) 98 | 99 | var tr2 {{$.StructName}}FeatureTransformer 100 | err = json.Unmarshal(output, &tr2) 101 | assert.Nil(t, err) 102 | assert.Equal(t, *tr, tr2) 103 | }) 104 | 105 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 106 | var s {{$.StructName}} 107 | fuzz.New().Fuzz(&s) 108 | 109 | tr := {{$.StructName}}FeatureTransformer{} 110 | 111 | features := make([]float64, 1000) 112 | features[0] = 123456789.0 113 | tr.TransformInplace(features, &s) 114 | 115 | assert.Equal(t, 123456789.0, features[0]) 116 | }) 117 | } 118 | 119 | func Test{{$.StructName}}FeatureTransformerTransformAll(t *testing.T) { 120 | t.Run("when transformer is nil", func(t *testing.T) { 121 | s := make([]{{$.StructName}}, 100) 122 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 123 | 124 | dst := make([]float64, 100 * 100) 125 | 126 | var tr *{{$.StructName}}FeatureTransformer 127 | assert.Nil(t, tr.TransformAll(s)) 128 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 129 | 130 | // does not panic 131 | tr.TransformAllInplace(dst, s) 
132 | tr.TransformAllInplaceParallel(dst, s, 4) 133 | }) 134 | 135 | t.Run("inplace with wrong output dimensions, output is smaller", func(t *testing.T) { 136 | s := make([]{{$.StructName}}, 100) 137 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 138 | 139 | dst := make([]float64, 100) 140 | 141 | tr := makeMock{{$.StructName}}FeatureTransformer() 142 | 143 | // does not panic 144 | tr.TransformAllInplace(dst, s) 145 | tr.TransformAllInplaceParallel(dst, s, 4) 146 | }) 147 | 148 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 149 | s := make([]{{$.StructName}}, 100) 150 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 151 | 152 | dst := make([]float64, 100 * 120) 153 | 154 | tr := makeMock{{$.StructName}}FeatureTransformer() 155 | 156 | // does not panic 157 | tr.TransformAllInplace(dst, s) 158 | tr.TransformAllInplaceParallel(dst, s, 4) 159 | }) 160 | 161 | t.Run("transform all", func(t *testing.T) { 162 | s := make([]{{$.StructName}}, 100) 163 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 164 | 165 | tr := makeMock{{$.StructName}}FeatureTransformer() 166 | 167 | features := tr.TransformAll(s) 168 | assert.Equal(t, len(s) * tr.NumFeatures(), len(features)) 169 | }) 170 | 171 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 172 | s := make([]{{$.StructName}}, 100) 173 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 174 | 175 | tr := makeMock{{$.StructName}}FeatureTransformer() 176 | 177 | features := tr.TransformAllParallel(s, 1) 178 | assert.Equal(t, len(s) * tr.NumFeatures(), len(features)) 179 | }) 180 | 181 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 182 | s := make([]{{$.StructName}}, 100) 183 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 184 | 185 | tr := makeMock{{$.StructName}}FeatureTransformer() 186 | 187 | features := tr.TransformAllParallel(s, 4) 188 | assert.Equal(t, len(s) * tr.NumFeatures(), len(features)) 189 | }) 190 | } 191 | 192 | func Test{{$.StructName}}FeatureTransformerFit(t *testing.T) { 193 | t.Run("fuzzy input", func(t *testing.T) { 194 | s := make([]{{$.StructName}}, 10) 195 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 196 | 197 | trEmpty := {{$.StructName}}FeatureTransformer{} 198 | tr := {{$.StructName}}FeatureTransformer{} 199 | tr.Fit(s) 200 | 201 | assert.NotNil(t, tr) 202 | assert.NotEqual(t, tr, trEmpty) 203 | }) 204 | 205 | t.Run("not nil transformer nil input", func(t *testing.T) { 206 | trEmpty := {{$.StructName}}FeatureTransformer{} 207 | tr := {{$.StructName}}FeatureTransformer{} 208 | tr.Fit(nil) 209 | 210 | assert.Equal(t, trEmpty, tr) 211 | }) 212 | 213 | t.Run("nil transformer not nil input", func(t *testing.T) { 214 | s := make([]{{$.StructName}}, 10) 215 | 216 | var tr *{{$.StructName}}FeatureTransformer 217 | tr.Fit(s) 218 | 219 | assert.Nil(t, tr) 220 | }) 221 | } 222 | 223 | func fitTransformer{{$.StructName}}(b *testing.B, numelem int) { 224 | s := make([]{{$.StructName}}, numelem) 225 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 226 | 227 | var tr {{$.StructName}}FeatureTransformer 228 | 229 | b.ResetTimer() 230 | for n := 0; n < b.N; n++ { 231 | tr.Fit(s) 232 | } 233 | } 234 | 235 | func Benchmark{{$.StructName}}FeatureTransformer_Fit_100elements(b *testing.B) { 236 | fitTransformer{{$.StructName}}(b, 100) 237 | } 238 | 239 | func Benchmark{{$.StructName}}FeatureTransformer_Fit_1000elements(b *testing.B) { 240 | fitTransformer{{$.StructName}}(b, 1000) 241 | } 242 | 243 | func 
Benchmark{{$.StructName}}FeatureTransformer_Fit_10000elements(b *testing.B) { 244 | fitTransformer{{$.StructName}}(b, 10000) 245 | } 246 | 247 | func Benchmark{{$.StructName}}FeatureTransformer_Transform(b *testing.B) { 248 | var s {{$.StructName}} 249 | fuzz.New().Fuzz(&s) 250 | 251 | tr := makeMock{{$.StructName}}FeatureTransformer() 252 | 253 | b.ResetTimer() 254 | for n := 0; n < b.N; n++ { 255 | tr.Transform(&s) 256 | } 257 | } 258 | 259 | func Benchmark{{$.StructName}}FeatureTransformer_Transform_Inplace(b *testing.B) { 260 | var s {{$.StructName}} 261 | fuzz.New().Fuzz(&s) 262 | 263 | tr := makeMock{{$.StructName}}FeatureTransformer() 264 | 265 | features := make([]float64, tr.NumFeatures()) 266 | 267 | b.ResetTimer() 268 | for n := 0; n < b.N; n++ { 269 | tr.TransformInplace(features, &s) 270 | } 271 | } 272 | 273 | func benchTransformAll{{$.StructName}}(b *testing.B, numelem int) { 274 | s := make([]{{$.StructName}}, numelem) 275 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 276 | 277 | tr := makeMock{{$.StructName}}FeatureTransformer() 278 | 279 | b.ResetTimer() 280 | for n := 0; n < b.N; n++ { 281 | tr.TransformAll(s) 282 | } 283 | } 284 | 285 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_10elems(b *testing.B) { 286 | benchTransformAll{{$.StructName}}(b, 10) 287 | } 288 | 289 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_100elems(b *testing.B) { 290 | benchTransformAll{{$.StructName}}(b, 100) 291 | } 292 | 293 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_1000elems(b *testing.B) { 294 | benchTransformAll{{$.StructName}}(b, 1000) 295 | } 296 | 297 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_10000elems(b *testing.B) { 298 | benchTransformAll{{$.StructName}}(b, 10000) 299 | } 300 | 301 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_100000elems(b *testing.B) { 302 | benchTransformAll{{$.StructName}}(b, 100000) 303 | } 304 | 305 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_1000000elems(b *testing.B) { 306 | benchTransformAll{{$.StructName}}(b, 1000000) 307 | } 308 | 309 | func benchTransformAllParallel{{$.StructName}}(b *testing.B, numelem int, nworkers uint) { 310 | s := make([]{{$.StructName}}, numelem) 311 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 312 | 313 | tr := makeMock{{$.StructName}}FeatureTransformer() 314 | 315 | b.ResetTimer() 316 | for n := 0; n < b.N; n++ { 317 | tr.TransformAllParallel(s, nworkers) 318 | } 319 | } 320 | 321 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 322 | benchTransformAllParallel{{$.StructName}}(b, 10, 8) 323 | } 324 | 325 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 326 | benchTransformAllParallel{{$.StructName}}(b, 100, 8) 327 | } 328 | 329 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 330 | benchTransformAllParallel{{$.StructName}}(b, 1000, 8) 331 | } 332 | 333 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 334 | benchTransformAllParallel{{$.StructName}}(b, 10000, 8) 335 | } 336 | 337 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 338 | benchTransformAllParallel{{$.StructName}}(b, 100000, 8) 339 | } 340 | 341 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 342 | 
benchTransformAllParallel{{$.StructName}}(b, 1000000, 8) 343 | } 344 | 345 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 346 | benchTransformAllParallel{{$.StructName}}(b, 5000000, 8) 347 | } 348 | 349 | func Benchmark{{$.StructName}}FeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 350 | benchTransformAllParallel{{$.StructName}}(b, 15000000, 8) 351 | } 352 | 353 | {{if $.HasLargeTransformers}} 354 | 355 | func benchLargeTransformer{{$.StructName}}(b *testing.B, numelem int) { 356 | var s []{{$.StructName}} 357 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 358 | 359 | tr := {{$.StructName}}FeatureTransformer{} 360 | tr.Fit(s) 361 | 362 | b.ResetTimer() 363 | for n := 0; n < b.N; n++ { 364 | tr.Transform(&s[0]) 365 | } 366 | } 367 | 368 | func Benchmark{{$.StructName}}FeatureTransformer_Transform_LargeComposites_100elements(b *testing.B) { 369 | benchLargeTransformer{{$.StructName}}(b, 100) 370 | } 371 | 372 | func Benchmark{{$.StructName}}FeatureTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 373 | benchLargeTransformer{{$.StructName}}(b, 1000) 374 | } 375 | 376 | func Benchmark{{$.StructName}}FeatureTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 377 | benchLargeTransformer{{$.StructName}}(b, 10000) 378 | } 379 | 380 | func Benchmark{{$.StructName}}FeatureTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 381 | benchLargeTransformer{{$.StructName}}(b, 100000) 382 | } 383 | 384 | {{end}} 385 | ` 386 | -------------------------------------------------------------------------------- /cmd/generate/tests/largememorytransformerfp_test.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-featureprocessing DO NOT EDIT 2 | 3 | package examplemodule 4 | 5 | import ( 6 | "encoding/json" 7 | "testing" 8 | 9 | "github.com/google/gofuzz" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | // makeMock creates some valid LargeMemoryTransformerFeatureTransformer by fitting on fuzzy data. 14 | // This function is handy for tests. 
15 | func makeMockLargeMemoryTransformerFeatureTransformer() *LargeMemoryTransformerFeatureTransformer { 16 | s := make([]LargeMemoryTransformer, 10) 17 | fuzz.New().NilChance(0).NumElements(10, 10).Fuzz(&s) 18 | 19 | tr := LargeMemoryTransformerFeatureTransformer{} 20 | tr.Fit(s) 21 | return &tr 22 | } 23 | 24 | func TestLargeMemoryTransformerFeatureTransformerFeatureNames(t *testing.T) { 25 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 26 | 27 | t.Run("feature names", func(t *testing.T) { 28 | names := tr.FeatureNames() 29 | assert.True(t, len(names) > 0) 30 | assert.Equal(t, len(names), tr.NumFeatures()) 31 | }) 32 | 33 | t.Run("feature name transformer is empty", func(t *testing.T) { 34 | tr := LargeMemoryTransformerFeatureTransformer{} 35 | names := tr.FeatureNames() 36 | assert.True(t, len(names) > 0) 37 | assert.Equal(t, len(names), tr.NumFeatures()) 38 | }) 39 | 40 | t.Run("feature name transformer is nil", func(t *testing.T) { 41 | var tr *LargeMemoryTransformerFeatureTransformer 42 | names := tr.FeatureNames() 43 | assert.Nil(t, names) 44 | }) 45 | } 46 | 47 | func TestLargeMemoryTransformerFeatureTransformerTransform(t *testing.T) { 48 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 49 | 50 | t.Run("empty struct", func(t *testing.T) { 51 | s := LargeMemoryTransformer{} 52 | features := tr.Transform(&s) 53 | 54 | assert.NotNil(t, features) 55 | assert.True(t, len(features) > 0) 56 | assert.Equal(t, tr.NumFeatures(), len(features)) 57 | }) 58 | 59 | t.Run("fuzzy struct", func(t *testing.T) { 60 | var s LargeMemoryTransformer 61 | fuzz.New().Fuzz(&s) 62 | 63 | tr := LargeMemoryTransformerFeatureTransformer{} 64 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&tr) 65 | 66 | features := tr.Transform(&s) 67 | 68 | assert.NotNil(t, features) 69 | assert.True(t, len(features) > 0) 70 | assert.Equal(t, tr.NumFeatures(), len(features)) 71 | }) 72 | 73 | t.Run("struct is nil", func(t *testing.T) { 74 | var s *LargeMemoryTransformer 75 | features := tr.Transform(s) 76 | assert.Nil(t, features) 77 | assert.True(t, tr.NumFeatures() > 0) 78 | }) 79 | 80 | t.Run("transformer is nil", func(t *testing.T) { 81 | var s LargeMemoryTransformer 82 | fuzz.New().Fuzz(&s) 83 | 84 | var tr *LargeMemoryTransformerFeatureTransformer 85 | features := tr.Transform(&s) 86 | 87 | assert.Nil(t, features) 88 | assert.Equal(t, tr.NumFeatures(), 0) 89 | }) 90 | 91 | t.Run("serialize and deserialize transformer", func(t *testing.T) { 92 | output, err := json.Marshal(tr) 93 | assert.Nil(t, err) 94 | assert.NotEmpty(t, output) 95 | 96 | var tr2 LargeMemoryTransformerFeatureTransformer 97 | err = json.Unmarshal(output, &tr2) 98 | assert.Nil(t, err) 99 | assert.Equal(t, *tr, tr2) 100 | }) 101 | 102 | t.Run("inplace transform does not run when destination does not match num features", func(t *testing.T) { 103 | var s LargeMemoryTransformer 104 | fuzz.New().Fuzz(&s) 105 | 106 | tr := LargeMemoryTransformerFeatureTransformer{} 107 | 108 | features := make([]float64, 1000) 109 | features[0] = 123456789.0 110 | tr.TransformInplace(features, &s) 111 | 112 | assert.Equal(t, 123456789.0, features[0]) 113 | }) 114 | } 115 | 116 | func TestLargeMemoryTransformerFeatureTransformerTransformAll(t *testing.T) { 117 | t.Run("when transformer is nil", func(t *testing.T) { 118 | s := make([]LargeMemoryTransformer, 100) 119 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 120 | 121 | dst := make([]float64, 100*100) 122 | 123 | var tr *LargeMemoryTransformerFeatureTransformer 124 | assert.Nil(t, 
tr.TransformAll(s)) 125 | assert.Nil(t, tr.TransformAllParallel(s, 4)) 126 | 127 | // does not panic 128 | tr.TransformAllInplace(dst, s) 129 | tr.TransformAllInplaceParallel(dst, s, 4) 130 | }) 131 | 132 | t.Run("inplace with wrong output dimensions, output is smaller", func(t *testing.T) { 133 | s := make([]LargeMemoryTransformer, 100) 134 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 135 | 136 | dst := make([]float64, 100) 137 | 138 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 139 | 140 | // does not panic 141 | tr.TransformAllInplace(dst, s) 142 | tr.TransformAllInplaceParallel(dst, s, 4) 143 | }) 144 | 145 | t.Run("inplace with wrong output dimensions, output is bigger", func(t *testing.T) { 146 | s := make([]LargeMemoryTransformer, 100) 147 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 148 | 149 | dst := make([]float64, 100*120) 150 | 151 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 152 | 153 | // does not panic 154 | tr.TransformAllInplace(dst, s) 155 | tr.TransformAllInplaceParallel(dst, s, 4) 156 | }) 157 | 158 | t.Run("transform all", func(t *testing.T) { 159 | s := make([]LargeMemoryTransformer, 100) 160 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 161 | 162 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 163 | 164 | features := tr.TransformAll(s) 165 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 166 | }) 167 | 168 | t.Run("transform all parallel 1 worker", func(t *testing.T) { 169 | s := make([]LargeMemoryTransformer, 100) 170 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 171 | 172 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 173 | 174 | features := tr.TransformAllParallel(s, 1) 175 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 176 | }) 177 | 178 | t.Run("transform all parallel 4 workers", func(t *testing.T) { 179 | s := make([]LargeMemoryTransformer, 100) 180 | fuzz.New().NilChance(0).NumElements(100, 100).Fuzz(&s) 181 | 182 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 183 | 184 | features := tr.TransformAllParallel(s, 4) 185 | assert.Equal(t, len(s)*tr.NumFeatures(), len(features)) 186 | }) 187 | } 188 | 189 | func TestLargeMemoryTransformerFeatureTransformerFit(t *testing.T) { 190 | t.Run("fuzzy input", func(t *testing.T) { 191 | s := make([]LargeMemoryTransformer, 10) 192 | fuzz.New().NilChance(0).NumElements(1, 1).Fuzz(&s) 193 | 194 | trEmpty := LargeMemoryTransformerFeatureTransformer{} 195 | tr := LargeMemoryTransformerFeatureTransformer{} 196 | tr.Fit(s) 197 | 198 | assert.NotNil(t, tr) 199 | assert.NotEqual(t, tr, trEmpty) 200 | }) 201 | 202 | t.Run("not nil transformer nil input", func(t *testing.T) { 203 | trEmpty := LargeMemoryTransformerFeatureTransformer{} 204 | tr := LargeMemoryTransformerFeatureTransformer{} 205 | tr.Fit(nil) 206 | 207 | assert.Equal(t, trEmpty, tr) 208 | }) 209 | 210 | t.Run("nil transformer not nil input", func(t *testing.T) { 211 | s := make([]LargeMemoryTransformer, 10) 212 | 213 | var tr *LargeMemoryTransformerFeatureTransformer 214 | tr.Fit(s) 215 | 216 | assert.Nil(t, tr) 217 | }) 218 | } 219 | 220 | func fitTransformerLargeMemoryTransformer(b *testing.B, numelem int) { 221 | s := make([]LargeMemoryTransformer, numelem) 222 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 223 | 224 | var tr LargeMemoryTransformerFeatureTransformer 225 | 226 | b.ResetTimer() 227 | for n := 0; n < b.N; n++ { 228 | tr.Fit(s) 229 | } 230 | } 231 | 232 | func 
BenchmarkLargeMemoryTransformerFeatureTransformer_Fit_100elements(b *testing.B) { 233 | fitTransformerLargeMemoryTransformer(b, 100) 234 | } 235 | 236 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Fit_1000elements(b *testing.B) { 237 | fitTransformerLargeMemoryTransformer(b, 1000) 238 | } 239 | 240 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Fit_10000elements(b *testing.B) { 241 | fitTransformerLargeMemoryTransformer(b, 10000) 242 | } 243 | 244 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform(b *testing.B) { 245 | var s LargeMemoryTransformer 246 | fuzz.New().Fuzz(&s) 247 | 248 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 249 | 250 | b.ResetTimer() 251 | for n := 0; n < b.N; n++ { 252 | tr.Transform(&s) 253 | } 254 | } 255 | 256 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform_Inplace(b *testing.B) { 257 | var s LargeMemoryTransformer 258 | fuzz.New().Fuzz(&s) 259 | 260 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 261 | 262 | features := make([]float64, tr.NumFeatures()) 263 | 264 | b.ResetTimer() 265 | for n := 0; n < b.N; n++ { 266 | tr.TransformInplace(features, &s) 267 | } 268 | } 269 | 270 | func benchTransformAllLargeMemoryTransformer(b *testing.B, numelem int) { 271 | s := make([]LargeMemoryTransformer, numelem) 272 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 273 | 274 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 275 | 276 | b.ResetTimer() 277 | for n := 0; n < b.N; n++ { 278 | tr.TransformAll(s) 279 | } 280 | } 281 | 282 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_10elems(b *testing.B) { 283 | benchTransformAllLargeMemoryTransformer(b, 10) 284 | } 285 | 286 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_100elems(b *testing.B) { 287 | benchTransformAllLargeMemoryTransformer(b, 100) 288 | } 289 | 290 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_1000elems(b *testing.B) { 291 | benchTransformAllLargeMemoryTransformer(b, 1000) 292 | } 293 | 294 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_10000elems(b *testing.B) { 295 | benchTransformAllLargeMemoryTransformer(b, 10000) 296 | } 297 | 298 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_100000elems(b *testing.B) { 299 | benchTransformAllLargeMemoryTransformer(b, 100000) 300 | } 301 | 302 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_1000000elems(b *testing.B) { 303 | benchTransformAllLargeMemoryTransformer(b, 1000000) 304 | } 305 | 306 | func benchTransformAllParallelLargeMemoryTransformer(b *testing.B, numelem int, nworkers uint) { 307 | s := make([]LargeMemoryTransformer, numelem) 308 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 309 | 310 | tr := makeMockLargeMemoryTransformerFeatureTransformer() 311 | 312 | b.ResetTimer() 313 | for n := 0; n < b.N; n++ { 314 | tr.TransformAllParallel(s, nworkers) 315 | } 316 | } 317 | 318 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_10elems_8workers(b *testing.B) { 319 | benchTransformAllParallelLargeMemoryTransformer(b, 10, 8) 320 | } 321 | 322 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_100elems_8workers(b *testing.B) { 323 | benchTransformAllParallelLargeMemoryTransformer(b, 100, 8) 324 | } 325 | 326 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_1000elems_8workers(b *testing.B) { 327 | benchTransformAllParallelLargeMemoryTransformer(b, 1000, 
8) 328 | } 329 | 330 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_10000elems_8workers(b *testing.B) { 331 | benchTransformAllParallelLargeMemoryTransformer(b, 10000, 8) 332 | } 333 | 334 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_100000elems_8workers(b *testing.B) { 335 | benchTransformAllParallelLargeMemoryTransformer(b, 100000, 8) 336 | } 337 | 338 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_1000000elems_8workers(b *testing.B) { 339 | benchTransformAllParallelLargeMemoryTransformer(b, 1000000, 8) 340 | } 341 | 342 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_5000000elems_8workers(b *testing.B) { 343 | benchTransformAllParallelLargeMemoryTransformer(b, 5000000, 8) 344 | } 345 | 346 | func BenchmarkLargeMemoryTransformerFeatureTransformer_TransformAll_15000000elems_8workers(b *testing.B) { 347 | benchTransformAllParallelLargeMemoryTransformer(b, 15000000, 8) 348 | } 349 | 350 | func benchLargeTransformerLargeMemoryTransformer(b *testing.B, numelem int) { 351 | var s []LargeMemoryTransformer 352 | fuzz.New().NilChance(0).NumElements(numelem, numelem).Fuzz(&s) 353 | 354 | tr := LargeMemoryTransformerFeatureTransformer{} 355 | tr.Fit(s) 356 | 357 | b.ResetTimer() 358 | for n := 0; n < b.N; n++ { 359 | tr.Transform(&s[0]) 360 | } 361 | } 362 | 363 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform_LargeComposites_100elements(b *testing.B) { 364 | benchLargeTransformerLargeMemoryTransformer(b, 100) 365 | } 366 | 367 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 368 | benchLargeTransformerLargeMemoryTransformer(b, 1000) 369 | } 370 | 371 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 372 | benchLargeTransformerLargeMemoryTransformer(b, 10000) 373 | } 374 | 375 | func BenchmarkLargeMemoryTransformerFeatureTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 376 | benchLargeTransformerLargeMemoryTransformer(b, 100000) 377 | } 378 | -------------------------------------------------------------------------------- /structtransformer/structtransformer_test.go: -------------------------------------------------------------------------------- 1 | package structtransformer_test 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | . "github.com/nikolaydubina/go-featureprocessing/structtransformer" 8 | . 
"github.com/nikolaydubina/go-featureprocessing/transformers" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestStructTransformer(t *testing.T) { 14 | t.Run("test transform basic", func(t *testing.T) { 15 | type S struct { 16 | Age int `feature:"minmax"` 17 | Salary float64 `feature:"standard"` 18 | Gender string `feature:"onehot"` 19 | City string `feature:"ordinal"` 20 | } 21 | 22 | tr := StructTransformer{Transformers: []interface{}{ 23 | &MinMaxScaler{Min: 1, Max: 10}, 24 | &StandardScaler{Mean: 15, STD: 2.5}, 25 | &OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}}, 26 | &OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}}, 27 | }} 28 | 29 | assert.Equal(t, []float64{1, 1, 0, 1, 2}, tr.Transform(S{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})) 30 | }) 31 | 32 | t.Run("test transform struct has fields but transformers missing", func(t *testing.T) { 33 | type S struct { 34 | Age int `feature:"minmax"` 35 | Salary float64 `feature:"standard"` 36 | Gender string `feature:"onehot"` 37 | City string `feature:"ordinal"` 38 | } 39 | 40 | tr := StructTransformer{} 41 | assert.Equal(t, []float64(nil), tr.Transform(S{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})) 42 | }) 43 | 44 | t.Run("test transform nil", func(t *testing.T) { 45 | tr := StructTransformer{} 46 | assert.Equal(t, []float64(nil), tr.Transform(nil)) 47 | }) 48 | 49 | t.Run("test transform nil pointer to struct", func(t *testing.T) { 50 | type S struct { 51 | Age int `feature:"minmax"` 52 | Salary float64 `feature:"standard"` 53 | Gender string `feature:"onehot"` 54 | City string `feature:"ordinal"` 55 | } 56 | var s S 57 | tr := StructTransformer{} 58 | assert.Equal(t, []float64(nil), tr.Transform(&s)) 59 | }) 60 | 61 | t.Run("test transform unexpected type panics", func(t *testing.T) { 62 | type T int 63 | type S struct { 64 | Age T `feature:"minmax"` 65 | Salary bool `feature:"standard"` 66 | Gender string `feature:"onehot"` 67 | City string `feature:"ordinal"` 68 | } 69 | s := S{} 70 | tr := StructTransformer{Transformers: []interface{}{ 71 | &MinMaxScaler{Min: 1, Max: 10}, 72 | &StandardScaler{Mean: 15, STD: 2.5}, 73 | &OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}}, 74 | &OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}}, 75 | }} 76 | assert.PanicsWithValue(t, "unsupported type in struct", func() { tr.Transform(s) }) 77 | }) 78 | 79 | t.Run("test transform nil transformer skipped", func(t *testing.T) { 80 | type S struct { 81 | Age int `feature:"minmax"` 82 | Salary float64 `feature:"standard"` 83 | Gender string `feature:"onehot"` 84 | City string `feature:"ordinal"` 85 | } 86 | 87 | tr := StructTransformer{Transformers: []interface{}{ 88 | &MinMaxScaler{Min: 1, Max: 10}, 89 | nil, 90 | &OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}}, 91 | &OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}}, 92 | }} 93 | 94 | assert.Equal(t, []float64{1, 0, 1, 2}, tr.Transform(S{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})) 95 | }) 96 | 97 | t.Run("test transform unexpected numerical transformer skipped", func(t *testing.T) { 98 | type S struct { 99 | Age int `feature:"minmax"` 100 | Salary float64 `feature:"standard"` 101 | Gender string `feature:"onehot"` 102 | City string `feature:"ordinal"` 103 | } 104 | type T struct{} 105 | 106 | tr := StructTransformer{Transformers: []interface{}{ 107 | &MinMaxScaler{Min: 1, Max: 10}, 108 | &T{}, 109 | &OneHotEncoder{Mapping: map[string]uint{"male": 
0, "female": 1}}, 110 | &OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}}, 111 | }} 112 | 113 | assert.Equal(t, []float64{1, 0, 1, 2}, tr.Transform(S{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})) 114 | }) 115 | 116 | t.Run("test transform unexpected string transformer skipped", func(t *testing.T) { 117 | type S struct { 118 | Age int `feature:"minmax"` 119 | Salary float64 `feature:"standard"` 120 | Gender string `feature:"onehot"` 121 | City string `feature:"ordinal"` 122 | } 123 | type T struct{} 124 | 125 | tr := StructTransformer{Transformers: []interface{}{ 126 | &MinMaxScaler{Min: 1, Max: 10}, 127 | &StandardScaler{Mean: 15, STD: 2.5}, 128 | &OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}}, 129 | &T{}, 130 | }} 131 | 132 | assert.Equal(t, []float64{1, 1, 0, 1}, tr.Transform(S{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})) 133 | }) 134 | 135 | t.Run("test transform nil interface", func(t *testing.T) { 136 | type S interface { 137 | Get() int 138 | } 139 | var s S 140 | tr := StructTransformer{} 141 | assert.Equal(t, []float64(nil), tr.Transform(&s)) 142 | assert.Equal(t, []float64(nil), tr.Transform(s)) 143 | }) 144 | 145 | t.Run("test fit not implemented", func(t *testing.T) { 146 | type S struct { 147 | Age int `feature:"minmax"` 148 | Salary float64 `feature:"standard"` 149 | Gender string `feature:"onehot"` 150 | City string `feature:"ordinal"` 151 | } 152 | s := []interface{}{&S{}, &S{}} 153 | tr := StructTransformer{} 154 | assert.PanicsWithValue(t, "not implemented", func() { tr.Fit(s) }) 155 | 156 | }) 157 | } 158 | 159 | func BenchmarkStructTransformer_Transform_Small(b *testing.B) { 160 | type S struct { 161 | Age int `feature:"minmax"` 162 | Salary float64 `feature:"standard"` 163 | Gender string `feature:"onehot"` 164 | City string `feature:"ordinal"` 165 | } 166 | 167 | tr := StructTransformer{Transformers: []interface{}{ 168 | &MinMaxScaler{Min: 1, Max: 10}, 169 | &StandardScaler{Mean: 15, STD: 2.5}, 170 | &OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}}, 171 | &OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}}, 172 | }} 173 | 174 | s := S{ 175 | Age: 23, 176 | Salary: 17.5, 177 | Gender: "female", 178 | City: "city-B", 179 | } 180 | 181 | b.ResetTimer() 182 | for n := 0; n < b.N; n++ { 183 | tr.Transform(s) 184 | } 185 | } 186 | 187 | func randomInt(min, max int) int { 188 | return min + rand.Intn(max-min) 189 | } 190 | 191 | func randomString(len int) string { 192 | bytes := make([]byte, len) 193 | for i := 0; i < len; i++ { 194 | bytes[i] = byte(randomInt(65, 90)) 195 | } 196 | return string(bytes) 197 | } 198 | 199 | func randomSliceFloat64(num int) []float64 { 200 | ret := make([]float64, num) 201 | for i := 0; i < num; i++ { 202 | ret[i] = rand.Float64() 203 | } 204 | return ret 205 | } 206 | 207 | func randomMappingString(num int, strlen int) map[string]uint { 208 | ret := make(map[string]uint) 209 | for i := 0; i < num; i++ { 210 | ret[randomString(strlen)] = uint(i) 211 | } 212 | return ret 213 | } 214 | 215 | func getAnyKeyFromMap(mp map[string]uint) string { 216 | for k := range mp { 217 | return k 218 | } 219 | return "" 220 | } 221 | 222 | func benchLargeTransformer(b *testing.B, numelem int) { 223 | type S struct { 224 | Name1 string `feature:"onehot"` 225 | Name2 string `feature:"onehot"` 226 | Name3 string `feature:"ordinal"` 227 | Name4 string `feature:"ordinal"` 228 | Name5 float64 `feature:"quantile"` 229 | Name6 float64 `feature:"quantile"` 230 | Name7 float64 
`feature:"kbins"` 231 | Name8 float64 `feature:"kbins"` 232 | } 233 | 234 | tr := StructTransformer{Transformers: []interface{}{ 235 | &OneHotEncoder{Mapping: randomMappingString(numelem, 20)}, 236 | &OneHotEncoder{Mapping: randomMappingString(numelem, 20)}, 237 | &OrdinalEncoder{Mapping: randomMappingString(numelem, 20)}, 238 | &OrdinalEncoder{Mapping: randomMappingString(numelem, 20)}, 239 | &QuantileScaler{Quantiles: randomSliceFloat64(numelem)}, 240 | &QuantileScaler{Quantiles: randomSliceFloat64(numelem)}, 241 | &KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: randomSliceFloat64(numelem)}}, 242 | &KBinsDiscretizer{QuantileScaler: QuantileScaler{Quantiles: randomSliceFloat64(numelem)}}, 243 | }} 244 | 245 | s := S{ 246 | Name1: getAnyKeyFromMap(tr.Transformers[0].(*OneHotEncoder).Mapping), 247 | Name2: getAnyKeyFromMap(tr.Transformers[1].(*OneHotEncoder).Mapping), 248 | Name3: getAnyKeyFromMap(tr.Transformers[2].(*OrdinalEncoder).Mapping), 249 | Name4: getAnyKeyFromMap(tr.Transformers[3].(*OrdinalEncoder).Mapping), 250 | Name5: tr.Transformers[4].(*QuantileScaler).Quantiles[randomInt(1, numelem-1)], 251 | Name6: tr.Transformers[5].(*QuantileScaler).Quantiles[randomInt(1, numelem-1)], 252 | Name7: tr.Transformers[6].(*KBinsDiscretizer).Quantiles[randomInt(1, numelem-1)], 253 | Name8: tr.Transformers[7].(*KBinsDiscretizer).Quantiles[randomInt(1, numelem-1)], 254 | } 255 | 256 | b.ResetTimer() 257 | for n := 0; n < b.N; n++ { 258 | tr.Transform(s) 259 | } 260 | } 261 | 262 | func BenchmarkStructTransformer_Transform_LargeComposites_100elements(b *testing.B) { 263 | benchLargeTransformer(b, 100) 264 | } 265 | 266 | func BenchmarkStructTransformer_Transform_LargeComposites_1000elements(b *testing.B) { 267 | benchLargeTransformer(b, 1000) 268 | } 269 | 270 | func BenchmarkStructTransformer_Transform_LargeComposites_10000elements(b *testing.B) { 271 | benchLargeTransformer(b, 10000) 272 | } 273 | 274 | func BenchmarkStructTransformer_Transform_LargeComposites_100000elements(b *testing.B) { 275 | benchLargeTransformer(b, 100000) 276 | } 277 | 278 | func BenchmarkStructTransformer_Transform_32fields(b *testing.B) { 279 | 280 | type S struct { 281 | F1 float64 `feature:"minmax"` 282 | F2 float64 `feature:"standard"` 283 | F3 float64 `feature:"minmax"` 284 | F4 float64 `feature:"standard"` 285 | F5 float64 `feature:"minmax"` 286 | F6 float64 `feature:"standard"` 287 | F7 float64 `feature:"minmax"` 288 | F8 float64 `feature:"standard"` 289 | F9 float64 `feature:"minmax"` 290 | F10 float64 `feature:"standard"` 291 | F11 float64 `feature:"minmax"` 292 | F12 float64 `feature:"standard"` 293 | F13 float64 `feature:"minmax"` 294 | F14 float64 `feature:"standard"` 295 | F15 float64 `feature:"minmax"` 296 | F16 float64 `feature:"standard"` 297 | F17 float64 `feature:"minmax"` 298 | F18 float64 `feature:"standard"` 299 | F19 float64 `feature:"minmax"` 300 | F20 float64 `feature:"standard"` 301 | F21 float64 `feature:"minmax"` 302 | F22 float64 `feature:"standard"` 303 | F23 float64 `feature:"minmax"` 304 | F24 float64 `feature:"standard"` 305 | F25 float64 `feature:"minmax"` 306 | F26 float64 `feature:"standard"` 307 | F27 float64 `feature:"minmax"` 308 | F28 float64 `feature:"standard"` 309 | F29 float64 `feature:"minmax"` 310 | F30 float64 `feature:"standard"` 311 | F31 float64 `feature:"minmax"` 312 | F32 float64 `feature:"standard"` 313 | } 314 | 315 | tr := StructTransformer{Transformers: []interface{}{ 316 | &MinMaxScaler{Min: 1, Max: 10}, 317 | &StandardScaler{Mean: 15, STD: 2.5}, 318 
| &MinMaxScaler{Min: 1, Max: 10}, 319 | &StandardScaler{Mean: 15, STD: 2.5}, 320 | &MinMaxScaler{Min: 1, Max: 10}, 321 | &StandardScaler{Mean: 15, STD: 2.5}, 322 | &MinMaxScaler{Min: 1, Max: 10}, 323 | &StandardScaler{Mean: 15, STD: 2.5}, 324 | &MinMaxScaler{Min: 1, Max: 10}, 325 | &StandardScaler{Mean: 15, STD: 2.5}, 326 | &MinMaxScaler{Min: 1, Max: 10}, 327 | &StandardScaler{Mean: 15, STD: 2.5}, 328 | &MinMaxScaler{Min: 1, Max: 10}, 329 | &StandardScaler{Mean: 15, STD: 2.5}, 330 | &MinMaxScaler{Min: 1, Max: 10}, 331 | &StandardScaler{Mean: 15, STD: 2.5}, 332 | &MinMaxScaler{Min: 1, Max: 10}, 333 | &StandardScaler{Mean: 15, STD: 2.5}, 334 | &MinMaxScaler{Min: 1, Max: 10}, 335 | &StandardScaler{Mean: 15, STD: 2.5}, 336 | &MinMaxScaler{Min: 1, Max: 10}, 337 | &StandardScaler{Mean: 15, STD: 2.5}, 338 | &MinMaxScaler{Min: 1, Max: 10}, 339 | &StandardScaler{Mean: 15, STD: 2.5}, 340 | &MinMaxScaler{Min: 1, Max: 10}, 341 | &StandardScaler{Mean: 15, STD: 2.5}, 342 | &MinMaxScaler{Min: 1, Max: 10}, 343 | &StandardScaler{Mean: 15, STD: 2.5}, 344 | &MinMaxScaler{Min: 1, Max: 10}, 345 | &StandardScaler{Mean: 15, STD: 2.5}, 346 | &MinMaxScaler{Min: 1, Max: 10}, 347 | &StandardScaler{Mean: 15, STD: 2.5}, 348 | }} 349 | 350 | s := S{ 351 | F1: 1231231.123, 352 | F2: 1231231.123, 353 | F3: 1231231.123, 354 | F4: 1231231.123, 355 | F5: 1231231.123, 356 | F6: 1231231.123, 357 | F7: 1231231.123, 358 | F8: 1231231.123, 359 | F9: 1231231.123, 360 | F10: 1231231.123, 361 | F11: 1231231.123, 362 | F12: 1231231.123, 363 | F13: 1231231.123, 364 | F14: 1231231.123, 365 | F15: 1231231.123, 366 | F16: 1231231.123, 367 | F17: 1231231.123, 368 | F18: 1231231.123, 369 | F19: 1231231.123, 370 | F20: 1231231.123, 371 | F21: 1231231.123, 372 | F22: 1231231.123, 373 | F23: 1231231.123, 374 | F24: 1231231.123, 375 | F25: 1231231.123, 376 | F26: 1231231.123, 377 | F27: 1231231.123, 378 | F28: 1231231.123, 379 | F29: 1231231.123, 380 | F30: 1231231.123, 381 | F31: 1231231.123, 382 | F32: 1231231.123, 383 | } 384 | 385 | b.ResetTimer() 386 | for n := 0; n < b.N; n++ { 387 | tr.Transform(s) 388 | } 389 | } 390 | --------------------------------------------------------------------------------
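The reflection-based StructTransformer exercised in structtransformer_test.go above can also be used directly, without code generation. Below is a minimal usage sketch, not part of the repository listing: it assumes the import paths used by the test file (github.com/nikolaydubina/go-featureprocessing/structtransformer and .../transformers), and the struct name Record and its field values are hypothetical, mirroring the "test transform basic" case.

package main

import (
	"fmt"

	"github.com/nikolaydubina/go-featureprocessing/structtransformer"
	"github.com/nikolaydubina/go-featureprocessing/transformers"
)

// Record is a hypothetical input struct; its `feature` tags mirror the ones in structtransformer_test.go.
type Record struct {
	Age    int     `feature:"minmax"`
	Salary float64 `feature:"standard"`
	Gender string  `feature:"onehot"`
	City   string  `feature:"ordinal"`
}

func main() {
	// One transformer per tagged field, in field order, as in the tests above.
	tr := structtransformer.StructTransformer{Transformers: []interface{}{
		&transformers.MinMaxScaler{Min: 1, Max: 10},
		&transformers.StandardScaler{Mean: 15, STD: 2.5},
		&transformers.OneHotEncoder{Mapping: map[string]uint{"male": 0, "female": 1}},
		&transformers.OrdinalEncoder{Mapping: map[string]uint{"city-A": 1, "city-B": 2}},
	}}

	// Transform flattens the tagged fields into a single feature vector.
	features := tr.Transform(Record{Age: 23, Salary: 17.5, Gender: "female", City: "city-B"})
	fmt.Println(features) // per the test expectation above: [1 1 0 1 2]
}

The generated *FeatureTransformer types shown earlier in the listing expose the same Transform-style API without reflection, which is what the benchmark functions in those files measure.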