├── .github └── workflows │ ├── lint.yml │ └── test.yml ├── .gitignore ├── .golangci.yml ├── LICENSE ├── README.md ├── docs └── model-info.md ├── example └── main.go ├── go.mod ├── go.sum ├── pkg ├── errors │ └── errors.go └── guesser │ ├── answer.go │ ├── answer_test.go │ ├── guesser.go │ ├── guesser_test.go │ ├── model.go │ └── model │ ├── saved_model.pb │ └── variables │ ├── variables.data-00000-of-00001 │ └── variables.index └── script ├── install-libtensorflow ├── lint ├── pull-model └── test /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | golangci-lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions/setup-go@v4 15 | with: 16 | go-version: '1.20' 17 | - name: Setup libtensorflow 18 | run: script/install-libtensorflow 19 | - uses: golangci/golangci-lint-action@v3 20 | with: 21 | version: v1.52.2 22 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | gotest: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions/setup-go@v4 15 | with: 16 | go-version: '1.20' 17 | - name: Setup libtensorflow 18 | run: script/install-libtensorflow 19 | - name: Test 20 | uses: robherley/go-test-action@v0.1.0 21 | with: 22 | omitUntestedPackages: true 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | bin/ 3 | 4 | # Created by https://www.toptal.com/developers/gitignore/api/go,macos,visualstudiocode,vim 5 | # Edit at 
https://www.toptal.com/developers/gitignore?templates=go,macos,visualstudiocode,vim 6 | 7 | ### Go ### 8 | # If you prefer the allow list template instead of the deny list, see community template: 9 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 10 | # 11 | # Binaries for programs and plugins 12 | *.exe 13 | *.exe~ 14 | *.dll 15 | *.so 16 | *.dylib 17 | 18 | # Test binary, built with `go test -c` 19 | *.test 20 | 21 | # Output of the go coverage tool, specifically when used with LiteIDE 22 | *.out 23 | 24 | # Dependency directories (remove the comment below to include it) 25 | # vendor/ 26 | 27 | # Go workspace file 28 | go.work 29 | 30 | ### macOS ### 31 | # General 32 | .DS_Store 33 | .AppleDouble 34 | .LSOverride 35 | 36 | # Icon must end with two \r 37 | Icon 38 | 39 | 40 | # Thumbnails 41 | ._* 42 | 43 | # Files that might appear in the root of a volume 44 | .DocumentRevisions-V100 45 | .fseventsd 46 | .Spotlight-V100 47 | .TemporaryItems 48 | .Trashes 49 | .VolumeIcon.icns 50 | .com.apple.timemachine.donotpresent 51 | 52 | # Directories potentially created on remote AFP share 53 | .AppleDB 54 | .AppleDesktop 55 | Network Trash Folder 56 | Temporary Items 57 | .apdisk 58 | 59 | ### macOS Patch ### 60 | # iCloud generated files 61 | *.icloud 62 | 63 | ### Vim ### 64 | # Swap 65 | [._]*.s[a-v][a-z] 66 | !*.svg # comment out if you don't need vector files 67 | [._]*.sw[a-p] 68 | [._]s[a-rt-v][a-z] 69 | [._]ss[a-gi-z] 70 | [._]sw[a-p] 71 | 72 | # Session 73 | Session.vim 74 | Sessionx.vim 75 | 76 | # Temporary 77 | .netrwhist 78 | *~ 79 | # Auto-generated tag files 80 | tags 81 | # Persistent undo 82 | [._]*.un~ 83 | 84 | ### VisualStudioCode ### 85 | .vscode/* 86 | !.vscode/settings.json 87 | !.vscode/tasks.json 88 | !.vscode/launch.json 89 | !.vscode/extensions.json 90 | !.vscode/*.code-snippets 91 | 92 | # Local History for Visual Studio Code 93 | .history/ 94 | 95 | # Built Visual Studio Code Extensions 96 | *.vsix 97 
| 98 | ### VisualStudioCode Patch ### 99 | # Ignore all local history of files 100 | .history 101 | .ionide 102 | 103 | # End of https://www.toptal.com/developers/gitignore/api/go,macos,visualstudiocode,vim 104 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | tests: true 3 | skip-dirs-use-default: true 4 | 5 | linters: 6 | disable-all: true 7 | # https://golangci-lint.run/usage/linters/ 8 | enable: 9 | # rules enabled by default 10 | - errcheck 11 | - gosimple 12 | - govet 13 | - ineffassign 14 | - staticcheck 15 | # - structcheck disabled in 1.18 for now: https://github.com/golangci/golangci-lint/issues/2649 16 | - typecheck 17 | - unused 18 | # additional 19 | - goconst 20 | - gocritic 21 | - gocyclo 22 | - gofmt 23 | - goimports 24 | - revive 25 | - tenv 26 | - unconvert 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2023 Robert Herley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # guesslang-go 🔍 2 | 3 | [![GoDoc](https://godoc.org/github.com/golang/gddo?status.svg)](https://pkg.go.dev/github.com/robherley/guesslang-go) 4 | [![Test](https://github.com/robherley/guesslang-go/actions/workflows/test.yml/badge.svg)](https://github.com/robherley/guesslang-go/actions/workflows/test.yml) 5 | 6 | Go port of [yoeo/guesslang](https://github.com/yoeo/guesslang). Detects the programming language of source code using a deep learning model. 7 | 8 | ## Setup 9 | 10 | ### Dependencies 11 | 12 | Requires [`libtensorflow`](https://www.tensorflow.org/install/lang_c) C API. 13 | 14 | On macOS, it can be installed with Homebrew: 15 | 16 | ``` 17 | brew install libtensorflow 18 | ``` 19 | 20 | Alternatively, for Linux-based systems: 21 | 22 | ``` 23 | script/install-libtensorflow 24 | ``` 25 | 26 | ### Install 27 | 28 | ``` 29 | go get github.com/robherley/guesslang-go 30 | ``` 31 | 32 | See example usage in [`example/main.go`](/example/main.go) 33 | 34 | ## Caveats 35 | 36 | To work around some of the limitations of the Go TensorFlow bindings (and the wrapper library)[^1], the [SavedModel](https://www.tensorflow.org/guide/saved_model) is embedded in the binary and 37 | when a [`Guesser`](https://pkg.go.dev/github.com/robherley/guesslang-go/pkg/guesser#Guesser) is initialized, it temporarily writes the model to a directory (and removes it after). 38 | 39 | So, in order to use this package, you must at least have a writable temporary directory that aligns with Go's [`os.TempDir()`](https://pkg.go.dev/os@go1.20.3#TempDir).
40 | 41 | [^1]: https://github.com/galeone/tfgo/issues/44#issuecomment-841806254 42 | 43 | ## Acknowledgements 44 | 45 | Powered by: 46 | 47 | - [yoeo/guesslang](https://github.com/yoeo/guesslang): language model 48 | - [tensorflow/tensorflow](https://github.com/tensorflow/tensorflow): TensorFlow 49 | - [galeone/tfgo](https://github.com/galeone/tfgo): TensorFlow in Go 50 | 51 | Inspired by: 52 | 53 | - [microsoft/vscode-languagedetection](https://github.com/microsoft/vscode-languagedetection) 54 | - [hieplpvip/guesslang-js](https://github.com/hieplpvip/guesslang-js) 55 | -------------------------------------------------------------------------------- /docs/model-info.md: -------------------------------------------------------------------------------- 1 | ## Command 2 | 3 | ``` 4 | saved_model_cli show --all --dir pkg/guesser/model 5 | ``` 6 | 7 | ## Output 8 | 9 | ``` 10 | MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: 11 | 12 | signature_def['classification']: 13 | The given SavedModel SignatureDef contains the following input(s): 14 | inputs['inputs'] tensor_info: 15 | dtype: DT_STRING 16 | shape: (-1) 17 | name: Placeholder:0 18 | The given SavedModel SignatureDef contains the following output(s): 19 | outputs['classes'] tensor_info: 20 | dtype: DT_STRING 21 | shape: (-1, 54) 22 | name: head/Tile:0 23 | outputs['scores'] tensor_info: 24 | dtype: DT_FLOAT 25 | shape: (-1, 54) 26 | name: head/predictions/probabilities:0 27 | Method name is: tensorflow/serving/classify 28 | 29 | signature_def['predict']: 30 | The given SavedModel SignatureDef contains the following input(s): 31 | inputs['content'] tensor_info: 32 | dtype: DT_STRING 33 | shape: (-1) 34 | name: Placeholder:0 35 | The given SavedModel SignatureDef contains the following output(s): 36 | outputs['all_class_ids'] tensor_info: 37 | dtype: DT_INT32 38 | shape: (-1, 54) 39 | name: head/predictions/Tile:0 40 | outputs['all_classes'] tensor_info: 41 | dtype: DT_STRING 42 | shape: 
(-1, 54) 43 | name: head/predictions/Tile_1:0 44 | outputs['class_ids'] tensor_info: 45 | dtype: DT_INT64 46 | shape: (-1, 1) 47 | name: head/predictions/ExpandDims:0 48 | outputs['classes'] tensor_info: 49 | dtype: DT_STRING 50 | shape: (-1, 1) 51 | name: head/predictions/hash_table_Lookup/LookupTableFindV2:0 52 | outputs['logits'] tensor_info: 53 | dtype: DT_FLOAT 54 | shape: (-1, 54) 55 | name: add:0 56 | outputs['probabilities'] tensor_info: 57 | dtype: DT_FLOAT 58 | shape: (-1, 54) 59 | name: head/predictions/probabilities:0 60 | Method name is: tensorflow/serving/predict 61 | 62 | signature_def['serving_default']: 63 | The given SavedModel SignatureDef contains the following input(s): 64 | inputs['inputs'] tensor_info: 65 | dtype: DT_STRING 66 | shape: (-1) 67 | name: Placeholder:0 68 | The given SavedModel SignatureDef contains the following output(s): 69 | outputs['classes'] tensor_info: 70 | dtype: DT_STRING 71 | shape: (-1, 54) 72 | name: head/Tile:0 73 | outputs['scores'] tensor_info: 74 | dtype: DT_FLOAT 75 | shape: (-1, 54) 76 | name: head/predictions/probabilities:0 77 | Method name is: tensorflow/serving/classify 78 | The MetaGraph with tag set ['serve'] contains the following ops: {'Size', 'BiasAdd', 'ResourceGather', 'GatherV2', 'Placeholder', 'ArgMax', 'StringToHashBucketFast', 'AssignVariableOp', 'Add', 'NotEqual', 'SparseReshape', 'Tile', 'StridedSlice', 'StaticRegexFullMatch', 'If', 'ScalarSummary', 'SparseFillEmptyRows', 'StringJoin', 'SaveV2', 'LookupTableImportV2', 'Fill', 'LookupTableFindV2', 'ReadVariableOp', 'TensorListStack', 'Mul', 'TensorListFromTensor', 'Reshape', 'RandomUniform', 'PlaceholderWithDefault', 'ZerosLike', 'RestoreV2', 'Const', 'GatherNd', 'Identity', 'StatelessIf', 'TruncatedNormal', 'Prod', 'Unique', 'LessEqual', 'HistogramSummary', 'MergeV2Checkpoints', 'ExpandDims', 'VarHandleOp', 'TensorListReserve', 'GreaterEqual', 'Cast', 'MatMul', 'SparseSegmentMean', 'SparseSegmentSum', 'Select', 'NoOp', 'Range', 
'StatelessWhile', 'Pack', 'Relu', 'VarIsInitializedOp', 'Sub', 'Shape', 'HashTableV2', 'AddV2', 'RealDiv', 'Where', 'Slice', 'ShardedFilename', 'Equal', 'Softmax', 'ConcatV2'} 79 | ``` 80 | -------------------------------------------------------------------------------- /example/main.go: -------------------------------------------------------------------------------- 1 | //go:build exclude 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | 9 | "github.com/robherley/guesslang-go/pkg/guesser" 10 | ) 11 | 12 | /* 13 | 14 | This is a simple example of how to use the guesser package. 15 | 16 | Expected output: 17 | 18 | Best => rs 19 | Reliable => true 20 | Top Five: 21 | - rs (21.66%) 22 | - java (4.87%) 23 | - ts (4.66%) 24 | - js (4.62%) 25 | - html (4.07%) 26 | 27 | */ 28 | 29 | const snippet = ` 30 | fn bubble_sort(arr: &mut [i32]) { 31 | let len = arr.len(); 32 | for i in 0..len { 33 | for j in 0..len - i - 1 { 34 | if arr[j] > arr[j + 1] { 35 | arr.swap(j, j + 1); 36 | } 37 | } 38 | } 39 | } 40 | ` 41 | 42 | func main() { 43 | gsr, err := guesser.New() 44 | if err != nil { 45 | fmt.Println(err) 46 | os.Exit(1) 47 | } 48 | 49 | answer, err := gsr.Guess(snippet) 50 | if err != nil { 51 | fmt.Println(err) 52 | os.Exit(1) 53 | } 54 | 55 | fmt.Println("Best =>", answer.Predictions[0].Language) 56 | fmt.Println("Reliable =>", answer.Reliable) 57 | fmt.Println("Top Five:") 58 | for _, lang := range answer.Predictions[:5] { 59 | fmt.Printf("- %s (%.2f%%)\n", lang.Language, lang.Confidence*100) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/robherley/guesslang-go 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e 7 | github.com/galeone/tfgo v0.0.0-20230214145115-56cedbc50978 8 | ) 9 | 10 | require google.golang.org/protobuf v1.28.1 // 
indirect 11 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e h1:9+2AEFZymTi25FIIcDwuzcOPH04z9+fV6XeLiGORPDI= 2 | github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e/go.mod h1:TelZuq26kz2jysARBwOrTv16629hyUsHmIoj54QqyFo= 3 | github.com/galeone/tfgo v0.0.0-20230214145115-56cedbc50978 h1:8xhEVC2zjvI+3xWkt+78Krkd6JYp+0+iEoBVi0UBlJs= 4 | github.com/galeone/tfgo v0.0.0-20230214145115-56cedbc50978/go.mod h1:3YgYBeIX42t83uP27Bd4bSMxTnQhSbxl0pYSkCDB1tc= 5 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 6 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 7 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 8 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 9 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 10 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 11 | google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= 12 | google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 13 | -------------------------------------------------------------------------------- /pkg/errors/errors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | ) 7 | 8 | var ( 9 | ErrFailedLoad = errors.New("unable to load model") 10 | ErrFailedExec = errors.New("unable to execute model") 11 | ErrInvalidResult = errors.New("invalid result") 12 | ) 13 | 14 | func NewFailedExec(msg any) error { 15 | return fmt.Errorf("%w: %v", ErrFailedExec, msg) 
16 | } 17 | 18 | func NewFailedLoad(msg any) error { 19 | return fmt.Errorf("%w: %v", ErrFailedLoad, msg) 20 | } 21 | 22 | func NewInvalidResult(msg any) error { 23 | return fmt.Errorf("%w: %v", ErrInvalidResult, msg) 24 | } 25 | -------------------------------------------------------------------------------- /pkg/guesser/answer.go: -------------------------------------------------------------------------------- 1 | package guesser 2 | 3 | import "math" 4 | 5 | // Answer is the result of a guesslang model execution, also providing whether or not the answer is considered reliable. 6 | type Answer struct { 7 | Predictions []Prediction 8 | Reliable bool 9 | } 10 | 11 | // Prediction is a single language prediction, including the confidence and the language. 12 | type Prediction struct { 13 | Confidence float64 14 | Language string 15 | } 16 | 17 | // IsReliable returns true if the prediction is considered "reliable". 18 | // It is considered reliable if the probability of the predicted language is higher than 2 standard deviations from the mean. 19 | // Original: https://github.com/yoeo/guesslang/blob/42ec63776777e1bdce2d72f51710c6634e36e00c/guesslang/guess.py#L157-L165 20 | func IsReliable(confidences []float64) bool { 21 | mean := mean(confidences) 22 | stdev := stdev(confidences, mean) 23 | threshold := mean + 2*stdev 24 | predictedLanguageProbability := max(confidences) 25 | return predictedLanguageProbability > threshold 26 | } 27 | 28 | // stdev returns the standard deviation of a slice of float64. 29 | func stdev(values []float64, mean float64) float64 { 30 | var sum float64 31 | for _, v := range values { 32 | sum += math.Pow(v-mean, 2) 33 | } 34 | return math.Sqrt(sum / float64(len(values))) 35 | } 36 | 37 | // mean returns the mean of a slice of float64. 
38 | func mean(values []float64) float64 { 39 | var sum float64 40 | for _, v := range values { 41 | sum += v 42 | } 43 | return sum / float64(len(values)) 44 | } 45 | 46 | // max returns the maximum value of a slice of float64. 47 | func max(values []float64) float64 { 48 | var maxVal = math.Inf(-1) 49 | for _, v := range values { 50 | if v > maxVal { 51 | maxVal = v 52 | } 53 | } 54 | return maxVal 55 | } 56 | -------------------------------------------------------------------------------- /pkg/guesser/answer_test.go: -------------------------------------------------------------------------------- 1 | package guesser_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/robherley/guesslang-go/pkg/guesser" 8 | ) 9 | 10 | func TestIsReliable(t *testing.T) { 11 | cases := []struct { 12 | confidences []float64 13 | expected bool 14 | }{ 15 | {[]float64{0.2, 0.1, 0.1, 0.1, 0.1}, false}, 16 | {[]float64{0.9, 0.1, 0.1, 0.1, 0.1}, true}, 17 | } 18 | 19 | for _, tc := range cases { 20 | t.Run(fmt.Sprintf("confidences: %v", tc.confidences), func(t *testing.T) { 21 | actual := guesser.IsReliable(tc.confidences) 22 | if actual != tc.expected { 23 | t.Errorf("want %t, got %t", tc.expected, actual) 24 | } 25 | }) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /pkg/guesser/guesser.go: -------------------------------------------------------------------------------- 1 | package guesser 2 | 3 | import ( 4 | "os" 5 | "sort" 6 | 7 | tf "github.com/galeone/tensorflow/tensorflow/go" 8 | tg "github.com/galeone/tfgo" 9 | "github.com/robherley/guesslang-go/pkg/errors" 10 | ) 11 | 12 | func init() { 13 | // Supress TensorFlow logging by default 14 | // https://github.com/tensorflow/tensorflow/blob/74d4bcde8b51963f8c7401d118e17b987d6c93fd/tensorflow/tsl/platform/default/logging.h#L67-L71 15 | os.Setenv("TF_CPP_MIN_LOG_LEVEL", "5") 16 | } 17 | 18 | type Guesser struct { 19 | model *tg.Model 20 | } 21 | 22 | // New initializes 
a guesslang model. It will write the TensorFlow SavedModel temporarily to disk, then load it. 23 | func New() (g *Guesser, err error) { 24 | defer func() { 25 | // unfortunately, the tfgo library panics instead of returning errors 26 | if r := recover(); r != nil { 27 | err = errors.NewFailedLoad(r) 28 | } 29 | }() 30 | 31 | modelPath, err := writeModelToTempDir() 32 | if err != nil { 33 | return nil, err 34 | } 35 | defer os.RemoveAll(modelPath) 36 | 37 | model := tg.LoadModel(modelPath, []string{"serve"}, nil) 38 | return &Guesser{model}, nil 39 | } 40 | 41 | // Guess executes the guesslang model on a code snippet, providing sorted confidences for all of the supported languages 42 | // and will also provide whether or not the answer is arbitrarily reliable. 43 | func (g *Guesser) Guess(snippet string) (a *Answer, err error) { 44 | results, err := g.exec(snippet) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | if len(results) != 2 { 50 | return nil, errors.NewInvalidResult("expected two outputs") 51 | } 52 | 53 | var languages []string 54 | if result, ok := results[0].Value().([][]string); ok && len(result) == 1 { 55 | languages = result[0] 56 | } else { 57 | return nil, errors.NewInvalidResult("invalid result for languages") 58 | } 59 | 60 | var confidences []float64 61 | if result, ok := results[1].Value().([][]float32); ok && len(result) == 1 { 62 | confidences = make([]float64, len(result[0])) 63 | for i, v := range result[0] { 64 | confidences[i] = float64(v) 65 | } 66 | } else { 67 | return nil, errors.NewInvalidResult("invalid result for confidences") 68 | } 69 | 70 | if len(languages) != len(confidences) { 71 | return nil, errors.NewInvalidResult("mismatch between languages and confidences") 72 | } 73 | 74 | predictions := make([]Prediction, 0, len(languages)) 75 | for i := range languages { 76 | predictions = append(predictions, Prediction{ 77 | Confidence: confidences[i], 78 | Language: languages[i], 79 | }) 80 | } 81 | 82 | 
sort.Slice(predictions, func(i, j int) bool { 83 | return predictions[i].Confidence > predictions[j].Confidence 84 | }) 85 | 86 | return &Answer{ 87 | Predictions: predictions, 88 | Reliable: IsReliable(confidences), 89 | }, nil 90 | } 91 | 92 | func (g *Guesser) exec(snippet string) (results []*tf.Tensor, err error) { 93 | defer func() { 94 | // unfortunately, the tfgo library panics instead of returning errors 95 | if r := recover(); r != nil { 96 | err = errors.NewFailedExec(r) 97 | } 98 | }() 99 | 100 | input, err := tf.NewTensor([1]string{snippet}) 101 | if err != nil { 102 | return nil, err 103 | } 104 | 105 | // see docs/model-info.md 106 | results = g.model.Exec([]tf.Output{ 107 | g.model.Op("head/Tile", 0), 108 | g.model.Op("head/predictions/probabilities", 0), 109 | }, map[tf.Output]*tf.Tensor{ 110 | g.model.Op("Placeholder", 0): input, 111 | }) 112 | 113 | return results, nil 114 | } 115 | -------------------------------------------------------------------------------- /pkg/guesser/guesser_test.go: -------------------------------------------------------------------------------- 1 | package guesser_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/robherley/guesslang-go/pkg/guesser" 8 | ) 9 | 10 | const ( 11 | CCode = ` 12 | #include 13 | 14 | int main(int argc, char* argv[]) 15 | { 16 | printf("Hello world"); 17 | } 18 | ` 19 | PythonCode = ` 20 | from __future__ import print_function 21 | 22 | if __name__ == "__main__": 23 | print("Hello world") 24 | ` 25 | RustCode = ` 26 | fn main() { 27 | println!("Hello world"); 28 | } 29 | ` 30 | Markdown = ` 31 | # Hello World 32 | Check out this [amazing website](https://reb.gg)! 
33 | ` 34 | ) 35 | 36 | func TestGuesserNew(t *testing.T) { 37 | gsr, err := guesser.New() 38 | 39 | if err != nil { 40 | t.Error(err) 41 | } 42 | 43 | if gsr == nil { 44 | t.Error("gsr is nil") 45 | } 46 | } 47 | 48 | func TestGuesserGuess(t *testing.T) { 49 | cases := []struct { 50 | lang string 51 | code string 52 | }{ 53 | {"c", CCode}, 54 | {"py", PythonCode}, 55 | {"rs", RustCode}, 56 | {"md", Markdown}, 57 | } 58 | 59 | gsr, err := guesser.New() 60 | if err != nil { 61 | t.Error(err) 62 | return 63 | } 64 | 65 | for _, tc := range cases { 66 | t.Run(fmt.Sprintf("%s code", tc.lang), func(t *testing.T) { 67 | answer, err := gsr.Guess(tc.code) 68 | if err != nil { 69 | t.Error(err) 70 | return 71 | } 72 | 73 | if answer.Predictions[0].Language != tc.lang { 74 | t.Errorf("want %s, got %s", tc.lang, answer.Predictions[0].Language) 75 | } 76 | 77 | if !answer.Reliable { 78 | t.Error("want reliable, got unreliable") 79 | } 80 | }) 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /pkg/guesser/model.go: -------------------------------------------------------------------------------- 1 | package guesser 2 | 3 | import ( 4 | "embed" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | ) 9 | 10 | var ( 11 | //go:embed model/* 12 | savedModel embed.FS 13 | ) 14 | 15 | // as far as I know, there is no way to load a SavedModel/graph (with variables) from memory with the go tf library 16 | // https://github.com/galeone/tfgo/issues/44#issuecomment-841806254 17 | func writeModelToTempDir() (string, error) { 18 | modelPath, err := os.MkdirTemp("", "guesslang-go") 19 | if err != nil { 20 | return "", err 21 | } 22 | 23 | savedModelFile, err := savedModel.Open("model/saved_model.pb") 24 | if err != nil { 25 | return "", err 26 | } 27 | defer savedModelFile.Close() 28 | 29 | savedModelFileDisk, err := os.Create(modelPath + "/saved_model.pb") 30 | if err != nil { 31 | return "", err 32 | } 33 | defer savedModelFileDisk.Close() 34 | 35 | if 
_, err := io.Copy(savedModelFileDisk, savedModelFile); err != nil { 36 | return "", err 37 | } 38 | 39 | err = os.MkdirAll(filepath.Join(modelPath, "variables"), 0755) 40 | if err != nil { 41 | return "", err 42 | } 43 | 44 | variablesFiles, err := savedModel.ReadDir("model/variables") 45 | if err != nil { 46 | return "", err 47 | } 48 | 49 | for _, file := range variablesFiles { 50 | variablesFile, err := savedModel.Open("model/variables/" + file.Name()) 51 | if err != nil { 52 | return "", err 53 | } 54 | defer variablesFile.Close() 55 | 56 | variablesFileDisk, err := os.Create(filepath.Join(modelPath, "variables", file.Name())) 57 | if err != nil { 58 | return "", err 59 | } 60 | defer variablesFileDisk.Close() 61 | 62 | if _, err := io.Copy(variablesFileDisk, variablesFile); err != nil { 63 | return "", err 64 | } 65 | } 66 | 67 | return modelPath, nil 68 | } 69 | -------------------------------------------------------------------------------- /pkg/guesser/model/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robherley/guesslang-go/4d5ce3ff81289275158e7378e1d1a1620a30505d/pkg/guesser/model/saved_model.pb -------------------------------------------------------------------------------- /pkg/guesser/model/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robherley/guesslang-go/4d5ce3ff81289275158e7378e1d1a1620a30505d/pkg/guesser/model/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /pkg/guesser/model/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robherley/guesslang-go/4d5ce3ff81289275158e7378e1d1a1620a30505d/pkg/guesser/model/variables/variables.index 
#!/bin/bash

# This script downloads and installs libtensorflow (CPU only) for the current platform.
# If you are on macOS, you can just use `brew install libtensorflow` instead.
# https://www.tensorflow.org/install/lang_c

set -euo pipefail

OS=$(go env GOOS)
ARCH=$(go env GOARCH)

# Runs the given command directly when already root, otherwise through sudo.
function maybe_sudo() {
  if [ "$(id -u)" -ne 0 ]; then
    sudo "$@"
  else
    "$@"
  fi
}

case "$OS" in
  darwin)
    # Homebrew picks the right build itself, so no architecture check is needed here.
    brew install libtensorflow
    ;;
  linux)
    # The archive downloaded below is the x86_64 build only.
    if [ "$ARCH" != "amd64" ]; then
      echo "unsupported architecture: $ARCH" >&2
      exit 1
    fi

    TENSORFLOW_VERSION=2.11.0
    WORK_DIR=$(mktemp -d)
    # Remove the whole temporary directory (not just the archive) on any exit.
    trap 'rm -rf "$WORK_DIR"' EXIT
    ARCHIVE_PATH="$WORK_DIR/tensorflow.tar.gz"

    # --fail: stop on HTTP errors instead of saving an error page; --location: follow redirects.
    curl --fail --location -o "$ARCHIVE_PATH" "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-${OS}-x86_64-${TENSORFLOW_VERSION}.tar.gz"
    maybe_sudo tar -xzf "$ARCHIVE_PATH" -C /usr/local
    maybe_sudo ldconfig /usr/local/lib
    ;;
  *)
    echo "unsupported OS: $OS" >&2
    exit 1
    ;;
esac
#!/bin/bash

# Refreshes pkg/guesser/model with the SavedModel shipped in yoeo/guesslang.
# The upstream repository is cloned into tmp/guesslang under the repo root and
# the contents of its guesslang/data/model directory replace the local model.

# Abort on the first failure: without this, a failed clone (or running outside
# a git checkout) would still reach the `rm -rf "$DST_DIR"` below and delete
# the existing model without replacing it.
set -euo pipefail

REPO=https://github.com/yoeo/guesslang.git

ROOT_DIR=$(git rev-parse --show-toplevel)
TMP_DIR="$ROOT_DIR/tmp"
GUESSLANG_DIR="$TMP_DIR/guesslang"
SRC_DIR="$GUESSLANG_DIR/guesslang/data/model"
DST_DIR="$ROOT_DIR/pkg/guesser/model"

mkdir -p "$TMP_DIR"
rm -rf "$GUESSLANG_DIR"
mkdir -p "$GUESSLANG_DIR"
git clone "$REPO" "$GUESSLANG_DIR"

rm -rf "$DST_DIR"
mkdir -p "$DST_DIR"
# "SRC/." copies the directory's contents on both GNU and BSD cp; a bare
# trailing slash is ignored by GNU cp and would create model/model instead.
cp -R "$SRC_DIR/." "$DST_DIR"