├── .gitignore ├── .travis.yml ├── Dockerfile.rpmbuild ├── LICENSE ├── README.md ├── cmd ├── gannoy-converter │ └── main.go ├── gannoy-db │ └── main.go └── gannoy │ └── main.go ├── const.go ├── converter.go ├── distance.go ├── distance_test.go ├── docker-compose.yml ├── file.go ├── file_test.go ├── free.go ├── free_test.go ├── gannoy.go ├── gannoy_test.go ├── lock.go ├── lock_test.go ├── maps.go ├── maps_test.go ├── meta.go ├── meta_test.go ├── node.go ├── node_test.go ├── queue.go ├── random.go ├── rpmbuild ├── RPMS │ └── x86_64 │ │ └── .gitkeep ├── SOURCES │ ├── gannoy-db.logrotate │ ├── gannoy-db.service │ └── gannoy-db.toml └── SPECS │ └── gannoy.spec ├── sort.go ├── sort_test.go ├── storage.go ├── util.go └── version.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.tree 2 | *.map 3 | *.meta 4 | gannoy 5 | gannoy-db 6 | gannoy-converter 7 | !cmd/gannoy/ 8 | !cmd/gannoy-db/ 9 | !cmd/gannoy-converter/ 10 | rpmbuild/RPMS/x86_64/gannoy-* 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.8.1 4 | script: 5 | - go test -v ./... 
6 | -------------------------------------------------------------------------------- /Dockerfile.rpmbuild: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | MAINTAINER dev.kuro.obi@gmail.com 3 | 4 | RUN yum -y install wget git rpmdevtools yum-utils 5 | RUN rpmdev-setuptree 6 | RUN wget https://storage.googleapis.com/golang/go1.8.3.linux-amd64.tar.gz 7 | RUN tar -C /usr/local -xzf go1.8.3.linux-amd64.tar.gz 8 | ENV PATH $PATH:/usr/local/go/bin 9 | RUN go get github.com/coreos/go-semver && \ 10 | go get github.com/nightlyone/lockfile && \ 11 | go get github.com/labstack/echo && \ 12 | go get github.com/jessevdk/go-flags && \ 13 | go get github.com/dgrijalva/jwt-go && \ 14 | go get github.com/lestrrat/go-server-starter/listener && \ 15 | go get golang.org/x/net/netutil && \ 16 | go get github.com/monochromegane/conflag && \ 17 | go get github.com/gansidui/priority_queue 18 | RUN mkdir -p /root/go/src/github.com/monochromegane/gannoy 19 | ADD . 
/root/go/src/github.com/monochromegane/gannoy 20 | WORKDIR /root/go/src/github.com/monochromegane/gannoy 21 | RUN go build -o /root/rpmbuild/SOURCES/gannoy-0.0.1 cmd/gannoy/main.go && \ 22 | go build -o /root/rpmbuild/SOURCES/gannoy-converter-0.0.1 cmd/gannoy-converter/main.go && \ 23 | go build -o /root/rpmbuild/SOURCES/gannoy-db-0.0.1 cmd/gannoy-db/main.go 24 | WORKDIR /root 25 | ADD rpmbuild/SPECS/gannoy.spec /root/rpmbuild/SPECS/gannoy.spec 26 | ADD rpmbuild/SOURCES/gannoy-* /root/rpmbuild/SOURCES/ 27 | RUN rpmbuild -bb /root/rpmbuild/SPECS/gannoy.spec 28 | # RUN rpm -ivh /root/rpmbuild/RPMS/x86_64/gannoy-0.0.1-1.x86_64.rpm 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) [2014] [gannoy] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gannoy [![Build Status](https://travis-ci.org/monochromegane/gannoy.svg?branch=master)](https://travis-ci.org/monochromegane/gannoy) 2 | 3 | Approximate nearest neighbor search server and dynamic index written in Golang. 4 | Gannoy is inspired by [spotify/annoy](https://github.com/spotify/annoy) and provides a dynamic database and API server. 5 | 6 | ## Quick start 7 | 8 | ```sh 9 | # Create database 10 | $ gannoy create -d 100 DATABASE_NAME 11 | # Start server 12 | $ gannoy-db 13 | ``` 14 | 15 | Register features using the gannoy API. 16 | 17 | ```sh 18 | $ curl 'http://localhost:1323/databases/DATABASE_NAME/features/KEY' \ 19 | -H "Content-type: application/json" \ 20 | -X PUT \ 21 | -d '{"features": [1.0, 0.5, 0.2, ...]}' 22 | ``` 23 | 24 | Search similar items. 25 | 26 | ```sh 27 | $ curl 'http://localhost:1323/search?database=DATABASE_NAME&key=KEY' 28 | [10, 23, 2, 20, 300, 45, 11, 8, 39, 88] 29 | ``` 30 | 31 | See also `gannoy create --help` or `gannoy-db --help`. 32 | 33 | ## Install 34 | 35 | ```sh 36 | $ go get github.com/monochromegane/gannoy/... 37 | ``` 38 | 39 | The recommended environment is **Linux (kernel >= 3.15.0)**. 40 | 41 | Gannoy uses the fcntl system call with the F\_OFD\_SETLKW command to lock only the minimum necessary range from multiple goroutines, which speeds up concurrent access. 42 | 43 | ## API 44 | 45 | ### GET /search 46 | 47 | Search approximate nearest neighbor items. 48 | 49 | #### query parameters 50 | 51 | | key | value | 52 | | -------- | ------------------------------------------------- | 53 | | database | Search for similar items from this database name. | 54 | | key | Search for similar items from this key's feature. | 55 | | limit | Maximum number of results. | 56 | 57 | #### Response 58 | 59 | * Response 200 (application/json) 60 | * return list of item keys. 
61 | * Response 404 (no content) 62 | * return no content if the specified database or key is not found. 63 | 64 | ### POST /databases/:database/features 65 | 66 | Register features using a specified key. 67 | 68 | #### URI parameters 69 | 70 | | key | value | 71 | | -------- | ---------------------------------- | 72 | | database | Create item in this database name. | 73 | 74 | #### JSON parameters 75 | 76 | | key | value | 77 | | -------- | --------------------------- | 78 | | key | Create item using this key. | 79 | | features | List of feature values. | 80 | 81 | **Note**: `KEY` must be integer. 82 | 83 | #### Response 84 | 85 | * Response 200 (no content) 86 | * return no content. 87 | * Response 422 (no content) 88 | * return no content if the specified database is not found or a parameter is unprocessable. 89 | 90 | ### PUT /databases/:database/features/:key 91 | 92 | Register or update features using a specified key. 93 | 94 | #### URI parameters 95 | 96 | | key | value | 97 | | -------- | -------------------------------------------- | 98 | | database | Create or update item in this database name. | 99 | | key | Create or update item using this key. | 100 | 101 | **Note**: `KEY` must be integer. 102 | 103 | #### JSON parameters 104 | 105 | | key | value | 106 | | -------- | ----------------------- | 107 | | features | List of feature values. | 108 | 109 | #### Response 110 | 111 | * Response 200 (no content) 112 | * return no content. 113 | * Response 422 (no content) 114 | * return no content if the specified database is not found or a parameter is unprocessable. 115 | 116 | ### DELETE /databases/:database/features/:key 117 | 118 | Delete features using a specified key. 119 | 120 | #### URI parameters 121 | 122 | | key | value | 123 | | -------- | ------------------------------------ | 124 | | database | Delete item from this database name. | 125 | | key | Delete item using this key. 
| 126 | 127 | #### Response 128 | 129 | * Response 200 (no content) 130 | * return no content. 131 | * Response 422 (no content) 132 | * return no content if the specified database is not found or a parameter is unprocessable. 133 | 134 | ## Run with Server::Starter 135 | 136 | Gannoy can run with Server::Starter for supporting graceful restart. 137 | 138 | ```sh 139 | $ start_server --port 8080 --pid-file app.pid -- gannoy-db -s # gannoy-db listens on the Server::Starter port if you pass the s option. 140 | ``` 141 | 142 | ## Configuration 143 | 144 | Gannoy can load options from a configuration file. 145 | 146 | If you prepare a configuration file named `gannoy.toml` like the following: 147 | 148 | ```toml 149 | data-dir = "/var/lib/gannoy" 150 | log-dir = "/var/log/gannoy" 151 | lock-dir = "/var/run/gannoy" 152 | server-starter = true 153 | ``` 154 | 155 | You can specify the file name with the c option. 156 | 157 | ```sh 158 | $ gannoy-db -c gannoy.toml 159 | ``` 160 | 161 | **Note**: The priority of flags is `command-line flag > configuration file > flag default value`. See also [monochromegane/conflag](https://github.com/monochromegane/conflag). 162 | 163 | ## Building rpm 164 | 165 | **Note**: Requirements are Docker and docker-compose. 166 | 167 | ```sh 168 | $ docker-compose build gannoy-rpmbuild 169 | $ docker-compose run gannoy-rpmbuild 170 | ``` 171 | 172 | The result (`gannoy-x.x.x-x.x86_64.rpm`) is put in the `rpmbuild/RPMS/x86_64` directory on the host. 173 | 174 | You can install the rpm and run the gannoy-db process on CentOS. 175 | 176 | ```sh 177 | $ sudo rpm -ivh gannoy-x.x.x-x.x86_64.rpm 178 | $ sudo systemctl start gannoy-db 179 | ``` 180 | 181 | ## Data migration from annoy 182 | 183 | You can migrate a [spotify/annoy](https://github.com/spotify/annoy) database file. 
184 | 185 | ```sh 186 | $ gannoy-converter -d 100 ANNOY_FILE DATABASE_NAME 187 | ``` 188 | 189 | ## License 190 | 191 | [MIT](https://github.com/monochromegane/gannoy/blob/master/LICENSE) 192 | 193 | ## Author 194 | 195 | [monochromegane](https://github.com/monochromegane) 196 | -------------------------------------------------------------------------------- /cmd/gannoy-converter/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "os" 7 | 8 | flags "github.com/jessevdk/go-flags" 9 | "github.com/monochromegane/gannoy" 10 | ) 11 | 12 | type Options struct { 13 | Dim int `short:"d" long:"dim" default:"2" description:"Specify size of feature dimention."` 14 | Tree int `short:"t" long:"tree" default:"1" description:"Specify size of index tree."` 15 | K int `short:"K" long:"K" default:"-1" default-mask:"twice the value of dim" description:"Specify max node size in a bucket node."` 16 | Path string `short:"p" long:"path" default:"." 
description:"Build meta file into this directory."` 17 | Maps string `short:"m" long:"map-path" default:"" description:"Specify key and index mapping CSV file, if exist."` 18 | Version bool `short:"v" long:"version" description:"Show version"` 19 | } 20 | 21 | var opts Options 22 | 23 | func main() { 24 | parser := flags.NewParser(&opts, flags.Default) 25 | parser.Usage = "[OPTIONS] SRC_ANNOY_OR_CSV_FILE DEST_DATABASE_NAME" 26 | args, err := parser.Parse() 27 | if err != nil { 28 | os.Exit(1) 29 | } 30 | if opts.Version { 31 | fmt.Printf("%s version %s\n", parser.Name, gannoy.VERSION) 32 | os.Exit(0) 33 | } 34 | if len(args) != 2 { 35 | fmt.Fprintf(os.Stderr, "source annoy or CSV file and destination database name not specified.\n") 36 | os.Exit(1) 37 | } 38 | if opts.K < 3 || opts.K > opts.Dim*2 { 39 | fmt.Fprintf(os.Stderr, "K must be less than dim*2 or be at least 3 or more, but %d.", opts.K) 40 | os.Exit(1) 41 | } 42 | K := opts.K 43 | if K == -1 { 44 | K = opts.Dim * 2 45 | } 46 | 47 | converter := gannoy.NewConverter(args[0], opts.Dim, opts.Tree, K, binary.LittleEndian) 48 | err = converter.Convert(args[0], opts.Path, args[1], opts.Maps) 49 | if err != nil { 50 | fmt.Fprintf(os.Stderr, "%v\n", err) 51 | os.Exit(1) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /cmd/gannoy-db/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io/ioutil" 7 | "net" 8 | "net/http" 9 | "os" 10 | "os/signal" 11 | "path/filepath" 12 | "runtime" 13 | "strconv" 14 | "strings" 15 | "syscall" 16 | "time" 17 | 18 | "golang.org/x/net/netutil" 19 | 20 | flags "github.com/jessevdk/go-flags" 21 | "github.com/labstack/echo" 22 | "github.com/labstack/echo/middleware" 23 | "github.com/labstack/gommon/log" 24 | "github.com/lestrrat/go-server-starter/listener" 25 | "github.com/monochromegane/conflag" 26 | "github.com/monochromegane/gannoy" 27 | 
"github.com/nightlyone/lockfile" 28 | ) 29 | 30 | type Options struct { 31 | DataDir string `short:"d" long:"data-dir" default:"." description:"Specify the directory where the meta files are located."` 32 | LogDir string `short:"l" long:"log-dir" default-mask:"os.Stdout" description:"Specify the log output directory."` 33 | LockDir string `short:"L" long:"lock-dir" default:"." description:"Specify the lock file directory. This option is used only server-starter option."` 34 | WithServerStarter bool `short:"s" long:"server-starter" description:"Use server-starter listener for server address."` 35 | ShutDownTimeout int `short:"t" long:"timeout" default:"10" description:"Specify the number of seconds for shutdown timeout."` 36 | MaxConnections int `short:"m" long:"max-connections" default:"100" description:"Specify the number of max connections."` 37 | Config string `short:"c" long:"config" default:"" description:"Configuration file path."` 38 | Version bool `short:"v" long:"version" description:"Show version"` 39 | } 40 | 41 | var opts Options 42 | 43 | type Feature struct { 44 | W []float64 `json:"features"` 45 | } 46 | 47 | type FeatureWithKey struct { 48 | Key int `json:"key"` 49 | W []float64 `json:"features"` 50 | } 51 | 52 | func main() { 53 | 54 | // Parse option from args and configuration file. 55 | conflag.LongHyphen = true 56 | conflag.BoolValue = false 57 | parser := flags.NewParser(&opts, flags.Default) 58 | _, err := parser.ParseArgs(os.Args[1:]) 59 | if err != nil { 60 | os.Exit(1) 61 | } 62 | if opts.Version { 63 | fmt.Printf("%s version %s\n", parser.Name, gannoy.VERSION) 64 | os.Exit(0) 65 | } 66 | if opts.Config != "" { 67 | if args, err := conflag.ArgsFrom(opts.Config); err == nil { 68 | if _, err := parser.ParseArgs(args); err != nil { 69 | os.Exit(1) 70 | } 71 | } 72 | } 73 | _, err = parser.ParseArgs(os.Args[1:]) 74 | if err != nil { 75 | os.Exit(1) 76 | } 77 | 78 | // Wait old process finishing. 
79 | if opts.WithServerStarter { 80 | lock, err := initializeLock(opts.LockDir) 81 | if err != nil { 82 | fmt.Fprintln(os.Stderr, err) 83 | os.Exit(1) 84 | } 85 | defer lock.Unlock() 86 | for { 87 | if err := lock.TryLock(); err != nil { 88 | time.Sleep(100 * time.Millisecond) 89 | continue 90 | } 91 | break 92 | } 93 | } 94 | 95 | e := echo.New() 96 | 97 | // initialize log 98 | l, err := initializeLog(opts.LogDir) 99 | if err != nil { 100 | fmt.Fprintln(os.Stderr, err) 101 | os.Exit(1) 102 | } 103 | e.Logger.SetLevel(log.INFO) 104 | e.Logger.SetOutput(l) 105 | e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{Output: l})) 106 | 107 | // Load meta files 108 | files, err := ioutil.ReadDir(opts.DataDir) 109 | if err != nil { 110 | fmt.Fprintln(os.Stderr, err) 111 | os.Exit(1) 112 | } 113 | 114 | metaCh := make(chan string, len(files)) 115 | gannoyCh := make(chan gannoy.GannoyIndex) 116 | errCh := make(chan error) 117 | databases := map[string]gannoy.GannoyIndex{} 118 | var metaCount int 119 | for _, file := range files { 120 | if file.IsDir() || filepath.Ext(file.Name()) != ".meta" { 121 | continue 122 | } 123 | metaCh <- filepath.Join(opts.DataDir, file.Name()) 124 | metaCount++ 125 | } 126 | if metaCount == 0 { 127 | fmt.Fprintln(os.Stderr, "Do not exist Meta files.") 128 | close(metaCh) 129 | close(gannoyCh) 130 | close(errCh) 131 | os.Exit(1) 132 | } 133 | 134 | for i := 0; i < runtime.GOMAXPROCS(0); i++ { 135 | go gannoyIndexInitializer(metaCh, gannoyCh, errCh) 136 | } 137 | 138 | loop: 139 | for { 140 | select { 141 | case gannoy := <-gannoyCh: 142 | key := strings.TrimSuffix(filepath.Base(gannoy.MetaFile()), ".meta") 143 | databases[key] = gannoy 144 | if len(databases) >= metaCount { 145 | close(metaCh) 146 | close(gannoyCh) 147 | close(errCh) 148 | break loop 149 | } 150 | case err := <-errCh: 151 | fmt.Fprintln(os.Stderr, err) 152 | os.Exit(1) 153 | } 154 | } 155 | 156 | // Define API 157 | e.GET("/search", func(c echo.Context) error { 158 | 
database := c.QueryParam("database") 159 | if _, ok := databases[database]; !ok { 160 | return c.NoContent(http.StatusNotFound) 161 | } 162 | key, err := strconv.Atoi(c.QueryParam("key")) 163 | if err != nil { 164 | key = -1 165 | } 166 | limit, err := strconv.Atoi(c.QueryParam("limit")) 167 | if err != nil { 168 | limit = 10 169 | } 170 | 171 | gannoy := databases[database] 172 | r, err := gannoy.GetNnsByKey(key, limit, -1) 173 | if err != nil || len(r) == 0 { 174 | return c.NoContent(http.StatusNotFound) 175 | } 176 | 177 | return c.JSON(http.StatusOK, r) 178 | }) 179 | 180 | e.POST("/databases/:database/features", func(c echo.Context) error { 181 | database := c.Param("database") 182 | if _, ok := databases[database]; !ok { 183 | return c.NoContent(http.StatusUnprocessableEntity) 184 | } 185 | feature := new(FeatureWithKey) 186 | if err := c.Bind(feature); err != nil { 187 | return err 188 | } 189 | 190 | gannoy := databases[database] 191 | err = gannoy.AddItem(feature.Key, feature.W) 192 | if err != nil { 193 | return c.NoContent(http.StatusUnprocessableEntity) 194 | } 195 | return c.NoContent(http.StatusOK) 196 | }) 197 | 198 | e.PUT("/databases/:database/features/:key", func(c echo.Context) error { 199 | database := c.Param("database") 200 | if _, ok := databases[database]; !ok { 201 | return c.NoContent(http.StatusUnprocessableEntity) 202 | } 203 | key, err := strconv.Atoi(c.Param("key")) 204 | if err != nil { 205 | return c.NoContent(http.StatusUnprocessableEntity) 206 | } 207 | feature := new(Feature) 208 | if err := c.Bind(feature); err != nil { 209 | return err 210 | } 211 | 212 | gannoy := databases[database] 213 | err = gannoy.UpdateItem(key, feature.W) 214 | if err != nil { 215 | return c.NoContent(http.StatusUnprocessableEntity) 216 | } 217 | return c.NoContent(http.StatusOK) 218 | }) 219 | 220 | e.DELETE("/databases/:database/features/:key", func(c echo.Context) error { 221 | database := c.Param("database") 222 | if _, ok := databases[database]; !ok 
{ 223 | return c.NoContent(http.StatusUnprocessableEntity) 224 | } 225 | key, err := strconv.Atoi(c.Param("key")) 226 | if err != nil { 227 | return c.NoContent(http.StatusUnprocessableEntity) 228 | } 229 | gannoy := databases[database] 230 | err = gannoy.RemoveItem(key) 231 | if err != nil { 232 | return c.NoContent(http.StatusUnprocessableEntity) 233 | } 234 | 235 | return c.NoContent(http.StatusOK) 236 | }) 237 | 238 | e.GET("/health", func(c echo.Context) error { 239 | return c.NoContent(http.StatusOK) 240 | }) 241 | 242 | // Start server 243 | sig := os.Interrupt 244 | if opts.WithServerStarter { 245 | sig = syscall.SIGTERM 246 | listeners, err := listener.ListenAll() 247 | if err != nil && err != listener.ErrNoListeningTarget { 248 | fmt.Fprintln(os.Stderr, err) 249 | os.Exit(1) 250 | } 251 | e.Listener = netutil.LimitListener(listeners[0], opts.MaxConnections) 252 | } else { 253 | l, err := net.Listen("tcp", ":1323") 254 | if err != nil { 255 | fmt.Fprintln(os.Stderr, err) 256 | os.Exit(1) 257 | } 258 | e.Listener = netutil.LimitListener(l, opts.MaxConnections) 259 | } 260 | 261 | go func() { 262 | if err := e.Start(""); err != nil { 263 | e.Logger.Info("shutting down the server") 264 | } 265 | }() 266 | 267 | sigCh := make(chan os.Signal, 1) 268 | signal.Notify(sigCh, sig) 269 | <-sigCh 270 | 271 | ctx, cancel := context.WithTimeout(context.Background(), time.Duration(opts.ShutDownTimeout)*time.Second) 272 | defer cancel() 273 | if err := e.Shutdown(ctx); err != nil { 274 | e.Logger.Fatal(err) 275 | } 276 | } 277 | 278 | func initializeLog(logDir string) (*os.File, error) { 279 | if logDir == "" { 280 | return os.Stdout, nil 281 | } 282 | if err := os.MkdirAll(logDir, os.ModePerm); err != nil { 283 | return nil, err 284 | } 285 | return os.OpenFile(filepath.Join(logDir, "gannoy-db.log"), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) 286 | } 287 | 288 | func initializeLock(lockDir string) (lockfile.Lockfile, error) { 289 | if err := os.MkdirAll(lockDir, 
os.ModePerm); err != nil { 290 | return "", err 291 | } 292 | lock := "gannoy-db.lock" 293 | if !filepath.IsAbs(lockDir) { 294 | lockDir, err := filepath.Abs(lockDir) 295 | if err != nil { 296 | return lockfile.Lockfile(""), err 297 | } 298 | return lockfile.New(filepath.Join(lockDir, lock)) 299 | } 300 | return lockfile.New(filepath.Join(lockDir, lock)) 301 | } 302 | 303 | func gannoyIndexInitializer(metaCh chan string, gannoyCh chan gannoy.GannoyIndex, errCh chan error) { 304 | for meta := range metaCh { 305 | gannoy, err := gannoy.NewGannoyIndex(meta, gannoy.Angular{}, gannoy.RandRandom{}) 306 | if err == nil { 307 | gannoyCh <- gannoy 308 | } else { 309 | errCh <- err 310 | } 311 | } 312 | } 313 | -------------------------------------------------------------------------------- /cmd/gannoy/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | flags "github.com/jessevdk/go-flags" 8 | "github.com/monochromegane/gannoy" 9 | ) 10 | 11 | type Options struct { 12 | Version bool `short:"v" long:"version" description:"Show version"` 13 | } 14 | 15 | type CreateCommand struct { 16 | Dim int `short:"d" long:"dim" default:"2" description:"Specify size of feature dimention."` 17 | Tree int `short:"t" long:"tree" default:"1" description:"Specify size of index tree."` 18 | K int `short:"K" long:"K" default:"-1" default-mask:"twice the value of dim" description:"Specify max node size in a bucket node."` 19 | Path string `short:"p" long:"path" default:"." 
description:"Build meta file into this directory."` 20 | } 21 | 22 | var opts Options 23 | var createCommand CreateCommand 24 | 25 | func (c *CreateCommand) Execute(args []string) error { 26 | if len(args) != 1 { 27 | return fmt.Errorf("database name not specified.") 28 | } 29 | if c.K < 3 || c.K > c.Dim*2 { 30 | return fmt.Errorf("K must be less than dim*2 or be at least 3 or more, but %d.", c.K) 31 | } 32 | K := c.K 33 | if K == -1 { 34 | K = c.Dim * 2 35 | } 36 | err := gannoy.CreateMeta(c.Path, args[0], c.Tree, c.Dim, K) 37 | if err != nil { 38 | return err 39 | } 40 | return nil 41 | } 42 | func (c *CreateCommand) Usage() string { 43 | return "[create-OPTIONS] DATABASE" 44 | } 45 | 46 | func main() { 47 | parser := flags.NewParser(&opts, flags.HelpFlag|flags.PassDoubleDash) // exclude PrintError 48 | parser.Name = "gannoy" 49 | 50 | parser.AddCommand("create", 51 | "Create database", 52 | "The create command creates a meta file for the database.", 53 | &createCommand) 54 | _, err := parser.Parse() 55 | if err != nil { 56 | if opts.Version && err.(*flags.Error).Type == flags.ErrCommandRequired { 57 | fmt.Printf("%s version %s\n", parser.Name, gannoy.VERSION) 58 | os.Exit(0) 59 | } 60 | fmt.Fprintln(os.Stderr, err) 61 | os.Exit(1) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /const.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | const ( 4 | ADD int = iota 5 | DELETE 6 | UPDATE 7 | ) 8 | 9 | const ( 10 | ASC int = iota 11 | DESC 12 | ) 13 | -------------------------------------------------------------------------------- /converter.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "encoding/csv" 7 | "fmt" 8 | "io" 9 | "os" 10 | "path/filepath" 11 | "strconv" 12 | "syscall" 13 | ) 14 | 15 | func NewConverter(from string, dim, tree, K int, 
order binary.ByteOrder) Converter { 16 | if filepath.Ext(from) == ".csv" { 17 | return csvConverter{ 18 | dim: dim, 19 | tree: tree, 20 | K: K, 21 | order: order, 22 | } 23 | } else { 24 | return converter{ 25 | dim: dim, 26 | tree: tree, 27 | K: K, 28 | order: order, 29 | } 30 | } 31 | } 32 | 33 | type Converter interface { 34 | Convert(string, string, string, string) error 35 | } 36 | 37 | type converter struct { 38 | dim int 39 | tree int 40 | K int 41 | order binary.ByteOrder 42 | } 43 | 44 | func (c converter) Convert(from, path, to, mapPath string) error { 45 | ann, err := os.Open(from) 46 | if err != nil { 47 | return err 48 | } 49 | defer ann.Close() 50 | 51 | var maps map[int]int 52 | if mapPath != "" { 53 | maps, err = c.initializeMaps(mapPath) 54 | if err != nil { 55 | return err 56 | } 57 | } 58 | 59 | err = CreateMeta(path, to, c.tree, c.dim, c.K) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | gannoy, err := NewGannoyIndex(filepath.Join(path, to+".meta"), Angular{}, RandRandom{}) 65 | if err != nil { 66 | return err 67 | } 68 | 69 | stat, _ := ann.Stat() 70 | count := int(stat.Size() / c.nodeSize()) 71 | 72 | keys := make([]int, count) 73 | vecs := make([][]float64, count) 74 | for i := 0; i < count; i++ { 75 | b := make([]byte, c.nodeSize()) 76 | _, err = syscall.Pread(int(ann.Fd()), b, c.offset(i)) 77 | if err != nil { 78 | return err 79 | } 80 | 81 | buf := bytes.NewReader(b) 82 | 83 | var nDescendants int32 84 | binary.Read(buf, c.order, &nDescendants) 85 | if int(nDescendants) != 1 { 86 | break 87 | } 88 | 89 | buf.Seek(int64(4*2), io.SeekCurrent) // skip children 90 | 91 | vec := make([]float64, c.dim) 92 | binary.Read(buf, c.order, &vec) 93 | 94 | key := i 95 | if mapPath != "" { 96 | if k, ok := maps[i]; ok { 97 | key = k 98 | } else { 99 | return fmt.Errorf("Index is not found in mapping file.\n") 100 | } 101 | } 102 | keys[i] = key 103 | vecs[i] = vec 104 | } 105 | return gannoy.AddItems(keys, vecs) 106 | } 107 | 108 | func (c 
converter) offset(index int) int64 { 109 | return c.nodeSize() * int64(index) 110 | } 111 | 112 | func (c converter) nodeSize() int64 { 113 | return int64(4 + // n_descendants 114 | 4*2 + // children[2] 115 | 8*c.dim) // v[1] 116 | } 117 | 118 | func (c converter) initializeMaps(path string) (map[int]int, error) { 119 | maps := map[int]int{} 120 | file, err := os.Open(path) 121 | if err != nil { 122 | return maps, err 123 | } 124 | defer file.Close() 125 | 126 | reader := csv.NewReader(file) 127 | for { 128 | record, err := reader.Read() 129 | if err == io.EOF { 130 | break 131 | } else if err != nil { 132 | return maps, err 133 | } 134 | key, err := strconv.Atoi(record[0]) 135 | if err != nil { 136 | return maps, err 137 | } 138 | 139 | index, err := strconv.Atoi(record[1]) 140 | if err != nil { 141 | return maps, err 142 | } 143 | maps[index] = key 144 | } 145 | 146 | return maps, nil 147 | } 148 | 149 | type csvConverter struct { 150 | dim int 151 | tree int 152 | K int 153 | order binary.ByteOrder 154 | } 155 | 156 | func (c csvConverter) Convert(from, path, to, mapPath string) error { 157 | file, err := os.Open(from) 158 | if err != nil { 159 | return err 160 | } 161 | defer file.Close() 162 | 163 | err = CreateMeta(path, to, c.tree, c.dim, c.K) 164 | if err != nil { 165 | return err 166 | } 167 | 168 | gannoy, err := NewGannoyIndex(filepath.Join(path, to+".meta"), Angular{}, RandRandom{}) 169 | if err != nil { 170 | return err 171 | } 172 | reader := csv.NewReader(file) 173 | 174 | keys := []int{} 175 | vecs := [][]float64{} 176 | for { 177 | record, err := reader.Read() 178 | if err == io.EOF { 179 | break 180 | } 181 | if err != nil { 182 | return err 183 | } 184 | 185 | key, err := strconv.Atoi(record[0]) 186 | if err != nil { 187 | return err 188 | } 189 | 190 | vec := make([]float64, c.dim) 191 | for i, f := range record[1:] { 192 | if feature, err := strconv.ParseFloat(f, 64); err != nil { 193 | return err 194 | } else { 195 | vec[i] = feature 196 | } 
197 | } 198 | keys = append(keys, key) 199 | vecs = append(vecs, vec) 200 | } 201 | return gannoy.AddItems(keys, vecs) 202 | } 203 | -------------------------------------------------------------------------------- /distance.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | type Distance interface { 8 | createSplit([]Node, Random, Node) Node 9 | distance([]float64, []float64) float64 10 | side(Node, []float64, Random) int 11 | margin(Node, []float64) float64 12 | } 13 | 14 | type Angular struct { 15 | } 16 | 17 | func (a Angular) createSplit(nodes []Node, random Random, n Node) Node { 18 | bestIv, bestJv := twoMeans(a, nodes, random, true) 19 | v := make([]float64, len(nodes[0].v)) 20 | for z, _ := range v { 21 | v[z] = bestIv[z] - bestJv[z] 22 | } 23 | n.v = normalize(v) 24 | return n 25 | } 26 | 27 | func (a Angular) distance(x, y []float64) float64 { 28 | var pp, qq, pq float64 29 | for z, xz := range x { 30 | pp += xz * xz 31 | qq += y[z] * y[z] 32 | pq += xz * y[z] 33 | } 34 | ppqq := pp * qq 35 | if ppqq > 0 { 36 | return 2.0 - 2.0*pq/math.Sqrt(ppqq) 37 | } 38 | return 2.0 39 | } 40 | 41 | func (a Angular) side(n Node, y []float64, random Random) int { 42 | dot := a.margin(n, y) 43 | if dot != 0.0 { 44 | if dot > 0 { 45 | return 1 46 | } else { 47 | return 0 48 | } 49 | } 50 | return random.flip() 51 | } 52 | 53 | func (a Angular) margin(n Node, y []float64) float64 { 54 | dot := 0.0 55 | for z, v := range n.v { 56 | dot += v * y[z] 57 | } 58 | return dot 59 | } 60 | -------------------------------------------------------------------------------- /distance_test.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestAngularMargin(t *testing.T) { 9 | angular := Angular{} 10 | node := Node{v: []float64{1, 2, 3}} 11 | y := []float64{1, 2, 3} 12 | dot := 
angular.margin(node, y) 13 | expect := 14.0 14 | if dot != expect { 15 | t.Errorf("Angular margin should return %d, but %d", expect, dot) 16 | } 17 | } 18 | 19 | func TestAngularSide(t *testing.T) { 20 | angular := Angular{} 21 | 22 | // dot is plus (14.0) 23 | node := Node{v: []float64{1, 2, 3}} 24 | y := []float64{1, 2, 3} 25 | if side := angular.side(node, y, RandRandom{}); side != 1 { 26 | t.Errorf("Angular side should return 1, but %d", side) 27 | } 28 | 29 | // dot is minus (-14.0) 30 | node = Node{v: []float64{1, 2, 3}} 31 | y = []float64{-1, -2, -3} 32 | if side := angular.side(node, y, RandRandom{}); side != 0 { 33 | t.Errorf("Angular side should return 0, but %d", side) 34 | } 35 | } 36 | 37 | func TestAngularDistance(t *testing.T) { 38 | angular := Angular{} 39 | 40 | x := []float64{1, 2, 3} 41 | y := []float64{-1, -2, -3} 42 | expect := 4.0 43 | if distance := angular.distance(x, y); distance != expect { 44 | t.Errorf("Angular distance should return %f, but %f.", expect, distance) 45 | } 46 | } 47 | 48 | func TestAngularCreateSplit(t *testing.T) { 49 | angular := Angular{} 50 | nodes := []Node{ 51 | {v: []float64{0.1, 0.1}}, 52 | {v: []float64{1.1, 1.1}}, 53 | {v: []float64{0.1, 1.1}}, 54 | {v: []float64{1.1, 0.1}}, 55 | } 56 | n := angular.createSplit(nodes, &TestLoopRandom{max: len(nodes)}, Node{}) 57 | expect := []string{"0.822251", "-0.569124"} 58 | for i, v := range n.v { 59 | if strv := fmt.Sprintf("%f", v); strv != expect[i] { 60 | t.Errorf("Create split should return node.v %s, but %s", strv, expect[i]) 61 | } 62 | } 63 | } 64 | 65 | type TestLoopRandom struct { 66 | max int 67 | current int 68 | flipCurrent int 69 | } 70 | 71 | func (r *TestLoopRandom) index(n int) int { 72 | index := r.current % r.max 73 | r.current++ 74 | return index 75 | } 76 | 77 | func (r *TestLoopRandom) flip() int { 78 | r.flipCurrent++ 79 | if r.flipCurrent%2 == 0 { 80 | return 0 81 | } else { 82 | return 1 83 | } 84 | } 85 | 
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | gannoy-rpmbuild: 4 | build: 5 | context: . 6 | dockerfile: Dockerfile.rpmbuild 7 | command: cp /root/rpmbuild/RPMS/x86_64/gannoy-0.0.1-1.x86_64.rpm /tmp/rpmbuild/RPMS/x86_64/. 8 | volumes: 9 | - .:/tmp:rw 10 | -------------------------------------------------------------------------------- /file.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "math" 7 | "os" 8 | "syscall" 9 | ) 10 | 11 | type File struct { 12 | tree int 13 | dim int 14 | K int 15 | file *os.File 16 | filename string 17 | appendFile *os.File 18 | createChan chan createArgs 19 | locker Locker 20 | nodeSize int64 21 | offsetOfV int64 22 | } 23 | 24 | func newFile(filename string, tree, dim, K int) *File { 25 | _, err := os.Stat(filename) 26 | if err != nil { 27 | f, _ := os.Create(filename) 28 | f.Close() 29 | } 30 | 31 | file, _ := os.OpenFile(filename, os.O_RDWR, 0) 32 | appendFile, _ := os.OpenFile(filename, os.O_RDWR|os.O_APPEND, 0) 33 | 34 | f := &File{ 35 | tree: tree, 36 | dim: dim, 37 | K: K, 38 | file: file, 39 | filename: filename, 40 | appendFile: appendFile, 41 | createChan: make(chan createArgs, 1), 42 | locker: newLocker(), 43 | nodeSize: int64(1 + // free 44 | 4 + // nDescendants 45 | 4 + // key 46 | 4*tree + // parents 47 | 4*2 + // children 48 | 8*dim), // v 49 | offsetOfV: int64(1 + // free 50 | 4 + // nDescendants 51 | 4 + // key 52 | 4*tree + // parents 53 | 4*2), // children 54 | } 55 | go f.creator() 56 | return f 57 | } 58 | 59 | func (f *File) Create(n Node) (int, error) { 60 | args := createArgs{node: n, result: make(chan createResult)} 61 | f.createChan <- args 62 | result := <-args.result 63 | return result.id, result.err 64 | } 65 | 66 | func (f 
*File) create(n Node) (int, error) { 67 | id := f.nodeCount() 68 | _, err := f.appendFile.Write(f.nodeToBytes(n)) 69 | return id, err 70 | } 71 | 72 | func (f *File) Find(id int) (Node, error) { 73 | node := Node{} 74 | node.id = id 75 | node.storage = f 76 | offset := f.offset(id) 77 | err := f.locker.ReadLock(f.file.Fd(), offset, f.nodeSize) 78 | if err != nil { 79 | return node, err 80 | } 81 | defer f.locker.UnLock(f.file.Fd(), offset, f.nodeSize) 82 | 83 | b := make([]byte, f.nodeSize) 84 | _, err = syscall.Pread(int(f.file.Fd()), b, offset) 85 | if err != nil { 86 | return node, err 87 | } 88 | 89 | node.free = b[0] != 0 90 | node.nDescendants = int(int32(binary.BigEndian.Uint32(b[1:5]))) 91 | node.key = int(int32(binary.BigEndian.Uint32(b[5:9]))) 92 | 93 | node.parents = make([]int, f.tree) 94 | for i := 0; i < f.tree; i++ { 95 | node.parents[i] = int(int32(binary.BigEndian.Uint32(b[9+i*4 : 9+i*4+4]))) 96 | } 97 | 98 | if node.nDescendants == 1 { 99 | // leaf node 100 | node.children = []int{0, 0} // skip children 101 | node.v = bytesToFloat64s(b[f.offsetOfV:]) 102 | } else if node.nDescendants <= f.K { 103 | // bucket node 104 | node.children = make([]int, node.nDescendants) 105 | offsetOfChildren := int(f.offsetOfV - (4 * 2)) 106 | for i := 0; i < node.nDescendants; i++ { 107 | node.children[i] = int(int32(binary.BigEndian.Uint32(b[offsetOfChildren+i*4 : offsetOfChildren+i*4+4]))) 108 | } 109 | } else { 110 | // other node 111 | node.children = make([]int, 2) 112 | offsetOfChildren := int(f.offsetOfV - (4 * 2)) 113 | for i := 0; i < 2; i++ { 114 | node.children[i] = int(int32(binary.BigEndian.Uint32(b[offsetOfChildren+i*4 : offsetOfChildren+i*4+4]))) 115 | } 116 | node.v = bytesToFloat64s(b[f.offsetOfV:]) 117 | } 118 | return node, nil 119 | } 120 | 121 | func (f *File) Update(n Node) error { 122 | bytes := f.nodeToBytes(n) 123 | offset := f.offset(n.id) 124 | file, _ := os.OpenFile(f.filename, os.O_RDWR, 0) 125 | defer file.Close() 126 | 127 | err := 
f.locker.WriteLock(file.Fd(), offset, f.nodeSize) 128 | if err != nil { 129 | return err 130 | } 131 | defer f.locker.UnLock(file.Fd(), offset, f.nodeSize) 132 | 133 | _, err = syscall.Pwrite(int(file.Fd()), bytes, offset) 134 | return err 135 | } 136 | 137 | func (f *File) UpdateParent(id, rootIndex, parent int) error { 138 | offset := f.offset(id) + 139 | int64(1+ // free 140 | 4+ // nDescendants 141 | 4+ // key 142 | 4*rootIndex) // parents 143 | buf := &bytes.Buffer{} 144 | binary.Write(buf, binary.BigEndian, int32(parent)) 145 | 146 | file, _ := os.OpenFile(f.filename, os.O_RDWR, 0) 147 | defer file.Close() 148 | 149 | err := f.locker.WriteLock(file.Fd(), offset, 4) 150 | if err != nil { 151 | return err 152 | } 153 | defer f.locker.UnLock(file.Fd(), offset, 4) 154 | 155 | _, err = syscall.Pwrite(int(file.Fd()), buf.Bytes(), offset) 156 | return err 157 | } 158 | 159 | func (f *File) Delete(n Node) error { 160 | n.free = true 161 | return f.Update(n) 162 | } 163 | 164 | func (f *File) Iterate(c chan Node) { 165 | count := f.nodeCount() 166 | // TODO: Use goroutine 167 | for i := 0; i < count; i++ { 168 | n, err := f.Find(i) 169 | if err != nil { 170 | close(c) 171 | } 172 | c <- n 173 | } 174 | close(c) 175 | } 176 | 177 | func (f File) offset(id int) int64 { 178 | return (int64(id) * f.nodeSize) 179 | } 180 | 181 | func (f File) nodeCount() int { 182 | stat, _ := f.file.Stat() 183 | size := stat.Size() 184 | return int(size / f.nodeSize) 185 | } 186 | 187 | func (f File) nodeToBytes(node Node) []byte { 188 | bytes := make([]byte, f.nodeSize) 189 | 190 | // 1bytes free 191 | if node.free { 192 | bytes[0] = 1 193 | } else { 194 | bytes[0] = 0 195 | } 196 | // 4bytes nDescendants 197 | binary.BigEndian.PutUint32(bytes[1:5], uint32(node.nDescendants)) 198 | // 4bytes key 199 | binary.BigEndian.PutUint32(bytes[5:9], uint32(node.key)) 200 | // 4bytes parents 201 | for i := 0; i < f.tree; i++ { 202 | binary.BigEndian.PutUint32(bytes[9+i*4:9+i*4+4], 
uint32(node.parents[i])) 203 | } 204 | if node.isBucket() { 205 | // 4bytes children in K 206 | offsetOfChildren := int(f.offsetOfV - (4 * 2)) 207 | for i, child := range node.children { 208 | binary.BigEndian.PutUint32(bytes[offsetOfChildren+i*4:offsetOfChildren+i*4+4], uint32(child)) 209 | } 210 | // padding by zero (nothing to do) 211 | } else { 212 | offsetOfV := int(f.offsetOfV) 213 | offsetOfChildren := int(f.offsetOfV - (4 * 2)) 214 | // 4bytes 2 children 215 | for i, child := range node.children { 216 | binary.BigEndian.PutUint32(bytes[offsetOfChildren+i*4:offsetOfChildren+i*4+4], uint32(child)) 217 | } 218 | // 8bytes v in f 219 | for i, v := range node.v { 220 | binary.BigEndian.PutUint64(bytes[offsetOfV+i*8:offsetOfV+i*8+8], math.Float64bits(v)) 221 | } 222 | } 223 | return bytes 224 | } 225 | 226 | type createArgs struct { 227 | node Node 228 | result chan createResult 229 | } 230 | 231 | type createResult struct { 232 | id int 233 | err error 234 | } 235 | 236 | func (f *File) creator() { 237 | for args := range f.createChan { 238 | id, err := f.create(args.node) 239 | args.result <- createResult{ 240 | id: id, 241 | err: err, 242 | } 243 | } 244 | } 245 | 246 | func (f File) size() int64 { 247 | info, _ := f.file.Stat() 248 | return info.Size() 249 | } 250 | 251 | func bytesToFloat64s(bytes []byte) []float64 { 252 | size := len(bytes) / 8 253 | floats := make([]float64, size) 254 | for i := 0; i < size; i++ { 255 | floats[i] = math.Float64frombits(binary.BigEndian.Uint64(bytes[0:8])) 256 | bytes = bytes[8:] 257 | } 258 | return floats 259 | } 260 | -------------------------------------------------------------------------------- /file_test.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestFileCreateAndFind(t *testing.T) { 9 | name := "test_file_create_and_find.tree" 10 | defer os.Remove(name) 11 | file := newFile(name, 2, 3, 6) 12 | 13 | 
nodes := []Node{ 14 | // Leaf node 15 | Node{ 16 | key: 10, 17 | nDescendants: 1, 18 | parents: []int{2, 3}, 19 | children: []int{0, 0}, 20 | v: []float64{1.1, 1.2, 1.3}, 21 | }, 22 | // Bucket node 23 | Node{ 24 | key: 20, 25 | nDescendants: 3, 26 | parents: []int{2, 3}, 27 | children: []int{5, 6, 7}, 28 | }, 29 | // Branch node 30 | Node{ 31 | key: 30, 32 | nDescendants: 2, 33 | parents: []int{2, 3}, 34 | children: []int{5, 6}, 35 | v: []float64{1.1, 1.2, 1.3}, 36 | }, 37 | } 38 | 39 | // Create 40 | for i, node := range nodes { 41 | id, err := file.Create(node) 42 | if id != i { 43 | t.Errorf("File create should return id: %d, but %d", i, id) 44 | } 45 | if err != nil { 46 | t.Errorf("File create should not return error.") 47 | } 48 | } 49 | 50 | // Find 51 | for id, node := range nodes { 52 | found, err := file.Find(id) 53 | if err != nil { 54 | t.Errorf("File find should not return error.") 55 | } 56 | if found.id != id { 57 | t.Errorf("File find should return created node with id %d, but %d", found.id, id) 58 | } 59 | if found.key != node.key { 60 | t.Errorf("File find should return created node with key %d, but %d", found.key, node.key) 61 | } 62 | if found.nDescendants != node.nDescendants { 63 | t.Errorf("File find should return created node with nDescendants %d, but %d", found.nDescendants, node.nDescendants) 64 | } 65 | for i, parent := range found.parents { 66 | if parent != node.parents[i] { 67 | t.Errorf("File find should return created node with parents %v, but %v", found.parents, node.parents) 68 | } 69 | } 70 | for i, child := range found.children { 71 | if child != node.children[i] { 72 | t.Errorf("File find should return created node with children %v, but %v", found.children, node.children) 73 | } 74 | } 75 | for i, v := range found.v { 76 | if v != node.v[i] { 77 | t.Errorf("File find should return created node with v %v, but %v", found.v, node.v) 78 | } 79 | } 80 | if found.free != node.free { 81 | t.Errorf("File find should return created node 
with free %d, but %d", found.free, node.free) 82 | } 83 | } 84 | } 85 | 86 | func TestFileUpdate(t *testing.T) { 87 | name := "test_file_update.tree" 88 | defer os.Remove(name) 89 | file := newFile(name, 2, 3, 4) 90 | 91 | node := Node{ 92 | key: 10, 93 | nDescendants: 1, 94 | parents: []int{2, 3}, 95 | v: []float64{1.1, 1.2, 1.3}, 96 | } 97 | 98 | // Create 99 | id, _ := file.Create(node) 100 | // Found 101 | found, _ := file.Find(id) 102 | // Update 103 | found.v = []float64{2.1, 2.2, 2.3} 104 | err := file.Update(found) 105 | if err != nil { 106 | t.Errorf("File update should not return error.") 107 | } 108 | 109 | updated, _ := file.Find(id) 110 | for i, v := range updated.v { 111 | if v != found.v[i] { 112 | t.Errorf("File update should return updated node with v %v, but %v", found.v, updated.v) 113 | } 114 | } 115 | } 116 | 117 | func TestUpdateParent(t *testing.T) { 118 | name := "test_file_update_parent.tree" 119 | defer os.Remove(name) 120 | file := newFile(name, 2, 3, 4) 121 | 122 | node := Node{ 123 | key: 10, 124 | nDescendants: 1, 125 | parents: []int{2, 3}, 126 | v: []float64{1.1, 1.2, 1.3}, 127 | } 128 | 129 | // Create 130 | id, _ := file.Create(node) 131 | // Update parent 132 | newParents := []int{20, 30} 133 | for i, newParent := range newParents { 134 | err := file.UpdateParent(id, i, newParent) 135 | if err != nil { 136 | t.Errorf("File update parent should not return error.") 137 | } 138 | } 139 | // Found 140 | updated, _ := file.Find(id) 141 | for i, newParent := range newParents { 142 | if updated.parents[i] != newParent { 143 | t.Errorf("File update parent should not return error.") 144 | } 145 | } 146 | } 147 | 148 | func TestFileIterate(t *testing.T) { 149 | name := "test_file_iterate.tree" 150 | defer os.Remove(name) 151 | file := newFile(name, 2, 3, 4) 152 | 153 | nodes := []Node{ 154 | // Leaf node 155 | Node{ 156 | key: 10, 157 | nDescendants: 1, 158 | parents: []int{2, 3}, 159 | v: []float64{1.1, 1.2, 1.3}, 160 | }, 161 | // Bucket 
// Free is a mutex-guarded LIFO stack of ids.
type Free struct {
	mu   sync.Mutex
	free []int
}

// newFree returns an empty Free.
func newFree() Free {
	return Free{free: make([]int, 0)}
}

// push puts id on top of the stack.
func (f *Free) push(id int) {
	f.mu.Lock()
	f.free = append(f.free, id)
	f.mu.Unlock()
}

// pop removes and returns the most recently pushed id. On an empty stack it
// returns -1 and an "empty" error.
func (f *Free) pop() (int, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	top := len(f.free) - 1
	if top < 0 {
		return -1, fmt.Errorf("empty")
	}

	id := f.free[top]
	f.free = f.free[:top]
	return id, nil
}
return error.") 13 | } 14 | } 15 | 16 | func TestFreePop(t *testing.T) { 17 | free := newFree() 18 | free.push(1) 19 | free.push(2) 20 | 21 | id, err := free.pop() 22 | if id != 2 { 23 | t.Errorf("Free pop should return 2, but %d.", id) 24 | } 25 | if err != nil { 26 | t.Errorf("Free pop should not return error.") 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /gannoy.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "runtime" 7 | "sort" 8 | "sync" 9 | 10 | "github.com/gansidui/priority_queue" 11 | ) 12 | 13 | type GannoyIndex struct { 14 | meta meta 15 | tree int 16 | dim int 17 | distance Distance 18 | random Random 19 | nodes Nodes 20 | K int 21 | numWorker int 22 | buildChan chan buildArgs 23 | } 24 | 25 | func NewGannoyIndex(metaFile string, distance Distance, random Random) (GannoyIndex, error) { 26 | 27 | meta, err := loadMeta(metaFile) 28 | if err != nil { 29 | return GannoyIndex{}, err 30 | } 31 | tree := meta.tree 32 | dim := meta.dim 33 | K := meta.K 34 | 35 | ann := meta.treePath() 36 | 37 | gannoy := GannoyIndex{ 38 | meta: meta, 39 | tree: tree, 40 | dim: dim, 41 | distance: distance, 42 | random: random, 43 | K: K, 44 | nodes: newNodes(ann, tree, dim, K), 45 | numWorker: numWorker(tree), 46 | buildChan: make(chan buildArgs, 1), 47 | } 48 | go gannoy.builder() 49 | return gannoy, nil 50 | } 51 | 52 | func (g GannoyIndex) Tree() { 53 | for i, root := range g.meta.roots() { 54 | n, err := g.nodes.getNode(root) 55 | if err != nil { 56 | fmt.Printf("%v\n", err) 57 | break 58 | } 59 | g.printTree(i, n, root, 0) 60 | } 61 | } 62 | 63 | func (g GannoyIndex) MetaFile() string { 64 | return g.meta.file.Name() 65 | } 66 | 67 | func (g *GannoyIndex) AddItem(key int, w []float64) error { 68 | args := buildArgs{action: ADD, key: key, w: w, result: make(chan error)} 69 | g.buildChan <- args 70 | return <-args.result 71 
| } 72 | 73 | func (g *GannoyIndex) RemoveItem(key int) error { 74 | args := buildArgs{action: DELETE, key: key, result: make(chan error)} 75 | g.buildChan <- args 76 | return <-args.result 77 | } 78 | 79 | func (g *GannoyIndex) UpdateItem(key int, w []float64) error { 80 | args := buildArgs{action: UPDATE, key: key, w: w, result: make(chan error)} 81 | g.buildChan <- args 82 | return <-args.result 83 | } 84 | 85 | func (g *GannoyIndex) GetNnsByKey(key, n, searchK int) ([]int, error) { 86 | m, err := g.nodes.getNodeByKey(key) 87 | if err != nil || !m.isLeaf() { 88 | return []int{}, fmt.Errorf("Not found") 89 | } 90 | return g.GetAllNns(m.v, n, searchK) 91 | } 92 | 93 | func (g *GannoyIndex) GetAllNns(v []float64, n, searchK int) ([]int, error) { 94 | if searchK == -1 { 95 | searchK = n * g.tree 96 | } 97 | 98 | q := priority_queue.New() 99 | for _, root := range g.meta.roots() { 100 | q.Push(&Queue{priority: math.Inf(1), value: root}) 101 | } 102 | 103 | nns := []int{} 104 | for len(nns) < searchK && q.Len() > 0 { 105 | top := q.Top().(*Queue) 106 | d := top.priority 107 | i := top.value 108 | 109 | nd, err := g.nodes.getNode(i) 110 | if err != nil { 111 | return []int{}, err 112 | } 113 | q.Pop() 114 | if nd.isLeaf() { 115 | nns = append(nns, i) 116 | } else if nd.nDescendants <= g.K { 117 | dst := nd.children 118 | nns = append(nns, dst...) 
119 | } else { 120 | margin := g.distance.margin(nd, v) 121 | q.Push(&Queue{priority: math.Min(d, +margin), value: nd.children[1]}) 122 | q.Push(&Queue{priority: math.Min(d, -margin), value: nd.children[0]}) 123 | } 124 | } 125 | 126 | sort.Ints(nns) 127 | nnsDist := make([]sorter, len(nns)) 128 | last := -1 129 | dup := 0 130 | for idx, j := range nns { 131 | if j == last { 132 | dup++ 133 | continue 134 | } 135 | last = j 136 | node, err := g.nodes.getNode(j) 137 | if err != nil { 138 | return []int{}, err 139 | } 140 | nnsDist[idx-dup] = sorter{value: g.distance.distance(v, node.v), id: node.key} 141 | } 142 | nnsDist = nnsDist[:len(nns)-dup] 143 | 144 | m := len(nnsDist) 145 | p := m 146 | if n < m { 147 | p = n 148 | } 149 | 150 | HeapSort(nnsDist, DESC, p) 151 | 152 | result := make([]int, p) 153 | for i := 0; i < p; i++ { 154 | result[i] = nnsDist[m-1-i].id 155 | } 156 | 157 | return result, nil 158 | } 159 | 160 | func (g *GannoyIndex) addItem(key int, w []float64) error { 161 | if len(w) != g.dim { 162 | return fmt.Errorf("Dimension mismatch. expect %d, but %d.\n", g.dim, len(w)) 163 | } 164 | if g.nodes.maps.isExist(key) { 165 | return fmt.Errorf("Key [%d] is already exist.\n", key) 166 | } 167 | n := g.nodes.newNode() 168 | n.key = key 169 | n.v = w 170 | n.parents = make([]int, g.tree) 171 | err := n.save() 172 | if err != nil { 173 | return err 174 | } 175 | // fmt.Printf("id %d\n", n.id) 176 | 177 | var wg sync.WaitGroup 178 | wg.Add(g.tree) 179 | buildChan := make(chan int, g.tree) 180 | worker := func(n Node) { 181 | for index := range buildChan { 182 | g.build(index, g.meta.roots()[index], n) 183 | wg.Done() 184 | } 185 | } 186 | 187 | for i := 0; i < g.numWorker; i++ { 188 | go worker(n) 189 | } 190 | 191 | for index, _ := range g.meta.roots() { 192 | buildChan <- index 193 | } 194 | 195 | wg.Wait() 196 | close(buildChan) 197 | g.nodes.maps.add(n.id, key) 198 | 199 | return nil 200 | } 201 | 202 | // Bulk insert. 
Currently, This method dosen't support mutex. 203 | // So, this method must be called only from converter. 204 | func (g *GannoyIndex) AddItems(keys []int, ws [][]float64) error { 205 | indices := make([]int, len(keys)) 206 | for i, key := range keys { 207 | n := g.nodes.newNode() 208 | n.key = key 209 | n.v = ws[i] 210 | n.parents = make([]int, g.tree) 211 | err := n.save() 212 | if err != nil { 213 | return err 214 | } 215 | indices[i] = n.id 216 | } 217 | for index, _ := range g.meta.roots() { 218 | m := g.makeTree(index, -1, indices) 219 | err := g.meta.updateRoot(index, m) 220 | if err != nil { 221 | return err 222 | } 223 | } 224 | return nil 225 | } 226 | 227 | func (g *GannoyIndex) build(index, root int, n Node) { 228 | if root == -1 { 229 | // 最初のノード 230 | n.parents[index] = -1 231 | n.save() 232 | g.meta.updateRoot(index, n.id) 233 | return 234 | } 235 | id := g.findBranchByVector(root, n.v) 236 | found, _ := g.nodes.getNode(id) 237 | // fmt.Printf("Found %d\n", item) 238 | 239 | org_parent := found.parents[index] 240 | if found.isBucket() && len(found.children) < g.K { 241 | // ノードに余裕があれば追加 242 | // fmt.Printf("pattern bucket\n") 243 | n.updateParents(index, id) 244 | found.nDescendants++ 245 | found.children = append(found.children, n.id) 246 | found.save() 247 | } else { 248 | // ノードが上限またはリーフノードであれば新しいノードを追加 249 | willDelete := false 250 | var indices []int 251 | if found.isLeaf() { 252 | // fmt.Printf("pattern leaf node\n") 253 | indices = []int{id, n.id} 254 | } else { 255 | // fmt.Printf("pattern full backet\n") 256 | indices = append(found.children, n.id) 257 | willDelete = true 258 | } 259 | 260 | m := g.makeTree(index, org_parent, indices) 261 | if root == 0 || org_parent == 0 || org_parent == -1 { 262 | // rootノードの入れ替え 263 | g.meta.updateRoot(index, m) 264 | } else { 265 | parent, _ := g.nodes.getNode(org_parent) 266 | parent.nDescendants++ 267 | children := make([]int, len(parent.children)) 268 | for i, child := range parent.children { 269 | if 
child == id { 270 | // 新しいノードに変更 271 | children[i] = m 272 | } else { 273 | // 既存のノードのまま 274 | children[i] = child 275 | } 276 | } 277 | parent.children = children 278 | parent.save() 279 | 280 | } 281 | if willDelete { 282 | found.destroy() 283 | g.nodes.free.push(found.id) 284 | } 285 | } 286 | } 287 | 288 | func (g *GannoyIndex) removeItem(key int) error { 289 | n, err := g.nodes.getNodeByKey(key) 290 | if err != nil { 291 | return err 292 | } 293 | 294 | var wg sync.WaitGroup 295 | wg.Add(g.tree) 296 | buildChan := make(chan int, g.tree) 297 | worker := func(n Node) { 298 | for root := range buildChan { 299 | g.remove(root, n) 300 | wg.Done() 301 | } 302 | } 303 | 304 | for i := 0; i < g.numWorker; i++ { 305 | go worker(n) 306 | } 307 | for index, _ := range g.meta.roots() { 308 | buildChan <- index 309 | } 310 | 311 | wg.Wait() 312 | close(buildChan) 313 | 314 | g.nodes.maps.remove(key) 315 | n.destroy() 316 | g.nodes.free.push(n.id) 317 | 318 | return nil 319 | } 320 | 321 | func (g *GannoyIndex) remove(root int, node Node) { 322 | if node.isRoot(root) { 323 | g.meta.updateRoot(root, -1) 324 | return 325 | } 326 | parent, _ := g.nodes.getNode(node.parents[root]) 327 | if parent.isBucket() && len(parent.children) > 2 { 328 | // fmt.Printf("pattern bucket\n") 329 | target := -1 330 | for i, child := range parent.children { 331 | if child == node.id { 332 | target = i 333 | } 334 | } 335 | if target == -1 { 336 | return 337 | } 338 | children := append(parent.children[:target], parent.children[(target+1):]...) 
339 | parent.nDescendants-- 340 | parent.children = children 341 | parent.save() 342 | } else { 343 | // fmt.Printf("pattern leaf node\n") 344 | var other int 345 | for _, child := range parent.children { 346 | if child != node.id { 347 | other = child 348 | } 349 | } 350 | if parent.isRoot(root) { 351 | g.meta.updateRoot(root, other) 352 | } else { 353 | grandParent, _ := g.nodes.getNode(parent.parents[root]) 354 | children := []int{} 355 | for _, child := range grandParent.children { 356 | if child == node.parents[root] { 357 | children = append(children, other) 358 | } else { 359 | children = append(children, child) 360 | } 361 | } 362 | grandParent.nDescendants-- 363 | grandParent.children = children 364 | grandParent.save() 365 | } 366 | 367 | otherNode, _ := g.nodes.getNode(other) 368 | otherNode.updateParents(root, parent.parents[root]) 369 | 370 | parent.destroy() 371 | g.nodes.free.push(parent.id) 372 | } 373 | } 374 | 375 | func (g GannoyIndex) findBranchByVector(id int, v []float64) int { 376 | node, _ := g.nodes.getNode(id) 377 | if node.isLeaf() || node.isBucket() { 378 | return id 379 | } 380 | side := g.distance.side(node, v, g.random) 381 | return g.findBranchByVector(node.children[side], v) 382 | } 383 | 384 | func (g *GannoyIndex) makeTree(root, parent int, ids []int) int { 385 | if len(ids) == 1 { 386 | n, _ := g.nodes.getNode(ids[0]) 387 | if len(n.parents) == 0 { 388 | n.parents = make([]int, g.tree) 389 | } 390 | n.updateParents(root, parent) 391 | return ids[0] 392 | } 393 | 394 | if len(ids) <= g.K { 395 | m := g.nodes.newNode() 396 | m.parents = make([]int, g.tree) 397 | m.nDescendants = len(ids) 398 | m.parents[root] = parent 399 | m.children = ids 400 | m.save() 401 | for _, child := range ids { 402 | c, _ := g.nodes.getNode(child) 403 | if len(c.parents) == 0 { 404 | c.parents = make([]int, g.tree) 405 | } 406 | c.updateParents(root, m.id) 407 | } 408 | return m.id 409 | } 410 | 411 | children := make([]Node, len(ids)) 412 | for i, id := 
range ids { 413 | children[i], _ = g.nodes.getNode(id) 414 | } 415 | 416 | childrenIds := [2][]int{[]int{}, []int{}} 417 | 418 | m := g.nodes.newNode() 419 | m.parents = make([]int, g.tree) 420 | m.nDescendants = len(ids) 421 | m.parents[root] = parent 422 | 423 | m = g.distance.createSplit(children, g.random, m) 424 | for _, id := range ids { 425 | n, _ := g.nodes.getNode(id) 426 | side := g.distance.side(m, n.v, g.random) 427 | childrenIds[side] = append(childrenIds[side], id) 428 | } 429 | 430 | for len(childrenIds[0]) == 0 || len(childrenIds[1]) == 0 { 431 | childrenIds[0] = []int{} 432 | childrenIds[1] = []int{} 433 | for z := 0; z < g.dim; z++ { 434 | m.v[z] = 0.0 435 | } 436 | for _, id := range ids { 437 | side := g.random.flip() 438 | childrenIds[side] = append(childrenIds[side], id) 439 | } 440 | } 441 | 442 | var flip int 443 | if len(childrenIds[0]) > len(childrenIds[1]) { 444 | flip = 1 445 | } 446 | 447 | m.save() 448 | for side := 0; side < 2; side++ { 449 | m.children[side^flip] = g.makeTree(root, m.id, childrenIds[side^flip]) 450 | } 451 | m.save() 452 | 453 | return m.id 454 | } 455 | 456 | type buildArgs struct { 457 | action int 458 | key int 459 | w []float64 460 | result chan error 461 | } 462 | 463 | func (g *GannoyIndex) builder() { 464 | for args := range g.buildChan { 465 | switch args.action { 466 | case ADD: 467 | args.result <- g.addItem(args.key, args.w) 468 | case DELETE: 469 | args.result <- g.removeItem(args.key) 470 | case UPDATE: 471 | if g.nodes.maps.isExist(args.key) { 472 | err := g.removeItem(args.key) 473 | if err != nil { 474 | args.result <- err 475 | } 476 | } 477 | args.result <- g.addItem(args.key, args.w) 478 | } 479 | } 480 | } 481 | 482 | func (g GannoyIndex) PrintTree() { 483 | for index, root := range g.meta.roots() { 484 | node, err := g.nodes.getNode(root) 485 | if err != nil { 486 | fmt.Printf("%v\n", err) 487 | } 488 | g.printTree(index, node, node.id, 0) 489 | } 490 | } 491 | 492 | func (g GannoyIndex) 
// numWorker returns the number of worker goroutines to use for tree trees:
// the smaller of tree and the current GOMAXPROCS setting.
func numWorker(tree int) int {
	limit := runtime.GOMAXPROCS(0) // 0 only queries the current setting
	if tree >= limit {
		return limit
	}
	return tree
}
41 | tree := 2 42 | name := "test_gannoy_index_add_item_as_root" 43 | CreateMeta(".", name, tree, 3, 4) 44 | defer os.Remove(name + ".meta") 45 | 46 | treeFile := name + ".tree" 47 | defer os.Remove(treeFile) 48 | gannoy, _ := NewGannoyIndex(name+".meta", Angular{}, RandRandom{}) 49 | 50 | // first item (be root) 51 | err := gannoy.AddItem(10, []float64{1.1, 1.2, 1.3}) 52 | if err != nil { 53 | t.Errorf("GannoyIndex AddItem should not return error.") 54 | } 55 | node, _ := gannoy.nodes.getNodeByKey(10) 56 | for i := 0; i < tree; i++ { 57 | if !node.isRoot(i) { 58 | t.Errorf("GannoyIndex AddItem at first should build root node.") 59 | } 60 | } 61 | 62 | // second item (change root and make tree) 63 | err = gannoy.AddItem(20, []float64{1.1, 1.2, 1.3}) 64 | if err != nil { 65 | t.Errorf("GannoyIndex AddItem should not return error.") 66 | } 67 | node, _ = gannoy.nodes.getNodeByKey(20) 68 | for i := 0; i < tree; i++ { 69 | if node.isRoot(i) { 70 | t.Errorf("GannoyIndex AddItem at second should not build root node.") 71 | } 72 | 73 | if parent, _ := gannoy.nodes.getNode(node.parents[i]); !parent.isRoot(i) { 74 | t.Errorf("GannoyIndex AddItem at second should build root child node.") 75 | } 76 | } 77 | } 78 | 79 | func TestGannoyIndexAddItemToLeafNode(t *testing.T) { 80 | tree := 2 81 | name := "test_gannoy_index_add_item_to_leaf_node" 82 | CreateMeta(".", name, tree, 3, 3) 83 | defer os.Remove(name + ".meta") 84 | 85 | treeFile := name + ".tree" 86 | defer os.Remove(treeFile) 87 | gannoy, _ := NewGannoyIndex(name+".meta", Angular{}, &TestLoopRandom{max: 1}) 88 | 89 | // add item to leaf node 90 | items := [][]float64{ 91 | {1.1, 1.2, 1.3}, 92 | {-1.1, -1.2, -1.3}, 93 | {-1.1, -1.2, -1.3}, 94 | {-1.1, -1.2, -1.3}, 95 | } 96 | for i, item := range items { 97 | gannoy.AddItem(i*10, item) 98 | } 99 | 100 | // Current tree 101 | // 7 [-1] (-1) [nDescendants: 4, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 102 | // 8 [-1] (7) [nDescendants: 3, v: []] 103 | // 
1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 104 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 105 | // 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 106 | // 0 [0] (7) [nDescendants: 1, v: [1.1 1.2 1.3]] 107 | 108 | err := gannoy.AddItem(40, []float64{1.1, 1.2, 1.3}) 109 | if err != nil { 110 | t.Errorf("GannoyIndex AddItem should not return error.") 111 | } 112 | 113 | // Expect tree (build new bucket node that contain node 0[0] and new node 3[40].) 114 | // 6 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 115 | // 9 [-1] (6) [nDescendants: 3, v: []] 116 | // 1 [10] (9) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 117 | // 4 [20] (9) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 118 | // 5 [30] (9) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 119 | // 10 [-1] (6) [nDescendants: 2, v: []] 120 | // 0 [0] (10) [nDescendants: 1, v: [1.1 1.2 1.3]] 121 | // 3 [40] (10) [nDescendants: 1, v: [1.1 1.2 1.3]] 122 | 123 | node, _ := gannoy.nodes.getNodeByKey(40) 124 | for i := 0; i < tree; i++ { 125 | parent, _ := gannoy.nodes.getNode(node.parents[i]) 126 | if len(parent.children) != 2 { 127 | t.Errorf("GannoyIndex AddItem to leaf node should return node that contain 2 children.") 128 | } 129 | for _, child := range parent.children { 130 | if child != node.id && child != 0 { 131 | t.Errorf("GannoyIndex AddItem to leaf node should return node that contain 0[0] and 3[40].") 132 | } 133 | } 134 | } 135 | } 136 | 137 | func TestGannoyIndexAddItemToBucketNode(t *testing.T) { 138 | tree := 2 139 | name := "test_gannoy_index_add_item_to_bucket_node" 140 | CreateMeta(".", name, tree, 3, 3) 141 | defer os.Remove(name + ".meta") 142 | 143 | treeFile := name + ".tree" 144 | defer os.Remove(treeFile) 145 | gannoy, _ := NewGannoyIndex(name+".meta", Angular{}, &TestLoopRandom{max: 1}) 146 | 147 | // add item to bucket node 148 | items := [][]float64{ 149 | {1.1, 1.2, 1.3}, 150 | {-1.1, -1.2, -1.3}, 151 | {-1.1, -1.2, -1.3}, 152 | {-1.1, -1.2, 
-1.3}, 153 | {1.1, 1.2, 1.3}, 154 | } 155 | for i, item := range items { 156 | gannoy.AddItem(i*10, item) 157 | } 158 | 159 | // Current tree 160 | // 7 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 161 | // 8 [-1] (7) [nDescendants: 3, v: []] 162 | // 1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 163 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 164 | // 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 165 | // 2 [-1] (7) [nDescendants: 2, v: []] 166 | // 0 [0] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 167 | // 3 [40] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 168 | 169 | err := gannoy.AddItem(50, []float64{1.1, 1.2, 1.3}) 170 | if err != nil { 171 | t.Errorf("GannoyIndex AddItem should not return error.") 172 | } 173 | 174 | // Expect tree 175 | // 7 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 176 | // 8 [-1] (7) [nDescendants: 3, v: []] 177 | // 1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 178 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 179 | // 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 180 | // 2 [-1] (7) [nDescendants: 3, v: []] 181 | // 0 [0] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 182 | // 3 [40] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 183 | // 11 [50] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 184 | 185 | node, _ := gannoy.nodes.getNodeByKey(50) 186 | for i := 0; i < tree; i++ { 187 | parent, _ := gannoy.nodes.getNode(node.parents[i]) 188 | if len(parent.children) != 3 { 189 | t.Errorf("GannoyIndex AddItem to leaf node should return node that contain 3 children.") 190 | } 191 | } 192 | 193 | // add item in full bucket node 194 | err = gannoy.AddItem(60, []float64{1.1, 1.2, 1.3}) 195 | if err != nil { 196 | t.Errorf("GannoyIndex AddItem should not return error.") 197 | } 198 | 199 | // Expect tree (build new branch node that contain two branch nodes) 200 | // 7 [-1] (-1) [nDescendants: 6, v: [0.5280168968110516 0.576018432884782 
0.6240199689585159]] 201 | // 8 [-1] (7) [nDescendants: 3, v: []] 202 | // 1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 203 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 204 | // 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 205 | // 13 [-1] (7) [nDescendants: 4, v: [0 0 0]] 206 | // 16 [-1] (13) [nDescendants: 2, v: []] 207 | // 2 [40] (16) [nDescendants: 1, v: [1.1 1.2 1.3]] 208 | // 12 [60] (16) [nDescendants: 1, v: [1.1 1.2 1.3]] 209 | // 18 [-1] (13) [nDescendants: 2, v: []] 210 | // 0 [0] (18) [nDescendants: 1, v: [1.1 1.2 1.3]] 211 | // 11 [50] (18) [nDescendants: 1, v: [1.1 1.2 1.3]] 212 | 213 | node, _ = gannoy.nodes.getNodeByKey(60) 214 | for i := 0; i < tree; i++ { 215 | parent, _ := gannoy.nodes.getNode(node.parents[i]) 216 | grandParent, _ := gannoy.nodes.getNode(parent.parents[i]) 217 | if grandParent.nDescendants != 4 || grandParent.isLeaf() || grandParent.isBucket() { 218 | t.Errorf("GannoyIndex AddItem to full branch node should return branch node that has 4 nDescendants.") 219 | } 220 | } 221 | } 222 | 223 | func TestGannoyIndexRemoveItem(t *testing.T) { 224 | tree := 2 225 | name := "test_gannoy_index_remove_item" 226 | CreateMeta(".", name, tree, 3, 3) 227 | defer os.Remove(name + ".meta") 228 | 229 | treeFile := name + ".tree" 230 | defer os.Remove(treeFile) 231 | gannoy, _ := NewGannoyIndex(name+".meta", Angular{}, &TestLoopRandom{max: 1}) 232 | 233 | // remove from bucket node 234 | items := [][]float64{ 235 | {1.1, 1.2, 1.3}, 236 | {-1.1, -1.2, -1.3}, 237 | {-1.1, -1.2, -1.3}, 238 | {-1.1, -1.2, -1.3}, 239 | {1.1, 1.2, 1.3}, 240 | {1.1, 1.2, 1.3}, 241 | } 242 | for i, item := range items { 243 | gannoy.AddItem(i*10, item) 244 | } 245 | 246 | // Current tree 247 | // 6 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 248 | // 8 [-1] (6) [nDescendants: 3, v: []] 249 | // 1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 250 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 251 | 
// 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 252 | // 2 [-1] (6) [nDescendants: 3, v: []] 253 | // 0 [0] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 254 | // 3 [40] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 255 | // 11 [50] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 256 | 257 | removed, _ := gannoy.nodes.getNodeByKey(50) 258 | removedId := removed.id 259 | parents := removed.parents 260 | 261 | err := gannoy.removeItem(50) 262 | if err != nil { 263 | t.Errorf("GannoyIndex RemoveItem should not return error.") 264 | } 265 | 266 | // Expect tree (remove specified node from parent bucket node) 267 | // 6 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]] 268 | // 8 [-1] (6) [nDescendants: 3, v: []] 269 | // 1 [10] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 270 | // 4 [20] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 271 | // 5 [30] (8) [nDescendants: 1, v: [-1.1 -1.2 -1.3]] 272 | // 2 [-1] (6) [nDescendants: 2, v: []] 273 | // 0 [0] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 274 | // 3 [40] (2) [nDescendants: 1, v: [1.1 1.2 1.3]] 275 | 276 | for i := 0; i < tree; i++ { 277 | parent, _ := gannoy.nodes.getNode(parents[i]) 278 | if len(parent.children) != 2 { 279 | t.Errorf("GannoyIndex RemoveItem should return parent node that contain 2 children.") 280 | } 281 | for _, child := range parent.children { 282 | if child == removedId { 283 | t.Errorf("GannoyIndex RemoveItem should not return removeItem.") 284 | } 285 | } 286 | } 287 | 288 | removed, _ = gannoy.nodes.getNodeByKey(40) 289 | removedId = removed.id 290 | parents = removed.parents 291 | grandParents := make([]int, tree) 292 | for i, p := range parents { 293 | parent, _ := gannoy.nodes.getNode(p) 294 | grandParents[i] = parent.parents[i] 295 | } 296 | 297 | err = gannoy.removeItem(40) 298 | if err != nil { 299 | t.Errorf("GannoyIndex RemoveItem should not return error.") 300 | } 301 | 302 | // Expect tree (remove specified node and parent node, and be leaf node that remaining one.) 
// TestGannoyIndexUpdateItem checks that updating the vector of an existing
// item moves it, in every tree, out of its previous parent bucket and into the
// sibling bucket under the same grandparent.
func TestGannoyIndexUpdateItem(t *testing.T) {
	tree := 2
	name := "test_gannoy_index_update_item"
	CreateMeta(".", name, tree, 3, 4)
	defer os.Remove(name + ".meta")

	treeFile := name + ".tree"
	defer os.Remove(treeFile)
	gannoy, _ := NewGannoyIndex(name+".meta", Angular{}, &TestLoopRandom{max: 1})

	// Build the starting index: two positive and three negative vectors,
	// stored under keys 0, 10, 20, 30 and 40.
	items := [][]float64{
		{1.1, 1.2, 1.3},
		{-1.1, -1.2, -1.3},
		{1.1, 1.2, 1.3},
		{-1.1, -1.2, -1.3},
		{-1.1, -1.2, -1.3},
	}
	for i, item := range items {
		gannoy.AddItem(i*10, item)
	}

	// Remember where key 30 lives before the update so the old parents can
	// be inspected afterwards.
	updated, _ := gannoy.nodes.getNodeByKey(30)
	updatedId := updated.id
	updatedParents := updated.parents

	// Current tree, printed as: id [key] (parent) [nDescendants, v]
	// 7 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]]
	//   11 [-1] (7) [nDescendants: 3, v: []]
	//     1 [10] (11) [nDescendants: 1, v: [-1.1 -1.2 -1.3]]
	//     5 [30] (11) [nDescendants: 1, v: [-1.1 -1.2 -1.3]]
	//     6 [40] (11) [nDescendants: 1, v: [-1.1 -1.2 -1.3]]
	//   9 [-1] (7) [nDescendants: 2, v: []]
	//     0 [0] (9) [nDescendants: 1, v: [1.1 1.2 1.3]]
	//     4 [20] (9) [nDescendants: 1, v: [1.1 1.2 1.3]]

	// Flip key 30 from the negative vector to the positive one.
	err := gannoy.UpdateItem(30, []float64{1.1, 1.2, 1.3})
	if err != nil {
		t.Errorf("GannoyIndex UpdateItem should not return error.")
	}

	// Expected tree (the item moves to the other bucket node)
	// 7 [-1] (-1) [nDescendants: 5, v: [0.5280168968110516 0.576018432884782 0.6240199689585159]]
	//   11 [-1] (7) [nDescendants: 2, v: []]
	//     1 [10] (11) [nDescendants: 1, v: [-1.1 -1.2 -1.3]]
	//     6 [40] (11) [nDescendants: 1, v: [-1.1 -1.2 -1.3]]
	//   9 [-1] (7) [nDescendants: 3, v: []]
	//     0 [0] (9) [nDescendants: 1, v: [1.1 1.2 1.3]]
	//     4 [20] (9) [nDescendants: 1, v: [1.1 1.2 1.3]]
	//     5 [30] (9) [nDescendants: 1, v: [1.1 1.2 1.3]]

	for i := 0; i < tree; i++ {
		// The previous parent must no longer list the updated node.
		updatedParent, _ := gannoy.nodes.getNode(updatedParents[i])
		for _, c := range updatedParent.children {
			if c == updatedId {
				t.Errorf("GannoyIndex UpdateItem should move to new bucket node.")
			}
		}
		// Every sibling of the previous parent must now contain it.
		grandParent, _ := gannoy.nodes.getNode(updatedParent.parents[i])
		for _, p := range grandParent.children {
			if p != updatedParent.id {
				newParent, _ := gannoy.nodes.getNode(p)
				contain := false
				for _, child := range newParent.children {
					if child == updatedId {
						contain = true
					}
				}
				if !contain {
					t.Errorf("2 GannoyIndex UpdateItem should move to new bucket node.")
				}
			}
		}
	}
}
Angular{}, &TestLoopRandom{max: 1}) 412 | 413 | // remove from bucket node 414 | items := [][]float64{ 415 | {1.1, 1.2, 1.3}, 416 | {-1.1, -1.2, -1.3}, 417 | {1.1, 1.2, 1.3}, 418 | {-1.1, -1.2, -1.3}, 419 | {-1.1, -1.2, -1.3}, 420 | } 421 | for i, item := range items { 422 | gannoy.AddItem(i*10, item) 423 | } 424 | 425 | size := 3 426 | // Not found key 427 | nns, err := gannoy.GetNnsByKey(100, size, -1) 428 | if len(nns) != 0 { 429 | t.Errorf("GannoyIndex GetNnsByKey should return empty list if key is not found.") 430 | } 431 | if err == nil { 432 | t.Errorf("GannoyIndex GetNnsByKey should return error if key is not found.") 433 | } 434 | 435 | // Exist key 436 | nns, err = gannoy.GetNnsByKey(40, size, -1) 437 | if len(nns) != size { 438 | t.Errorf("GannoyIndex GetNnsByKey should return specified size list if key exist.") 439 | 440 | } 441 | if err != nil { 442 | t.Errorf("GannoyIndex GetNnsByKey should not return error if key exist.") 443 | } 444 | } 445 | -------------------------------------------------------------------------------- /lock.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "io" 5 | "os/exec" 6 | "strings" 7 | "syscall" 8 | 9 | "github.com/coreos/go-semver/semver" 10 | "regexp" 11 | ) 12 | 13 | type Locker interface { 14 | ReadLock(uintptr, int64, int64) error 15 | WriteLock(uintptr, int64, int64) error 16 | UnLock(uintptr, int64, int64) error 17 | } 18 | 19 | func newLocker() Locker { 20 | bytes, err := exec.Command("uname", "-sr").Output() 21 | if err != nil { 22 | return Flock{} 23 | } 24 | if validateKernel(bytes) { 25 | return Fcntl{} 26 | } 27 | return Flock{} 28 | } 29 | 30 | func validateKernel(bytes []byte) bool { 31 | kernel := strings.Split(strings.TrimRight(string(bytes), "\n"), " ") 32 | nk := normalizeKernelVersion(kernel[1]) 33 | if kernel[0] == "Linux" && !semver.New(nk).LessThan(*semver.New("3.15.0")) { 34 | return true 35 | } 36 | return false 37 | } 38 | 
// kernelSuffix matches the distribution suffixes that are stripped from a
// kernel release string before it is parsed as a version. It is compiled once
// at package scope, and the dots are escaped so they only match a literal ".".
var kernelSuffix = regexp.MustCompile(`\.elrepo\.x86_64|\.el7\.x86_64`)

// normalizeKernelVersion removes the ".elrepo.x86_64" and ".el7.x86_64"
// suffixes from the kernel release v and returns the remainder unchanged.
func normalizeKernelVersion(v string) string {
	return kernelSuffix.ReplaceAllString(v, "")
}

// Fcntl is a Locker that locks byte ranges with fcntl(2).
//
// Only Linux and kernel version 3.15 or later.
// This depends on open file description lock (F_OFD_SETLKW).
type Fcntl struct {
}

// F_OFD_SETLKW is the fcntl command number used by Fcntl for every request.
const F_OFD_SETLKW = 38

// ReadLock places a read (F_RDLCK) lock on len bytes of fd starting at start.
func (f Fcntl) ReadLock(fd uintptr, start, len int64) error {
	return f.fcntl(syscall.F_RDLCK, fd, start, len)
}

// WriteLock places a write (F_WRLCK) lock on len bytes of fd starting at start.
func (f Fcntl) WriteLock(fd uintptr, start, len int64) error {
	return f.fcntl(syscall.F_WRLCK, fd, start, len)
}

// UnLock releases (F_UNLCK) the lock on len bytes of fd starting at start.
func (f Fcntl) UnLock(fd uintptr, start, len int64) error {
	return f.fcntl(syscall.F_UNLCK, fd, start, len)
}

// fcntl issues F_OFD_SETLKW with lock type typ for the range [start, start+len),
// measured from the beginning of the file.
func (f Fcntl) fcntl(typ int16, fd uintptr, start, len int64) error {
	lock := syscall.Flock_t{
		Start:  start,
		Len:    len,
		Type:   typ,
		Whence: io.SeekStart,
	}
	return syscall.FcntlFlock(fd, F_OFD_SETLKW, &lock)
}

// Flock is a Locker built on flock(2). The start and len arguments of its
// methods are accepted for interface compatibility but are not used: the lock
// request is made for fd as a whole.
type Flock struct {
}

// ReadLock takes a shared (LOCK_SH) lock on fd.
func (f Flock) ReadLock(fd uintptr, start, len int64) error {
	return f.flock(fd, syscall.LOCK_SH)
}

// WriteLock takes an exclusive (LOCK_EX) lock on fd.
func (f Flock) WriteLock(fd uintptr, start, len int64) error {
	return f.flock(fd, syscall.LOCK_EX)
}

// UnLock releases (LOCK_UN) the lock held on fd.
func (f Flock) UnLock(fd uintptr, start, len int64) error {
	return f.flock(fd, syscall.LOCK_UN)
}

// flock calls flock(2) on fd with the operation how.
func (f Flock) flock(fd uintptr, how int) error {
	return syscall.Flock(int(fd), how)
}
// Maps is a mutex-guarded lookup table from item key to node id.
type Maps struct {
	mu *sync.RWMutex

	keyToId map[int]int
}

// newMaps returns an empty Maps with its own lock.
func newMaps() Maps {
	var m Maps
	m.mu = &sync.RWMutex{}
	m.keyToId = map[int]int{}
	return m
}

// add records that key is stored in node id, replacing any earlier entry.
func (m *Maps) add(id, key int) {
	m.mu.Lock()
	m.keyToId[key] = id
	m.mu.Unlock()
}

// remove forgets key; removing an unknown key is a no-op.
func (m *Maps) remove(key int) {
	m.mu.Lock()
	delete(m.keyToId, key)
	m.mu.Unlock()
}

// getId returns the node id registered for key, or -1 and an error when the
// key is unknown.
func (m Maps) getId(key int) (int, error) {
	m.mu.RLock()
	id, found := m.keyToId[key]
	m.mu.RUnlock()

	if found {
		return id, nil
	}
	return -1, fmt.Errorf("not found")
}

// isExist reports whether key is registered.
func (m Maps) isExist(key int) bool {
	if _, err := m.getId(key); err != nil {
		return false
	}
	return true
}
// CreateMeta creates the database file "<file>.meta" inside path and writes
// its header: tree, dim and K as big-endian int32 values followed by one int32
// root slot per tree, each initialised to -1.
//
// It returns an error if the file already exists, cannot be created, or
// cannot be written completely. A file that could not be written completely
// is removed again so that a later call is not rejected as "already exist".
func CreateMeta(path, file string, tree, dim, K int) (err error) {
	database := filepath.Join(path, file+".meta")
	if _, statErr := os.Stat(database); statErr == nil {
		return fmt.Errorf("Already exist database: %s.", database)
	}

	f, err := os.Create(database)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a failed close of a freshly written file as well.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			os.Remove(database)
		}
	}()

	roots := make([]int32, tree)
	for i := range roots {
		roots[i] = -1
	}
	for _, field := range []interface{}{int32(tree), int32(dim), int32(K), roots} {
		if err = binary.Write(f, binary.BigEndian, field); err != nil {
			return err
		}
	}
	return nil
}

// meta is an opened meta file together with the header values read from it.
type meta struct {
	path string
	file *os.File
	tree int
	dim  int
	K    int
}

// loadMeta opens filename read-write and decodes the tree, dim and K header
// fields. It returns an error when the file cannot be opened or when the
// 12-byte header cannot be read in full; the file is closed again in that case.
func loadMeta(filename string) (meta, error) {
	file, err := os.OpenFile(filename, os.O_RDWR, 0)
	if err != nil {
		return meta{}, err
	}

	b := make([]byte, 4*3)
	n, err := syscall.Pread(int(file.Fd()), b, 0)
	if err == nil && n != len(b) {
		err = io.ErrUnexpectedEOF
	}
	var header [3]int32 // tree, dim, K
	if err == nil {
		err = binary.Read(bytes.NewReader(b), binary.BigEndian, &header)
	}
	if err != nil {
		file.Close()
		return meta{}, err
	}

	return meta{
		path: filename,
		file: file,
		tree: int(header[0]),
		dim:  int(header[1]),
		K:    int(header[2]),
	}, nil
}

// rootOffset returns the byte offset of root slot index inside the meta file.
func (m meta) rootOffset(index int) int64 {
	return int64(4 + // tree
		4 + // dim
		4 + // K
		4*index) // roots
}

// lockRange applies the fcntl lock type typ (F_RDLCK, F_WRLCK or F_UNLCK) to
// length bytes of the meta file starting at start, using F_SETLKW.
func (m meta) lockRange(typ int16, start, length int64) error {
	return syscall.FcntlFlock(m.file.Fd(), syscall.F_SETLKW, &syscall.Flock_t{
		Start:  start,
		Len:    length,
		Type:   typ,
		Whence: io.SeekStart,
	})
}

// roots reads all root slots under a read lock and returns them, one per
// tree. It returns an empty slice when the lock cannot be taken or the slots
// cannot be read in full.
func (m meta) roots() []int {
	start, size := m.rootOffset(0), int64(m.tree*4)
	if err := m.lockRange(syscall.F_RDLCK, start, size); err != nil {
		return []int{}
	}
	defer m.lockRange(syscall.F_UNLCK, start, size)

	b := make([]byte, m.tree*4)
	if n, err := syscall.Pread(int(m.file.Fd()), b, start); err != nil || n != len(b) {
		return []int{}
	}
	roots := make([]int32, m.tree)
	if err := binary.Read(bytes.NewReader(b), binary.BigEndian, roots); err != nil {
		return []int{}
	}

	result := make([]int, m.tree)
	for i, r := range roots {
		result[i] = int(r)
	}
	return result
}

// updateRoot overwrites root slot index with root while holding a write lock
// on that slot.
func (m meta) updateRoot(index, root int) error {
	offset := m.rootOffset(index)
	if err := m.lockRange(syscall.F_WRLCK, offset, 4); err != nil {
		return err
	}
	defer m.lockRange(syscall.F_UNLCK, offset, 4)

	buf := &bytes.Buffer{}
	if err := binary.Write(buf, binary.BigEndian, int32(root)); err != nil {
		return err
	}
	_, err := syscall.Pwrite(int(m.file.Fd()), buf.Bytes(), offset)
	return err
}

// treePath returns the path of the ".tree" file that sits next to the meta file.
func (m meta) treePath() string {
	return m.filePath("tree")
}

// filePath returns the meta file path with its extension replaced by newExt.
// Only the trailing extension is replaced, so directories that happen to
// contain the same text and paths without any extension are handled correctly.
func (m meta) filePath(newExt string) string {
	base := strings.TrimSuffix(m.path, filepath.Ext(m.path))
	return fmt.Sprintf("%s.%s", base, newExt)
}
file := "test_update_root" 58 | 59 | tree := 2 60 | dim := 3 61 | K := 4 62 | 63 | CreateMeta(".", file, tree, dim, K) 64 | defer os.Remove(file + ".meta") 65 | 66 | meta, _ := loadMeta(file + ".meta") 67 | meta.updateRoot(0, 10) 68 | 69 | expects := []int{10, -1} 70 | for i, root := range meta.roots() { 71 | if root != expects[i] { 72 | t.Errorf("Updated root should be %d, but %d.", expects[i], root) 73 | break 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | type Nodes struct { 4 | Storage 5 | free Free 6 | maps Maps 7 | } 8 | 9 | func newNodes(filename string, tree, dim, K int) Nodes { 10 | // TODO Switch storage by parameter 11 | nodes := Nodes{ 12 | Storage: newFile(filename, tree, dim, K), 13 | } 14 | // initialize free and maps 15 | nodes.initialize() 16 | return nodes 17 | } 18 | 19 | func (n *Nodes) initialize() { 20 | n.free = newFree() 21 | n.maps = newMaps() 22 | 23 | iterator := make(chan Node) 24 | go n.Iterate(iterator) 25 | 26 | for node := range iterator { 27 | if node.free { 28 | n.free.push(node.id) 29 | } else { 30 | if node.isLeaf() { 31 | n.maps.add(node.id, node.key) 32 | } 33 | } 34 | } 35 | } 36 | 37 | func (ns *Nodes) newNode() Node { 38 | node := Node{ 39 | storage: ns.Storage, 40 | 41 | nDescendants: 1, 42 | id: -1, 43 | key: -1, 44 | parents: []int{}, 45 | children: []int{0, 0}, 46 | v: []float64{}, 47 | free: false, 48 | 49 | isNewRecord: true, 50 | } 51 | if free, err := ns.free.pop(); err == nil { 52 | node.id = free 53 | node.isNewRecord = false 54 | } 55 | return node 56 | } 57 | 58 | func (ns Nodes) getNode(id int) (Node, error) { 59 | return ns.Storage.Find(id) 60 | } 61 | 62 | func (ns *Nodes) getNodeByKey(key int) (Node, error) { 63 | id, err := ns.maps.getId(key) 64 | if err != nil { 65 | return Node{}, err 66 | } 67 | return ns.getNode(id) 68 | } 69 | 
70 | type Node struct { 71 | storage Storage 72 | 73 | nDescendants int 74 | id int 75 | key int 76 | parents []int 77 | children []int 78 | v []float64 79 | free bool 80 | isNewRecord bool 81 | } 82 | 83 | func (n Node) isLeaf() bool { 84 | return n.nDescendants == 1 85 | } 86 | 87 | func (n Node) isBucket() bool { 88 | return len(n.v) == 0 89 | } 90 | 91 | func (n Node) isRoot(index int) bool { 92 | return n.parents[index] == -1 93 | } 94 | 95 | func (n *Node) save() error { 96 | if n.isNewRecord { 97 | id, err := n.storage.Create(*n) 98 | if err != nil { 99 | return err 100 | } 101 | n.id = id 102 | n.isNewRecord = false 103 | return nil 104 | } else { 105 | return n.storage.Update(*n) 106 | } 107 | } 108 | 109 | func (n *Node) updateParents(index, parent int) error { 110 | return n.storage.UpdateParent(n.id, index, parent) 111 | } 112 | 113 | func (n *Node) destroy() error { 114 | err := n.storage.Delete(*n) 115 | if err != nil { 116 | return err 117 | } 118 | n.free = true 119 | return nil 120 | } 121 | -------------------------------------------------------------------------------- /node_test.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestNewNodeAtFirst(t *testing.T) { 9 | name := "test_new_node_at_first.tree" 10 | defer os.Remove(name) 11 | nodes := newNodes(name, 2, 3, 4) 12 | 13 | if len(nodes.free.free) != 0 { 14 | t.Errorf("Initialized nodes.free size should be 0, but %d", len(nodes.free.free)) 15 | } 16 | if len(nodes.maps.keyToId) != 0 { 17 | t.Errorf("Initialized nodes.maps size should be 0, but %d", len(nodes.maps.keyToId)) 18 | } 19 | } 20 | 21 | func TestNewNodeMpas(t *testing.T) { 22 | name := "test_new_node_maps.tree" 23 | defer os.Remove(name) 24 | nodes := newNodes(name, 2, 3, 4) 25 | 26 | // Create 27 | node := nodes.newNode() 28 | node.key = 10 29 | node.parents = []int{2, 3} 30 | node.v = []float64{1.1, 1.2, 1.3} 31 | 
// TestNodeSaveNew checks that saving a node obtained from an empty Nodes
// succeeds, assigns the node an id and clears its isNewRecord flag.
func TestNodeSaveNew(t *testing.T) {
	name := "test_node_save_new.tree"
	defer os.Remove(name)
	nodes := newNodes(name, 2, 3, 4)

	// Create
	node := nodes.newNode()
	node.key = 10
	node.parents = []int{2, 3}
	node.v = []float64{1.1, 1.2, 1.3}
	err := node.save()
	if err != nil {
		t.Errorf("node save should not return error.")
	}

	// newNode leaves id at -1 when no free id is available, so a changed id
	// shows that save stored the one returned by the storage.
	if node.id == -1 {
		t.Errorf("node save should set id.")
	}
	if node.isNewRecord {
		t.Errorf("node save should set isNewRecord to false.")
	}
}
// Queue is a priority-queue element: a value paired with its priority.
type Queue struct {
	priority float64
	value    int
}

// Less reports whether q has a strictly greater priority than other, which
// must be a *Queue (any other type makes the type assertion panic).
func (q *Queue) Less(other interface{}) bool {
	rhs := other.(*Queue)
	return rhs.priority < q.priority
}
// Random is the source of random choices used by the index.
type Random interface {
	index(int) int
	flip() int
}

// RandRandom implements Random on top of the math/rand global generator.
type RandRandom struct {
}

// Seed the global generator once at start-up. Seeding inside index re-seeded
// it on every call, which takes the generator lock each time and yields
// correlated values whenever two calls observe the same clock reading.
func init() {
	rand.Seed(time.Now().UnixNano())
}

// index returns a random integer in [0, n). Like rand.Intn it panics when
// n <= 0.
func (r RandRandom) index(n int) int {
	return rand.Intn(n)
}

// flip returns 0 or 1 at random.
func (r RandRandom) flip() int {
	return r.index(2)
}
-------------------------------------------------------------------------------- 1 | data-dir = "/var/lib/gannoy" 2 | log-dir = "/var/log/gannoy" 3 | lock-dir = "/var/run/gannoy" 4 | server-starter = true 5 | # timeout = 10 6 | # max-connections = 100 7 | -------------------------------------------------------------------------------- /rpmbuild/SPECS/gannoy.spec: -------------------------------------------------------------------------------- 1 | %define _binaries_in_noarch_packages_terminate_build 0 2 | %define gannoy_user gannoy 3 | %define gannoy_group %{gannoy_user} 4 | %define gannoy_confdir %{_sysconfdir}/gannoy 5 | %define gannoy_home %{_localstatedir}/lib/gannoy 6 | %define gannoy_logdir %{_localstatedir}/log/gannoy 7 | %define gannoy_rundir %{_localstatedir}/run/gannoy 8 | 9 | Summary: Approximate nearest neighbor search server and dynamic index written in Golang. 10 | Name: gannoy 11 | Version: 0.0.1 12 | Release: 1 13 | License: MIT 14 | Group: Applications/System 15 | URL: https://github.com/monochromegane/gannoy 16 | 17 | Source0: %{name}-%{version} 18 | Source1: %{name}-converter-%{version} 19 | Source2: %{name}-db-%{version} 20 | Source3: %{name}-db.toml 21 | Source4: %{name}-db.service 22 | Source5: %{name}-db.logrotate 23 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root 24 | 25 | %{?systemd_requires} 26 | BuildRequires: systemd 27 | 28 | %description 29 | %{summary} 30 | 31 | %prep 32 | 33 | %build 34 | 35 | %install 36 | %{__rm} -rf %{buildroot} 37 | %{__mkdir} -p %{buildroot}%{gannoy_rundir} 38 | %{__mkdir} -p %{buildroot}%{gannoy_home} 39 | %{__mkdir} -p %{buildroot}%{gannoy_logdir} 40 | %{__install} -Dp -m0755 %{SOURCE0} %{buildroot}/usr/bin/%{name} 41 | %{__install} -Dp -m0755 %{SOURCE1} %{buildroot}/usr/bin/%{name}-converter 42 | %{__install} -Dp -m0755 %{SOURCE2} %{buildroot}/usr/bin/%{name}-db 43 | %{__install} -Dp -m0644 %{SOURCE3} %{buildroot}%{gannoy_confdir}/%{name}-db.toml 44 | %{__install} -Dp -m0644 %{SOURCE4} 
%{buildroot}/usr/lib/systemd/system/%{name}-db.service 45 | %{__install} -Dp -m0644 %{SOURCE5} %{buildroot}/etc/logrotate.d/%{name}-db 46 | 47 | %clean 48 | %{__rm} -rf %{buildroot} 49 | 50 | %pre 51 | %{_sbindir}/useradd -c "Gannoy user" -s /bin/false -r -d %{gannoy_home} %{gannoy_user} 2>/dev/null || : 52 | 53 | %post 54 | %systemd_post %{name}-db.service 55 | systemctl enable %{name}-db.service 56 | 57 | %preun 58 | %systemd_preun %{name}-db.service 59 | 60 | %postun 61 | %systemd_postun %{name}-db.service 62 | 63 | %files 64 | %defattr(-,root,root) 65 | /usr/bin/%{name} 66 | /usr/bin/%{name}-converter 67 | /usr/bin/%{name}-db 68 | %config(noreplace) %{gannoy_confdir}/%{name}-db.toml 69 | %config(noreplace) /etc/logrotate.d/%{name}-db 70 | %config(noreplace) /usr/lib/systemd/system/%{name}-db.service 71 | %attr(-,%{gannoy_user},%{gannoy_group}) %dir %{gannoy_rundir} 72 | %attr(-,%{gannoy_user},%{gannoy_group}) %dir %{gannoy_home} 73 | %attr(-,%{gannoy_user},%{gannoy_group}) %dir %{gannoy_logdir} 74 | -------------------------------------------------------------------------------- /sort.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | func HeapSort(array []sorter, order, last int) { 4 | var heapifier heapifier 5 | switch order { 6 | case ASC: 7 | heapifier = heapifyFunc(downHeapify) 8 | default: 9 | heapifier = heapifyFunc(upHeapify) 10 | } 11 | 12 | heapSort(heapifier, array, last) 13 | } 14 | 15 | type heapifier interface { 16 | heapify([]sorter, int, int) 17 | } 18 | 19 | type heapifyFunc func([]sorter, int, int) 20 | 21 | func (f heapifyFunc) heapify(array []sorter, root, length int) { 22 | f(array, root, length) 23 | } 24 | 25 | type sorter struct { 26 | id int 27 | value float64 28 | } 29 | 30 | func heapSort(heapifier heapifier, array []sorter, last int) { 31 | // initialize 32 | for i := len(array) / 2; i >= 0; i-- { 33 | heapifier.heapify(array, i, len(array)) 34 | } 35 | 36 | // remove top 
and do heapify 37 | bp := len(array) - last 38 | for length := len(array); length > 1; length-- { 39 | lastIndex := length - 1 40 | array[0], array[lastIndex] = array[lastIndex], array[0] 41 | heapifier.heapify(array, 0, lastIndex) 42 | if lastIndex == bp { 43 | break 44 | } 45 | } 46 | } 47 | 48 | func downHeapify(array []sorter, root, length int) { 49 | max := root 50 | l := (root * 2) + 1 51 | r := l + 1 52 | 53 | if l < length && array[l].value > array[max].value { 54 | max = l 55 | } 56 | 57 | if r < length && array[r].value > array[max].value { 58 | max = r 59 | } 60 | 61 | if max != root { 62 | array[root], array[max] = array[max], array[root] 63 | downHeapify(array, max, length) 64 | } 65 | } 66 | 67 | func upHeapify(array []sorter, root, length int) { 68 | min := root 69 | l := (root * 2) + 1 70 | r := l + 1 71 | 72 | if l < length && array[l].value < array[min].value { 73 | min = l 74 | } 75 | 76 | if r < length && array[r].value < array[min].value { 77 | min = r 78 | } 79 | 80 | if min != root { 81 | array[root], array[min] = array[min], array[root] 82 | upHeapify(array, min, length) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /sort_test.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHeapSortAsc(t *testing.T) { 8 | array := testSortArray() 9 | HeapSort(array, ASC, len(array)) 10 | 11 | expects := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9} 12 | for i, expect := range expects { 13 | if array[i].value != expect { 14 | t.Errorf("Sorted array should be %v, but %v.", expects, array) 15 | break 16 | } 17 | } 18 | } 19 | 20 | func TestHeapSortAscPartial(t *testing.T) { 21 | array := testSortArray() 22 | HeapSort(array, ASC, 3) 23 | 24 | expects := []float64{7, 8, 9} 25 | for i, expect := range expects { 26 | if array[6:][i].value != expect { 27 | t.Errorf("Sorted array should be %v, but %v.", expects, 
array) 28 | break 29 | } 30 | } 31 | } 32 | 33 | func TestHeapSortDesc(t *testing.T) { 34 | array := testSortArray() 35 | HeapSort(array, DESC, len(array)) 36 | 37 | expects := []float64{9, 8, 7, 6, 5, 4, 3, 2, 1} 38 | for i, expect := range expects { 39 | if array[i].value != expect { 40 | t.Errorf("Sorted array should be %v, but %v.", expects, array) 41 | break 42 | } 43 | } 44 | } 45 | 46 | func TestHeapSortDescPartial(t *testing.T) { 47 | array := testSortArray() 48 | HeapSort(array, DESC, 3) 49 | 50 | expects := []float64{3, 2, 1} 51 | for i, expect := range expects { 52 | if array[6:][i].value != expect { 53 | t.Errorf("Sorted array should be %v, but %v.", expects, array) 54 | break 55 | } 56 | } 57 | } 58 | 59 | func testSortArray() []sorter { 60 | return []sorter{{value: 5}, {value: 4}, {value: 9}, {value: 2}, {value: 1}, {value: 8}, {value: 7}, {value: 6}, {value: 3}} 61 | } 62 | -------------------------------------------------------------------------------- /storage.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | type Storage interface { 4 | Create(Node) (int, error) 5 | Find(int) (Node, error) 6 | Update(Node) error 7 | UpdateParent(int, int, int) error 8 | Delete(Node) error 9 | Iterate(chan Node) 10 | } 11 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | func twoMeans(distance Distance, nodes []Node, random Random, cosine bool) ([]float64, []float64) { 8 | iteration_steps := 200 9 | count := len(nodes) 10 | 11 | i := random.index(count) 12 | j := random.index(count - 1) 13 | if j >= i { 14 | j++ 15 | } 16 | iv := make([]float64, len(nodes[i].v)) 17 | copy(iv, nodes[i].v) 18 | 19 | jv := make([]float64, len(nodes[j].v)) 20 | copy(jv, nodes[j].v) 21 | 22 | if cosine { 23 | normalize(iv) 24 | normalize(jv) 25 
| } 26 | 27 | ic := 1 28 | jc := 1 29 | 30 | for l := 0; l < iteration_steps; l++ { 31 | k := random.index(count) 32 | 33 | di := float64(ic) * distance.distance(iv, nodes[k].v) 34 | dj := float64(jc) * distance.distance(jv, nodes[k].v) 35 | 36 | norm := 1.0 37 | if cosine { 38 | norm = getNorm(nodes[k].v) 39 | } 40 | 41 | if di < dj { 42 | for z, _ := range iv { 43 | iv[z] = (iv[z]*float64(ic) + nodes[k].v[z]/norm) / float64(ic+1) 44 | } 45 | ic++ 46 | } else if dj < di { 47 | for z, _ := range jv { 48 | jv[z] = (jv[z]*float64(jc) + nodes[k].v[z]/norm) / float64(jc+1) 49 | } 50 | jc++ 51 | } 52 | } 53 | return iv, jv 54 | } 55 | 56 | func normalize(v []float64) []float64 { 57 | norm := getNorm(v) 58 | for z, _ := range v { 59 | v[z] /= norm 60 | } 61 | return v 62 | } 63 | 64 | func getNorm(v []float64) float64 { 65 | var sq_norm float64 66 | for _, vz := range v { 67 | sq_norm += vz * vz 68 | } 69 | return math.Sqrt(sq_norm) 70 | } 71 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package gannoy 2 | 3 | const VERSION string = "0.0.1" 4 | --------------------------------------------------------------------------------