├── scinim ├── signals.nim ├── experimental │ └── sugar.nim ├── signals │ └── filters.nim ├── fuse_loops.nim ├── primes.nim └── numpyarrays.nim ├── .gitignore ├── examples ├── numpyarrays │ ├── examply.nims │ ├── README.md │ ├── results.txt │ ├── examply.nim │ └── examply.py └── filters │ └── savitzky_golay_filter.nim ├── scinim.nim ├── tests ├── config.nims ├── tFuseLoops.nim ├── tsugar.nim ├── tnumpyarrays.nim └── test_primes.nim ├── README.md ├── changelog.org ├── scinim.nimble ├── LICENSE └── .github └── workflows └── ci.yml /scinim/signals.nim: -------------------------------------------------------------------------------- 1 | import signals / filters 2 | export filters 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache/ 2 | nimblecache/ 3 | htmldocs/ 4 | *.exe 5 | *.so 6 | *.out 7 | -------------------------------------------------------------------------------- /examples/numpyarrays/examply.nims: -------------------------------------------------------------------------------- 1 | --b:cpp 2 | --cc:gcc 3 | --outdir:bin 4 | --out:examply.so 5 | --define:openmp 6 | --define:danger 7 | --app:lib 8 | --define:useMalloc 9 | -------------------------------------------------------------------------------- /scinim.nim: -------------------------------------------------------------------------------- 1 | import ./scinim/signals 2 | export signals 3 | 4 | import ./scinim/numpyarrays 5 | export numpyarrays 6 | 7 | import ./scinim/fuse_loops 8 | export fuse_loops 9 | -------------------------------------------------------------------------------- /tests/config.nims: -------------------------------------------------------------------------------- 1 | switch("path", "$projectDir/../src") 2 | switch("threads", "on") 3 | switch("define", "openmp") 4 | switch("define", "useMalloc") 5 | switch("cc", "gcc") 6 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scinim 2 | The core types and functions of the SciNim ecosystem 3 | 4 | This package also defines Nim type to directly interact with Python's Numpy array from Nim. In the examples, there is an example on how to use Nim to iterate over Numpy's array buffer directly. 5 | -------------------------------------------------------------------------------- /changelog.org: -------------------------------------------------------------------------------- 1 | * v0.2.4 2 | - export ~NumpyArray~ data type 3 | 4 | * v0.2.0 5 | - Add Numpy Array <-> Arraymancer; and more generally Numpy Array <-> ptr 6 | UncheckedArray interface 7 | 8 | * v0.1.0 9 | - add =signals= submodule consisting of Savitzky-Golay filters and 1D convolution 10 | - add example showing SVG filter usage 11 | -------------------------------------------------------------------------------- /scinim.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | version = "0.2.5" 3 | author = "SciNim" 4 | description = "The core types and functions of the SciNim ecosystem" 5 | license = "MIT" 6 | 7 | # C++ codegen catches more bugs than C 8 | backend = "cpp" 9 | 10 | # Dependencies 11 | requires "nim >= 1.6.0" 12 | requires "threading" 13 | requires "arraymancer >= 0.7.32" 14 | requires "polynumeric >= 0.2.0" 15 | requires "nimpy >= 0.2.0" 16 | requires "print" 17 | 18 | task test, "Run all tests": 19 | echo "Running tests command" 20 | exec "nim cpp -r --mm:arc tests/tnumpyarrays.nim" 21 | exec "nim cpp -r --mm:orc tests/tnumpyarrays.nim" 22 | -------------------------------------------------------------------------------- /examples/numpyarrays/README.md: -------------------------------------------------------------------------------- 1 | # Example of calling Nim from Python 2 | 3 | This 
example shows how to call a Nim function from Python and how to use SciNim + Nimpy to loop directly over a Numpy array buffer. 4 | When returning data, be careful not to accidentally return a dangling pointer (hint: you'll see a segfault from Python) 5 | 6 | 7 | ## Running the example 8 | 9 | To run the example you can simply compile the Nim module and run the Python script: 10 | 11 | ``nim c examply && python examply.py > results.txt`` 12 | 13 | All the nim compile-time options are in examply.nims (nothing fancy, just the standard "fast" nim compile options). 14 | To demonstrate how much this speeds up a Python loop, the example uses A LOT of elements. Each Python loop takes about ~900 seconds and there are 3 of them, so feel free to comment them out if you are only interested in Nim's timing. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 SciNim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/numpyarrays/results.txt: -------------------------------------------------------------------------------- 1 | Python => main() 2 | CPU COUNT= 8 3 | MAX_X= 3000 4 | MAX_Y= 4000 5 | MAX_LEN= 12000000 6 | BEGIN 7 | ---------------- 8 | [[0.70493978 0.20194409 0.36085703 ... 0.00690316 0.193097 0.23888245] 9 | [0.64239022 0.85585042 0.3957121 ... 0.37209593 0.46811377 0.6701858 ] 10 | [0.75608895 0.131218 0.42835299 ... 0.41229778 0.80615316 0.39378482] 11 | ... 12 | [0.48456984 0.58292855 0.40547578 ... 0.63544508 0.45781191 0.71426714] 13 | [0.79431227 0.03892199 0.42968725 ... 0.83385262 0.38061308 0.70092219] 14 | [0.51775866 0.20859028 0.56165316 ... 0.03074889 0.96434193 0.41807707]] 15 | 1) 16 | res= 4635196.899425893 17 | Python loop took : 5.607424815010745 seconds 18 | pyres= 4635196.899425893 19 | Python measured native loop took : 0.02337951000663452 seconds 20 | Nim measured native loop took : 23.357 ms 21 | res= 4635196.899425893 22 | 2) Showing in-place mod 23 | [0.70493978 0.20194409 0.36085703] 24 | [123. -5. 
0.36085703] 25 | 3) Comparing for loops 26 | normalForOp: 0.04339129300205968 seconds 27 | indexedOp: 0.051275824022013694 seconds 28 | parallelForOp: 0.012469089997466654 seconds 29 | parallelIndexedForOp: 0.03665532698505558 seconds 30 | Native python for: 7.6476721960061695 seconds 31 | False 32 | False 33 | True 34 | ---------------- 35 | END 36 | -------------------------------------------------------------------------------- /tests/tFuseLoops.nim: -------------------------------------------------------------------------------- 1 | import ../scinim/fuse_loops 2 | import std / unittest 3 | 4 | suite "fuseLoops": 5 | test "Compiles test for different `fuseLoops` setups": 6 | const N = 5 7 | const T = 10 8 | const X = 3 9 | 10 | ## XXX: These should probably become proper tests. :) 11 | 12 | fuseLoops: 13 | for i in 0 ..< N: 14 | let x = i * 2 15 | for j in 0 ..< T: 16 | let z = x * j 17 | echo i, j, x, z 18 | echo x 19 | 20 | fuseLoops: 21 | for i in 0 ..< N: 22 | let x = i * 2 23 | for j in 0 ..< T: 24 | let z = x * j 25 | echo i, j, x, z 26 | for k in nofuse(0 ..< T): 27 | echo k 28 | echo x 29 | 30 | fuseLoops("parallel for"): 31 | for i in 0 ..< N: 32 | let x = i * 2 33 | for j in 0 ..< T: 34 | let z = x * j 35 | for k in 0 ..< X: 36 | echo i, j, k, x, z 37 | echo x 38 | 39 | ## The following raises a CT error 40 | when compiles(( 41 | fuseLoops: 42 | for i in 0 ..< N: 43 | let x = i * 2 44 | var zsum = 0 45 | for j in 0 ..< T: 46 | let z = x * j 47 | zsum += z 48 | echo i, x, z 49 | echo x 50 | for j in 0 ..< 2 * T: 51 | zsum += j 52 | echo zsum 53 | )): 54 | doAssert false 55 | -------------------------------------------------------------------------------- /examples/numpyarrays/examply.nim: -------------------------------------------------------------------------------- 1 | import nimpy 2 | import scinim/numpyarrays 3 | import scinim 4 | import std/[times, monotimes, math, sequtils] 5 | 6 | template timeIt(name:string, body) = 7 | let t0 = getMonoTime() 8 
| body 9 | let sub = getMonoTime() - t0 10 | # echo(name, " took ", $(sub)) 11 | let elapsed {.inject.} = sub.inMicroseconds() / 1000 12 | 13 | proc doStuff[T](el: T) : T {.inline.} = 14 | result = (1.0-el)/(1.0+el) 15 | 16 | proc modArray*(x: NumpyArray[float64]) {.exportpy.} = 17 | # echo "modArrayInPlace.nim" 18 | # Example of accessing the buffer directly 19 | x[0, 0] = 123.0 20 | x[0, 1] = -5.0 21 | 22 | proc parallelForOp*(x: NumpyArray[float64]) : NumpyArray[float64] {.exportpy.} = 23 | result = initNumpyArray[float64](x.shape) 24 | let 25 | ur = result.toUnsafeView() 26 | ux = x.toUnsafeView() 27 | 28 | for i in 0||(x.len-1): 29 | ur[i] = doStuff ux[i] 30 | 31 | proc parallelIndexedForOp*(x: NumpyArray[float64]) : NumpyArray[float64] {.exportpy.} = 32 | result = initNumpyArray[float64](x.shape) 33 | 34 | fuseLoops("parallel for"): 35 | for i in 0.. int): x*x 44 | check fn(2) == 4 45 | check fn(3) == 9 46 | block: 47 | let fn = λ(x -> int): x+x 48 | check fn(2) == 4 49 | check fn(3) == 6 50 | 51 | test "`mathScope` DSL that acts 'untyped'": 52 | mathScope: 53 | g(x) = exp(-x) 54 | h(x, μ, σ) = 1.0/sqrt(2*Pi) * exp(-pow(x - μ, 2) / (2 * σ*σ)) 55 | 56 | check g(1.5) == exp(-1.5) 57 | check h(1.0, 0.5, 1.1) == 1.0/sqrt(2*Pi) * exp(-pow(1.0 - 0.5, 2) / (2 * 1.1^2)) 58 | -------------------------------------------------------------------------------- /examples/filters/savitzky_golay_filter.nim: -------------------------------------------------------------------------------- 1 | import random 2 | import numericalnim / interpolate 3 | import ggplotnim 4 | import arraymancer 5 | import nimpy 6 | import scinim / signals 7 | import sequtils 8 | 9 | #[ 10 | 11 | A simple example computing a smoothed function for some very noisy data 12 | using Savitzky-Golay filters. 13 | 14 | We compare the output to Scipy's result (so you need both `nimpy` and 15 | the `scipy` package in your PATH). Finally, the computation requires 16 | LAPACK. 
17 | 18 | `ggplotnim` is used to plot the data and result. 19 | 20 | ]# 21 | 22 | proc generateData(): seq[float] = 23 | ## generate some x/y data that has significant noise 24 | var rng = initRand(123) 25 | let support = @[1.0, 2.0, 5.0, 2.0, 4.0] 26 | let at = arange(support.len).asType(float).toRawSeq 27 | let stds = @[0.5, 1.1, 0.7, 1.7, 0.3] 28 | let posSpline = newCubicSpline(at, support) 29 | let stdSpline = newCubicSpline(at, stds) 30 | const npt = 1000 31 | result = newSeq[float](npt) 32 | for i in 0 ..< npt: 33 | let x = i.float / npt.float * support.high.float 34 | result[i] = rng.gauss(posSpline.eval(x), 35 | stdSpline.eval(x)) 36 | 37 | # just import the savitzky golay module from scipy 38 | let svg = pyImport("scipy.signal._savitzky_golay") 39 | 40 | # generate our random data 41 | let y = generateData().toTensor 42 | let x = toSeq(0 ..< y.len) 43 | # define a window length and polynomial order (the longer the window, the smoother the result) 44 | let windowLength = 889 45 | let polyOrder = 5 46 | 47 | # compute 3 different cases 48 | # 1. SVG filter without interpolating on the sides 49 | let filtered = savitzkyGolayFilter(y, windowLength, polyOrder, interpolateEdges = false) 50 | # 2. SVG filter including interpolation on the sides 51 | let finishd = savitzkyGolayFilter(y, windowLength, polyOrder) 52 | # 3. 
SVG filter using scipy 53 | let purepypy = svg.savgol_filter(y.toRawSeq, windowLength, polyOrder) 54 | 55 | var purePy = newSeq[float]() 56 | for x in purepypy: 57 | purepy.add x.to(float) 58 | 59 | let df = seqsToDf(x, y, filtered, purepy, finishd) 60 | ggplot(df, aes("x", "y")) + 61 | geom_line() + 62 | geom_line(aes = aes(y = "purepy"), color = some(parseHex("FF00FF"))) + 63 | geom_line(aes = aes(y = "filtered"), color = some(parseHex("FF0000"))) + 64 | geom_line(aes = aes(y = "finishd"), color = some(parseHex("0000FF")), size = some(2.0)) + 65 | ggtitle("Comparison of SVG filters using no interpolation, our interpolation & scipy") + 66 | ggsave("svg_comparisons.png", width = 1000, height = 800) 67 | -------------------------------------------------------------------------------- /scinim/experimental/sugar.nim: -------------------------------------------------------------------------------- 1 | import std / [macros, math] 2 | export math 3 | 4 | ## This module contains a whole bunch of fun little sugar procs / templates / macros 5 | ## to (mostly) help with writing math code. The most likely use case might be for 6 | ## illustrative / explanatory code that is close to math in nature already. 
7 | 8 | 9 | proc getTypeReplaceI(arg: NimNode): NimNode = 10 | if arg.len > 0: 11 | result = newTree(arg.kind) 12 | for ch in arg: 13 | result.add getTypeReplaceI(ch) 14 | else: 15 | case arg.kind 16 | of nnkIdent, nnkSym: 17 | if arg.strVal == "i": return newLit(0) 18 | else: return arg 19 | else: return arg 20 | 21 | proc Σ*[T](s: openArray[T]): T = s.sum 22 | proc Π*[T](s: openArray[T]): T = s.prod 23 | 24 | proc √*[T](x: T): T = sqrt(x) 25 | proc √*[T](x: openArray[T]): T = sqrt(x) # NOTE(review): `std/math` provides no `sqrt` for `openArray`, so this overload presumably fails once instantiated -- confirm the intended semantics 26 | 27 | template Σ_i*(frm, to: int, body: untyped): untyped = 28 | var res: int 29 | for i {.inject.} in frm ..< to: 30 | res += body 31 | res 32 | 33 | macro Σ_i*(col, body: untyped): untyped = 34 | let typ = getTypeReplaceI(body) 35 | let iId = ident"i" 36 | result = quote do: 37 | var res: typeof(`typ`) 38 | for `iId` in 0 ..< `col`.len: 39 | res += `body` 40 | res 41 | 42 | macro λ*(arg, body: untyped): untyped = 43 | ## Builds an anonymous single-argument proc from `arg`, which must have the form `name -> Type`, and the expression `body`. Any other shape of `arg` is a compile-time error. 44 | # XXX: Support multiple arguments! 45 | if arg.kind != nnkInfix or 46 | (arg.kind == nnkInfix and arg[0].kind in {nnkIdent, nnkSym} and arg[0].strVal != "->"): # only an infix node is guaranteed to have the operator child `arg[0]` 47 | error("Unsupported operation in `λ`. The infix must be `->`, but is: " & (if arg.kind == nnkInfix: arg[0].repr else: arg.repr), arg) 48 | let a = arg[1] 49 | let typ = arg[2] 50 | result = quote do: 51 | proc(`a`: `typ`): auto = `body` 52 | 53 | proc sliceTypes(n: NimNode, sl: Slice[int]): tuple[args, genTyps: NimNode] = 54 | var args = nnkFormalParams.newTree(ident"auto") 55 | var genTyps = nnkIdentDefs.newTree() 56 | for i in sl.a .. 
sl.b: 57 | let typ = ident($char('A'.ord + i - 1)) 58 | args.add nnkIdentDefs.newTree(n[i], 59 | typ, 60 | newEmptyNode()) 61 | genTyps.add typ 62 | genTyps.add newEmptyNode() 63 | genTyps.add newEmptyNode() 64 | genTyps = nnkGenericParams.newTree(genTyps) 65 | result = (args: args, genTyps: genTyps) 66 | 67 | proc generateFunc(arg: NimNode): NimNode = 68 | expectKind(arg, nnkAsgn) 69 | let lhs = arg[0] 70 | let rhs = arg[1] 71 | let fnName = lhs[0] 72 | let (fnArgs, genTyps) = sliceTypes(lhs, 1 ..< lhs.len) 73 | result = newProc(name = fnName, body = rhs) 74 | result[2] = genTyps 75 | result[3] = fnArgs 76 | 77 | macro mathScope*(args: untyped): untyped = 78 | expectKind(args, nnkStmtList) 79 | result = newStmtList() 80 | for arg in args: 81 | result.add generateFunc(arg) 82 | -------------------------------------------------------------------------------- /tests/tnumpyarrays.nim: -------------------------------------------------------------------------------- 1 | import arraymancer 2 | 3 | import nimpy 4 | import ../scinim/numpyarrays 5 | import unittest 6 | 7 | when defined(osx): 8 | import nimpy/py_lib as lib 9 | pyInitLibPath("/Users/regis.caillaud/.pyenv/versions/3.11.9/lib/libpython3.11.dylib") 10 | 11 | proc test(arg: tuple[s: string]) = 12 | suite arg.s: 13 | test "int": 14 | var A: Tensor[int64] = toTensor(@[[1'i64, 2, 3], [4'i64, 5, 6]]) 15 | var pA = toNdArray(A) 16 | pyprint(pA.dtype()) 17 | check toTensor[int64](pA) == A 18 | 19 | test "float": 20 | var A: Tensor[float64] = toTensor(@[[1.1'f64, 2.2, 3.3], [4.4'f64, 5.5, 6.6]]) 21 | var pA = toNdArray(A) 22 | pyprint(pA.dtype()) 23 | check toTensor[float](pA) == A 24 | 25 | test "int32": 26 | var A: Tensor[int32] = toTensor(@[[1'i32, 2, 3], [4'i32, 5, 6]]) 27 | var pA = toNdArray(A) 28 | pyprint(pA.dtype()) 29 | check toTensor[int32](pA) == A 30 | 31 | test "float32": 32 | var A: Tensor[float32] = toTensor(@[[1.1'f32, 2.2, 3.3], [4.4'f32, 5.5, 6.6]]) 33 | var pA = toNdArray(A) 34 | pyprint(pA.dtype()) 
35 | check toTensor[float32](pA) == A 36 | 37 | test "RaiseAssert": 38 | let np = pyImport("numpy") 39 | let py_array_type = dtype(float32) 40 | let pA = nimpy.callMethod(np, "zeros", @[2, 3, 4], py_array_type) 41 | pyprint(pA.dtype()) 42 | 43 | expect AssertionDefect: 44 | var ppA = asNumpyArray[float64](pA) 45 | pyprint(ppA.dtype()) 46 | check true 47 | 48 | test "RaiseAssert from double object": 49 | var A: Tensor[float32] = toTensor(@[[1.1'f32, 2.2, 3.3], [4.4'f32, 5.5, 6.6]]) 50 | var pA = toNdArray(A).obj() # Create a PyObject. In practice, this will often be the result of a callMethod proc 51 | pyprint(pA.dtype()) 52 | expect AssertionDefect: 53 | var ppA = asNumpyArray[float64](pA) 54 | pyprint(ppA.dtype()) 55 | check true 56 | 57 | test "Call a Python function": 58 | var A: Tensor[float32] = toTensor(@[[1.1'f32, 2.2, 3.3], [4.4'f32, 5.5, 6.6]]) 59 | var pA = toNdArray(A) 60 | pyprint(pA) 61 | let np = pyImport("numpy") 62 | # This effectively perform a copy because np.transpose is not C contiguous 63 | let ret = asNumpyArray[float32](np.transpose(pA)) 64 | pyprint(ret) 65 | check ret.toTensor() == A.transpose() 66 | 67 | test "Call a Python function using a compistion of NumpyArray": 68 | var A: Tensor[float32] = toTensor(@[[1.1'f32, 2.2, 3.3], [4.4'f32, 5.5, 6.6]]) 69 | var pA = toNdArray(A) 70 | var B : Tensor[float32] = toTensor(@[[1.1'f32, 2.2, 3.3], [4.4'f32, 5.5, 6.6]]) 71 | var pB = toNdArray(B) 72 | let py = pyBuiltinsModule() 73 | discard nimpy.callMethod(py, "print", (a: pA, b: pB)) 74 | 75 | 76 | when isMainModule: 77 | test((s: "toTensor, toNdArray in main thread")) 78 | # Disable for now 79 | #var thr: Thread[tuple[s: string]] 80 | #createThread(thr, test, (s: "toTensor, toNdArray in external thread")) 81 | #joinThread(thr) 82 | -------------------------------------------------------------------------------- /examples/numpyarrays/examply.py: -------------------------------------------------------------------------------- 1 | import examply 2 | 
import numpy as np 3 | from timeit import default_timer as timer 4 | import multiprocessing 5 | 6 | 7 | def fLoop(ar): 8 | s = 0.0 9 | iX = int(ar.shape[0]) 10 | iY = int(ar.shape[1]) 11 | 12 | for i in range(0, iX): 13 | for j in range(0, iY): 14 | el = ar[i, j] 15 | s = s + (1 - el) / (1 + el) 16 | print("res=", s) 17 | return s 18 | 19 | 20 | def main(): 21 | print("Python => main()") 22 | MAX_X = int(3 * 1e3) 23 | MAX_Y = int(4 * 1e2) 24 | MAX_LEN = int(MAX_X * MAX_Y) 25 | print("CPU COUNT=", multiprocessing.cpu_count()) 26 | print("MAX_X=", MAX_X) 27 | print("MAX_Y=", MAX_Y) 28 | print("MAX_LEN=", MAX_LEN) 29 | ar = np.random.rand(MAX_X, MAX_Y) 30 | 31 | print("BEGIN") 32 | print("----------------") 33 | print(ar) 34 | 35 | print("1)") 36 | timePythonLoop = False 37 | # Toggle - CAREFUL it takes a long time since Python is slow 38 | if timePythonLoop: 39 | start = timer() 40 | pyres = fLoop(ar) 41 | end = timer() 42 | print("Python loop took : ", end - start, " seconds") 43 | print("pyres=", pyres) 44 | 45 | start = timer() 46 | res = examply.runCalc(ar) 47 | end = timer() 48 | print("Python measured native loop took : ", end - start, " seconds") 49 | print("Nim measured native loop took : ", res[0], " ms") 50 | print("res=", res[1]) 51 | 52 | print("2) Showing in-place mod") 53 | print(ar[0, 0:3]) 54 | examply.modArray(ar) 55 | print(ar[0, 0:3]) 56 | 57 | print("3) Comparing for loops") 58 | 59 | start = timer() 60 | arr0 = examply.normalForOp(ar) 61 | end = timer() 62 | print("normalForOp: ", end - start, " seconds") 63 | 64 | start = timer() 65 | arr01 = examply.indexedOp(ar) 66 | end = timer() 67 | print("indexedOp: ", end - start, " seconds") 68 | 69 | start = timer() 70 | arr1 = examply.parallelForOp(ar) 71 | end = timer() 72 | print("parallelForOp: ", end - start, " seconds") 73 | 74 | start = timer() 75 | arr11 = examply.parallelIndexedForOp(ar) 76 | end = timer() 77 | print("parallelIndexedForOp: ", end - start, " seconds") 78 | 79 | if 
timePythonLoop: 80 | start = timer() 81 | arr2 = np.zeros(ar.shape) 82 | X = int(ar.shape[0]) 83 | Y = int(ar.shape[1]) 84 | for i in range(0, X): 85 | for j in range(0, Y): 86 | arr2[i, j] = (1.0 - ar[i, j]) / (1.0 + ar[i, j]) 87 | end = timer() 88 | print("Native python for: ", end - start, " seconds") 89 | 90 | # We can check that it returns a copy 91 | print(np.shares_memory(ar, arr0)) 92 | print(np.shares_memory(ar, arr1)) 93 | 94 | # Check results are identical 95 | eq = np.allclose(arr0, arr1) 96 | if timePythonLoop: 97 | eq = np.allclose(arr0, arr2) 98 | print(eq) 99 | 100 | print("----------------") 101 | print("END") 102 | 103 | 104 | main() 105 | ## In bash, simply run : 106 | ## time nim c examply && time python3 examply.py > results.txt 107 | ## This takes about ~30 minutes due to long python loop 108 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: scinim CI 2 | on: 3 | push: 4 | paths: 5 | - 'tests/**' 6 | - '**' 7 | - 'scinim.nimble' 8 | - '.github/workflows/ci.yml' 9 | pull_request: 10 | paths: 11 | - 'tests/**' 12 | - '**' 13 | - 'scinim.nimble' 14 | - '.github/workflows/ci.yml' 15 | 16 | jobs: 17 | build: 18 | runs-on: ${{ matrix.os }} 19 | strategy: 20 | matrix: 21 | nim: 22 | - '2.0.x' 23 | - '2.2.x' 24 | - 'stable' 25 | os: 26 | - ubuntu-latest 27 | # - windows-latest 28 | # - macOS-latest 29 | 30 | name: '${{ matrix.nim }} (${{ matrix.os }})' 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v3 34 | with: 35 | path: scinim 36 | 37 | - name: Setup nim 38 | uses: jiro4989/setup-nim-action@v1 39 | with: 40 | nim-version: ${{ matrix.nim }} 41 | repo-token: ${{ secrets.GITHUB_TOKEN }} 42 | 43 | - name: Setup MSYS2 (Windows) 44 | if: ${{matrix.os == 'windows-latest'}} 45 | uses: msys2/setup-msys2@v2 46 | with: 47 | path-type: inherit 48 | update: true 49 | install: base-devel git 
mingw-w64-x86_64-toolchain 50 | 51 | - name: Install dependencies (Ubuntu) 52 | if: ${{matrix.os == 'ubuntu-latest'}} 53 | run: | 54 | sudo apt-get update 55 | sudo apt-get install -y python3-numpy 56 | 57 | - name: Install dependencies (OSX) 58 | if: ${{matrix.os == 'macOS-latest'}} 59 | run: | 60 | brew install numpy 61 | 62 | - name: Install dependencies (Windows) 63 | if: ${{matrix.os == 'windows-latest'}} 64 | shell: msys2 {0} 65 | run: | 66 | pacman -Syu --noconfirm 67 | pacman -S --needed --noconfirm mingw-w64-x86_64-lapack 68 | pacman -S --needed --noconfirm mingw-w64-x86_64-python-numpy 69 | 70 | - name: Setup nimble & deps 71 | shell: bash 72 | run: | 73 | cd scinim 74 | nimble refresh -y 75 | nimble install -y 76 | 77 | - name: Run tests (Linux & OSX) 78 | if: ${{matrix.target != 'windows'}} 79 | shell: bash 80 | run: | 81 | cd scinim 82 | nimble -y test 83 | 84 | - name: Run tests (Windows) 85 | if: ${{matrix.target == 'windows'}} 86 | shell: msys2 {0} 87 | run: | 88 | cd scinim 89 | nimble -y test 90 | 91 | - name: Build docs 92 | if: > 93 | github.event_name == 'push' && github.ref == 'refs/heads/master' && 94 | matrix.target == 'linux' && matrix.branch == 'devel' 95 | shell: bash 96 | run: | 97 | cd scinim 98 | branch=${{ github.ref }} 99 | branch=${branch##*/} 100 | nimble doc --project --outdir:docs \ 101 | '--git.url:https://github.com/${{ github.repository }}' \ 102 | '--git.commit:${{ github.sha }}' \ 103 | "--git.devel:$branch" \ 104 | scinim.nim 105 | # Ignore failures for older Nim 106 | cp docs/{the,}index.html || true 107 | 108 | - name: Publish docs 109 | if: > 110 | github.event_name == 'push' && github.ref == 'refs/heads/master' && 111 | matrix.target == 'linux' && matrix.branch == 'devel' 112 | uses: crazy-max/ghaction-github-pages@v1 113 | with: 114 | build_dir: scinim/docs 115 | env: 116 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 117 | -------------------------------------------------------------------------------- 
/tests/test_primes.nim: -------------------------------------------------------------------------------- 1 | import ../scinim/primes 2 | import arraymancer 3 | import std / [unittest, random] 4 | 5 | proc test_primes() = 6 | ## Test the `primes` function 7 | test "Prime number generation (integer values)": 8 | check: primes(0).len == 0 9 | check: primes(1).len == 0 10 | check: primes(2) == [2].toTensor 11 | check: primes(3) == [2, 3].toTensor 12 | check: primes(4) == [2, 3].toTensor 13 | check: primes(11) == [2, 3, 5, 7, 11].toTensor 14 | check: primes(12) == [2, 3, 5, 7, 11].toTensor 15 | check: primes(19) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor 16 | check: primes(20) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor 17 | check: primes(22) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor 18 | check: primes(100000).len == 9592 19 | check: primes(100003).len == 9593 20 | check: primes(100000)[^1].item == 99991 21 | 22 | test "Prime number generation (floating-point values)": 23 | check: primes(100000.0).len == 9592 24 | check: primes(100000.0)[^1].item == 99991.0 25 | 26 | # An exception must be raised if the `upto` value is not a whole number 27 | try: 28 | discard primes(100.5) 29 | check: false 30 | except ValueError: 31 | # This is what should happen! 32 | discard 33 | 34 | proc generate_random_factor_tensor[T]( 35 | max_value: T, max_factors: int, prime_list: Tensor[T]): Tensor[T] = 36 | ## Generate a tensor of random prime factors taken from a tensor of primes 37 | ## The tensor length will not exceed the `max_factors` and the product of 38 | ## the factors will not exceed `max_value` either. 39 | ## This is not just a random list of values taken from the `prime_list` 40 | ## Instead we artificially introduce a random level of repetition of the 41 | ## chosen factors to emulate the fact that many numbers have repeated 42 | ## prime factors 43 | let max_value = rand(4 .. 2 ^ 53) 44 | let max_factors = rand(1 .. 
20) 45 | result = newTensor[int](max_factors) 46 | var value = 1 47 | var factor = prime_list[rand(prime_list.len - 1)] 48 | for idx in 0 ..< max_factors: 49 | # Randomly repeat the previous factor 50 | # Higher number of repetitions are less likely 51 | let repeat_factor = rand(5) < 1 52 | if not repeat_factor: 53 | factor = prime_list[rand(prime_list.len - 1)] 54 | let new_value = factor * value 55 | if new_value >= max_value: 56 | break 57 | result[idx] = factor 58 | value = new_value 59 | result = sorted(result) 60 | result = result[result >. 0] 61 | 62 | proc test_factor() = 63 | test "Prime factorization of integer values (factor)": 64 | check: factor(60) == [2, 2, 3, 5].toTensor 65 | check: factor(100001) == [11, 9091].toTensor 66 | 67 | # Check that the product of the factorization of a few random values 68 | # equals the original numbers 69 | for n in 0 ..< 10: 70 | let value = rand(10000) 71 | check: value == product(factor(value)) 72 | 73 | # Repeat the previous test in a more sophisticated manner 74 | # Instead of generating random values and checking that the 75 | # product of their factorization is the same as the original values 76 | # (which could work for many incorrect implementations of factor), 77 | # generate a few random factor tensors, multiply them to get 78 | # the number that has them as prime factors, factorize those numbers 79 | # and check that their factorizations matches the original tensors 80 | let prime_list = primes(100) 81 | for n in 0 ..< 10: 82 | let max_value = rand(4 .. 2 ^ 53) 83 | let max_factors = rand(1 .. 
20) 84 | var factors = generate_random_factor_tensor( 85 | max_value, max_factors, prime_list) 86 | let value = product(factors) 87 | check: factor(value) == factors 88 | 89 | test "Prime factorization of floating-point values (factor)": 90 | # Floating-point 91 | check: factor(60.0) == [2.0, 2, 3, 5].toTensor 92 | check: factor(100001.0) == [11.0, 9091].toTensor 93 | 94 | # Check that the product of the factorization of a few random values 95 | # equals the original numbers 96 | # Note that here we do not also do the reverse check (as we do for ints) 97 | # in order to make the test faster 98 | for n in 0 ..< 10: 99 | let value = floor(rand(10000.0)) 100 | check: value == product(factor(value)) 101 | 102 | # An exception must be raised if we try to factorize a non-whole number 103 | try: 104 | discard factor(100.5) 105 | check: false 106 | except ValueError: 107 | # This is what should happen! 108 | discard

proc test_isprime() =
  ## Checks `isprime` on integer and float scalars and on 2D tensors.
  test "isprime":
    check: isprime(7) == true
    check: isprime(7.0) == true
    check: isprime(7.5) == false
    check: isprime(1) == false
    check: isprime(0) == false
    check: isprime(-1) == false
    let t = [
      [-1, 0, 2, 4],
      [ 5, 6, 7, 11]
    ].toTensor
    let expected = [
      [false, false, true, false],
      [ true, false, true, true]
    ].toTensor
    check: isprime(t) == expected
    check: isprime(t.asType(float)) == expected

# Run the tests
suite "Primes":
  test_primes()
  test_factor()
  test_isprime()

--------------------------------------------------------------------------------
/scinim/signals/filters.nim:
--------------------------------------------------------------------------------
import sequtils
import math
import arraymancer / [tensor, linear_algebra]
import polynumeric
import algorithm

proc savitzkyGolayCoeffs*(windowLength: int, polyOrder: int,
                          deriv = 0, delta = 1.0): Tensor[float] =
  ## Computes the Savitzky-Golay coefficients for a window of length
  ## `windowLength` using polynomials up to order `polyOrder`.
  ##
  ## `deriv` determines the order of the derivative to compute. By default
  ## no derivative is used (`deriv = 0`).
  ##
  ## `delta` only applies for derivatives and describes the sampling spacing
  ## of the data.
  ##
  ## Raises a `ValueError` if `windowLength` is not a positive odd number or
  ## if `deriv` is not within `0 .. polyOrder`.
  if windowLength < 1:
    raise newException(ValueError, "Given window length must be positive.")
  if windowLength mod 2 == 0:
    raise newException(ValueError, "Given window length must be odd.")
  if deriv < 0 or deriv > polyOrder:
    # `y` below only has `polyOrder + 1` entries, so `y[deriv]` must exist
    raise newException(ValueError, "Derivative order (" & $deriv &
      ") must be within 0 .. polyOrder (" & $polyOrder & ").")

  let pos = windowLength div 2
  # window positions relative to the window centre, in *reversed* order
  let x = arange(-pos.float, (windowLength - pos).float)[windowLength-1..0|-1]
  # compute a Vandermonde matrix for `x` up to polyOrder + 1 (to include
  # polynomial order *up to* polyOrder)
  let A = vandermonde(x, polyOrder + 1).transpose

  # right hand side: selects the `deriv`-th polynomial coefficient, scaled by
  # `deriv! / delta^deriv`
  var y = zeros[float](polyOrder + 1)
  y[deriv] = fac(deriv).float / pow(delta, deriv.float)

  let (coeffs, _, _, _) = least_squares_solver(A, y)
  result = coeffs

proc fitEdge(windowStart, windowStop: int,
             interpStart, interpStop: int,
             polyOrder: int,
             data: Tensor[float],
             res: var Tensor[float],
             interpSVG: static bool) =
  ## Fits a polynomial of order `polyOrder` to the `data` within the given window
  ## and applies an interpolation to `res` within half the window.
  ##
  ## The `*Stop` values are taken as exclusive stops.
  ##
  ## If `interpSVG` is true, we perform a linear interpolation between the existing
  ## result of the Savitzky-Golay filter stored in `res` and the polynomial fit. The
  ## weight of the polynomial fit is largest at the outer edge of the data and
  ## decreases linearly towards the "inner" end of the interpolation range.
  ##
  ## An edge is treated as the leading edge if `interpStop < windowStop` and
  ## as the trailing edge otherwise.
  let winLength = windowStop - windowStart
  let xrange = arange(0.0, winLength.float) # the x values at which to fit
  let yrange = data[windowStart ..< windowStop] # the data to fit to
  let polyCoeff = polyFit(xrange, yrange, polyOrder) # perform the fit of desired order
  # NOTE: `initPoly` receives coefficient of polynomial of highest order ``first``!
  let p = initPoly(polyCoeff.toRawSeq.reversed)
  # number of points in the interpolation range, used to normalise the weights
  let span = (interpStop - interpStart).float
  for i in interpStart ..< interpStop:
    when interpSVG:
      let part =
        if interpStop < windowStop:
          # leading edge: weight 1 at `interpStart`, decreasing inwards
          (interpStop - i).float / span
        else:
          # trailing edge: mirror image, weight 1 at the last data point.
          # (Normalising by the absolute index `interpStop` instead of the
          # span would make this weight ~1 over the whole range.)
          (i - interpStart + 1).float / span
      res[i] = (1.0 - part) * res[i] + part * p.eval(xrange[i - windowStart])
    else:
      # use purely the polynomial fit in the interpolation range
      res[i] = p.eval(xrange[i - windowStart])

proc interpolateEdges(filtered, y: Tensor[float], windowLength: int, polyOrder: int): Tensor[float] =
  ## Performs interpolation of the given SVG `filtered` data using a polynomial fit to
  ## the input data `y` within `windowLength` of order `polyOrder`.
  ##
  ## Works on a copy of `filtered`. Raises a `ValueError` if the data has fewer
  ## than `windowLength` elements, as no full window can be fit in that case.
  let num = filtered.size.int
  if num < windowLength:
    raise newException(ValueError, "Cannot interpolate edges: data of size " &
      $num & " is shorter than the window length " & $windowLength & ".")
  result = filtered.clone()
  fitEdge(0, windowLength,                 # window range
          0, windowLength div 2,           # interp range
          polyOrder,
          y, result,
          interpSVG = true)
  fitEdge(num - windowLength, num,         # window range
          num - (windowLength div 2), num, # interp range
          polyOrder,
          y, result,
          interpSVG = true)

proc convolve1D*(input: Tensor[float], kernel: Tensor[float]): Tensor[float] =
  ## Convolution of the `input` with the given `kernel`.
  ##
  ## Currently it only allows a convolution including a constant value extension
  ## of the data by 0 (i.e. the convolution is computed over
  ## `[-kernel.size, input.size + kernel.size]`, but the `input` is taken as 0
  ## outside the range of the input).
  ##
  ## The kernel is applied as given (it is not flipped): element `i` of the
  ## result is the sum over `j` of `input[i - kernel.size div 2 + j] * kernel[j]`.
  ##
  ## Note: The implementation is naive. Performance improvements are certainly
  ## achievable.
  result = newTensor[float](input.size.int)
  let offset = kernel.size div 2
  for i in 0 ..< input.size:
    # compute first and last (both *inclusive*) kernel index of the window
    let windowStart = if i >= offset: 0 # window fully in data range
                      else: offset - i  # part is still outside (essentially extend by 0 data)
    let windowStop = if i + offset < input.size: kernel.size - 1 # kernel fits fully into rest of data
                     else: input.size - 1 - i + offset           # stop early (extend by 0 data)
    # `windowStop` is the last valid kernel index, hence the inclusive range.
    # An exclusive range would silently drop the last kernel coefficient.
    for j in windowStart .. windowStop:
      result[i] += input[i - offset + j] * kernel[j]

proc savitzkyGolayFilter*(data: Tensor[float], windowLength, polyOrder: int,
                          interpolateEdges = true): Tensor[float] =
  ## Computes the result of applying a Savitzky-Golay filter to the input `data`
  ## using the given `windowLength` and polynomial order `polyOrder`.
  ##
  ## If `interpolateEdges` is true, we will perform a polynomial interpolation
  ## on the edges of the resulting filtered data to accommodate the problem of
  ## bad predictions at the edges of our data, due to extending the data by
  ## zeroes in the convolution.
  ##
  ## Note: this implementation depends on LAPACK, as it uses `gelsd` to perform
  ## linear least squares solving.
  let coeffs = savitzkyGolayCoeffs(windowLength, polyOrder)
  result = convolve1D(data, coeffs)
  if interpolateEdges:
    result = result.interpolateEdges(data, windowLength, polyOrder)

--------------------------------------------------------------------------------
/scinim/fuse_loops.nim:
--------------------------------------------------------------------------------
import std / [macros, options, algorithm]

type
  ForLoop = object
    n: NimNode     # the actual node
    body: NimNode  # body of the loop *WITHOUT* any inner loops!
    idx: NimNode   # the loop index
    start: NimNode # start of the loop
    stop: NimNode  # stop of the loop

template nofuse*(arg: untyped): untyped =
  ## Just a dummy template, which can be easily used to disable fusing of
  ## a nested loop
  arg

proc extractBody(n: NimNode): NimNode =
  ## Returns the input tree without any possible nested for loops. Nested
  ## loops are replaced by `nnkEmpty` nodes to be filled again later in `bodies`.
  ##
  ## Only loops iterating over an `a ..< b` infix expression are removed. Any
  ## other `for` statement (e.g. one wrapped in `nofuse`) is kept untouched.
  case n.kind
  of nnkForStmt:
    if n[1].kind == nnkInfix and n[1][0].strVal == "..<":
      result = newEmptyNode() ## Flattened nested loop body will be inserted here
    else:
      result = n
  else:
    if n.len > 0:
      result = newTree(n.kind)
      for ch in n:
        let bd = extractBody(ch)
        if bd != nil:
          result.add bd
    else:
      result = n

proc toForLoop(n: NimNode): Option[ForLoop] =
  ## Returns a `some(ForLoop)` if the given node is a fuse-able for loop.
  ##
  ## A loop whose iterator is not an infix expression yields `none`. An infix
  ## iterator other than `0 ..< X` is a compile time error.
  doAssert n.kind == nnkForStmt
  if n[1].kind != nnkInfix: return
  if n[1][0].strVal != "..<":
    error("Unexpected iterator: " & $n[1].repr &
      ". It must be of the form `0 ..< X`.")
  if not (n[1][1].kind == nnkIntLit and n[1][1].intVal == 0):
    error("Starting iteration index must be 0!")
  result = some(ForLoop(n: n,
                        body: extractBody(n[2]),
                        idx: n[0],
                        start: n[1][1],
                        stop: n[1][2]))

proc extractLoops(n: NimNode): seq[ForLoop] =
  ## Extracts (fuse-able) loops from the given Nim node and errors if more than
  ## one for loop found at the same level.
  case n.kind
  of nnkForStmt:
    let loop = toForLoop(n)
    if loop.isSome:
      result.add loop.unsafeGet
      result.add extractLoops(n[2]) # go over body
    # else: a non fuse-able loop (e.g. `nofuse`) is kept verbatim by
    # `extractBody`, *including* any loops nested inside of it. We must not
    # descend into it, otherwise its inner loops would be fused as well while
    # still being present in the kept body.
  else:
    var foundLoops = 0 # counter for number of loops at current body
    for ch in n:
      let loops = extractLoops(ch)
      if loops.len > 0:
        result.add loops
        inc foundLoops
    if foundLoops > 1:
      error("Found more than one loop (" & $foundLoops & ") at the level of node: " &
        n.repr & ". Please wrap " & "these loops as `nofuse`, i.e. `nofuse(0 ..< X)`")

proc genFusedLoop(idx: NimNode, stop: NimNode, ompStr = ""): NimNode =
  ## Generate either regular or OpenMP for loop.
  ##
  ## The returned `nnkForStmt` has no body yet; the caller adds it. With an
  ## empty `ompStr` the iterator is `0 ..< stop`, otherwise it is the call
  ## `||(0, stop, ompStr)`.
  let loopIter = if ompStr.len == 0:
                   nnkInfix.newTree(ident"..<",
                                    newLit 0,
                                    stop)
                 else:
                   nnkCall.newTree(ident"||",
                                   newLit 0,
                                   stop,
                                   newLit ompStr)
  result = nnkForStmt.newTree(
    idx,
    loopIter
  )

proc calcStop(loops: seq[ForLoop]): NimNode =
  ## Returns `N * T * U * ...` expression where the indices are
  ## the stop indices of the loops to be fused.
  case loops.len
  of 0: doAssert false, "Must not happen"
  of 1: result = loops[0].stop
  else:
    # `first.stop * (product of the remaining stops)`
    result = nnkInfix.newTree(ident"*", loops[0].stop,
                              calcStop(loops[1 .. ^1]))

proc modOrDiv(prefix, suffix: NimNode, isDiv: bool): NimNode =
  ## Generates `prefix div suffix` if `isDiv`, else `prefix mod suffix`.
  if isDiv:
    result = quote do:
      `prefix` div `suffix`
  else:
    result = quote do:
      `prefix` mod `suffix`

proc asLet(v, val: NimNode): NimNode =
  ## Generates `let v = val`.
  result = quote do:
    let `v` = `val`

proc genPrelude(idx: NimNode, loops: seq[ForLoop]): NimNode =
  ## The basic algorithm for generating the correct index for fused loops is
  ##
  ## Notation:
  ## `i` = Loop index of single remaining outer loop
  ## `N_i` = Stopping index (-1) of the inner loop `i`
  ## `n` = Total number of nested loops
  ##
  ## Whichever is easiest to read for you:
  ##
  ## `let i0 = i div (N_1 * N_2 ... N_n)`
  ## `let i1 = (i mod (N_1 * N_2 ... N_n)) div (N_2 * ... N_n)`
  ## `let i2 = ((i mod (N_1 * N_2 ... N_n)) mod (N_2 * ... N_n)) div (N_3 * ... * N_n)`
  ## ...
  ##
  ## ...or
  ##
  ## `let i0 = Idx div [Product of remaining N-1 loops]`
  ## `let i1 = (Idx mod [Product of remaining loops]) div [Product of remaining N-2 loops]`
  ## `let i2 = (Idx mod [Product of remaining loops]) mod [Product of remaining N-2 loops]`
  ##
  ## The index of the innermost loop is the final remainder. For a single loop
  ## this degenerates to `let i0 = Idx`.
  result = newStmtList()
  var prefix = idx
  var remaining = loops.reversed # outermost loop is the last element
  var current = remaining.pop    # loop whose index is generated next
  while remaining.len > 0:
    # product of the stops of all loops nested inside of `current`
    let suffix = calcStop(remaining)
    result.add asLet(current.idx, modOrDiv(prefix, suffix, isDiv = true))
    prefix = modOrDiv(prefix, suffix, isDiv = false)
    current = remaining.pop
  # innermost loop (or the only loop): its index is what is left over
  result.add asLet(current.idx, prefix)

proc bodies(loops: seq[ForLoop]): NimNode =
  ## Concatenates all loop bodies, by placing the next loop into the
  ## `nnkEmpty` node of the current node.
  ##
  ## Note: only the direct children of a loop body are searched for the
  ## `nnkEmpty` placeholder. The body of the first loop is modified in place.
  var ml = loops.reversed
  var cur = ml.pop
  result = cur.body
  for i in 0 ..< result.len:
    let ch = result[i]
    if ch.kind == nnkEmpty:
      # insert next loop
      result[i] = bodies(ml.reversed) # revert order again
      break # there can only be a single `nnkEmpty` (multiple loops not allowed,
            # yields CT error)

proc fuseLoopImpl(ompStr: string, body: NimNode): NimNode =
  ## Implementation shared by both `fuseLoops` macros. An empty `ompStr`
  ## generates a regular loop, see `genFusedLoop`.
  # 1. extract all loops from the body
  let loops = extractLoops(body)
  if loops.len == 0:
    error("fuseLoops: no fuse-able loop of the form `0 ..< X` found.", body)
  # 2. generate identifier for the final loop
  let idx = genSym(nskForVar, "idx")
  # 3. generate the fused outer loop
  result = genFusedLoop(idx, calcStop(loops), ompStr)
  # 4. generate final loop body by...
  var loopBody = newStmtList()
  # 4a. generate prelude of loop variables of original loops
  loopBody.add genPrelude(idx, loops) # gen code to produce the old loop variables
  # 4b. insert old loop bodies into respective positions
  loopBody.add bodies(loops)
  result.add loopBody
  when defined(DebugFuseLoop):
    echo result.repr

macro fuseLoops*(body: untyped): untyped =
  ## Fuses all loops inside the body of the macro, unless they are annotated with
  ## `nofuse`.
  result = fuseLoopImpl("", body)

macro fuseLoops*(ompStr: untyped{lit}, body: untyped): untyped =
  ## Fuses all loops inside the body of the macro, unless they are annotated with
  ## `nofuse`.
  ##
  ## This version supports handing a string to be passed to OpenMP, i.e.
  ## `fuseLoops("parallelFor"): body`
  ##
  ## Note: To utilize OpenMP, you may have to compile with
  ## `--passC:"-fopenmp" --passL:"-lgomp"`
  ## (at least for GCC. For Clang the commands differ slightly I believe).
  ##
  ## There is also a chance you either have to compile with
  ## `--exceptions:quirky`
  ## or using the C++ backend, due to the C backend producing `goto` statements
  ## inside the loops, which lead to C compiler errors when combined with
  ## OpenMP.
  result = fuseLoopImpl(ompStr.strVal, body)

--------------------------------------------------------------------------------
/scinim/primes.nim:
--------------------------------------------------------------------------------
## Module that implements several procedures related to prime numbers
##
## Prime numbers are an essential building block of many algorithms in diverse
## areas such as cryptography, digital communications and many others.
## This module adds a function to generate rank-1 tensors of primes upto a
## certain value; as well as a function to calculate the prime factors of a
## number.

import arraymancer

proc primes*[T: SomeInteger | SomeFloat](upto: T): Tensor[T] =
  ## Generate a Tensor of prime numbers up to a certain value
  ##
  ## Return a Tensor of the prime numbers less than or equal to `upto`.
  ## A prime number is one that has no factors other than 1 and itself.
  ##
  ## Input:
  ##   - upto: Value up to which primes will be generated. If its type is
  ##           floating-point it must be a whole number. Otherwise a
  ##           ValueError will be raised.
  ##
  ## Result:
  ##   - Tensor (of the same type as `upto`) of the prime values less than
  ##     or equal to `upto`
  ##
  ## Note:
  ##   - This function implements a "half" Sieve of Eratosthenes algorithm
  ##     which is a classical Sieve of Eratosthenes in which only odd numbers
  ##     are checked. Many examples of this algorithm can be found online.
  ##     It also stops checking after sqrt(upto)
  ##   - The memory required by this procedure is proportional to the input
  ##     number.
  when T is SomeFloat:
    if upto != round(upto):
      raise newException(ValueError,
        "`upto` value (" & $upto & ") must be a whole number")

  if upto < 11:
    # Handle the primes below 11 to simplify the general code below
    # (by removing the need to handle the few cases in which the index to
    # `isprime`, calculated based on `factor` is negative)
    # This is the minimum set of primes that we must handle, but we could
    # extend this list to make the calculation faster for more of the
    # smallest primes
    let prime_candidates = [2.T, 3, 5, 7].toTensor()
    return prime_candidates[prime_candidates <=. upto]

  # General algorithm (valid for numbers higher than 10)
  # The candidates are the odd numbers 3, 5, 7, ... The odd value `v` is
  # stored at index `(v - 2) div 2` of both `prime_candidates` and `isprime`
  let prime_candidates = arange(3.T, T(upto + 1), 2.T)
  var isprime = ones[bool]((upto.int - 1) div 2)
  let max_possible_factor_idx = int(sqrt(upto.float)) div 2
  for factor in prime_candidates[_ ..< max_possible_factor_idx]:
    if isprime[(factor.int - 2) div 2]:
      # Cross out the odd multiples of `factor`, starting at `3 * factor`
      # (an index step of `factor` is a value step of `2 * factor`)
      isprime[(factor.int * 3 - 2) div 2 .. _ | factor.int] = false

  # Note that 2 is missing from the result, so it must be manually added to
  # the front of the result tensor
  return [2.T].toTensor().append(prime_candidates[isprime])

# The maximum float64 that can be represented as an integer that is followed
# by another integer that is representable as a float64 as well
const maximumConsecutiveFloat64Int = pow(2.0, 53) - 1.0

proc factor*[T: SomeInteger | SomeFloat](n: T): Tensor[T] =
  ## Return a Tensor containing the prime factors of the input
  ##
  ## Input:
  ##   - n: A value that will be factorized.
  ##        If its type is floating-point it must be a whole number. Otherwise
  ##        a ValueError will be raised.
  ## Result:
  ##   - A sorted Tensor containing the prime factors of the input.
  ##     Inputs smaller than 4 are returned as a single element tensor.
  ##
  ## A ValueError is also raised for negative inputs and for 64-bit integer
  ## inputs larger than 2^53 - 1 (which cannot be handled exactly by the
  ## floating-point operations used internally).
  ##
  ## Example:
  ## ```nim
  ## echo factor(60)
  ## # Tensor[system.int] of shape "[4]" on backend "Cpu"
  ## #     2     2     3     5
  ## ```
  if n < 0:
    raise newException(ValueError,
      "Negative values (" & $n & ") cannot be factorized")
  # Note: `int` and `int64` are distinct types, so checking `T is int64`
  # would skip this guard for the default `int` type on 64-bit targets
  when T is SomeInteger and sizeof(T) == 8:
    if n > T(maximumConsecutiveFloat64Int):
      raise newException(ValueError,
        "Value (" & $n & ") is too large to be factorized")
  elif T is SomeFloat:
    if floor(n) != n:
      raise newException(ValueError,
        "Non whole numbers (" & $n & ") cannot be factorized")

  if n < 4:
    return [n].toTensor

  # The algorithm works by keeping track of the list of unique potential,
  # candidate prime factors of the input, and iteratively adding those
  # that are confirmed to be factors into a list of confirmed factors
  # (which is stored in the `result` tensor variable).

  # First we must initialize the `candidate_factor` Tensor
  # The factors of the input can be found among the list of primes
  # that are smaller than or equal to input. However we can significantly
  # reduce the candidate list by taking into account the fact that only a
  # single factor can be greater than the square root of the input.
  # The algorithm is such that if that is the case we will add the input number
  # at the very end of the loop below
  var candidate_factors = primes(T(ceil(sqrt(float(n)))))

  # This list of prime candidate_factors is refined by finding those of them
  # that divide the input value (i.e. those whose `input mod prime` == 0).
  # Those candidates that don't divide the input are known to not be valid
  # factors and can be removed from the candidate_factors list. Those that do
  # divide the input are confirmed as valid factors and as such are added to
  # the result list. Then the input is divided by all of the remaining
  # candidates (by dividing the input by the product of all the remaining
  # candidates). The result is a number that is the product of all the factors
  # that are still unknown (which must be among the remaining candidates!) and
  # which we can call `unknown_factor_product`.
  # Then we can simply repeat the same process over and over, replacing the
  # original input with the remaining `unknown_factor_product` after each
  # iteration, until the `unknown_factor_product` (which is reduced by each
  # division at the end of each iteration) reaches 1. Alternatively, we might
  # run out of candidates, which will only happen when there is only one factor
  # left (which must be greater than the square root of the input) and is stored
  # in the `unknown_factor_product`. In that case we add it to the confirmed
  # factors (result) list and the process can stop.
  var unknown_factor_product = n
  while unknown_factor_product > 1:
    # Find the primes that are divisors of the remaining unknown_factor_product
    # Note that this tells us which of the remaining candidate_factors are
    # factors of the input _at least once_ (but they could divide it more
    # than once)
    let is_factor = (unknown_factor_product mod candidate_factors) ==. 0
    # Keep only the factors that divide the remainder and remove the rest
    # from the list of candidates
    candidate_factors = candidate_factors[is_factor]
    # after this step, all the items in candidate_factors are _known_ to be
    # factors of `unknown_factor_product` _at least once_!
    if candidate_factors.len == 0:
      # If there are no more prime candidates left, it means that the remainder
      # is a prime (and that it must be greater than the sqrt of the input),
      # and that we are done (after adding it to the result list)
      result = result.append([unknown_factor_product].toTensor)
      break
    # If we didn't stop it means that there are still candidates which we
    # _know_ are factors of the remainder, so we must add them to the result
    result = result.append(candidate_factors)
    # Now we can prepare for the next iteration by dividing the remainder,
    # by the factors we just added to the result. This remainder is the product
    # of the factors we don't know yet
    let known_factor_product = product(candidate_factors)
    when T is SomeInteger:
      # The candidates are distinct primes that all divide the remainder, so
      # this integer division is exact (no need to go through a float)
      unknown_factor_product = unknown_factor_product div known_factor_product
    else:
      unknown_factor_product = T(unknown_factor_product / known_factor_product)
    # After this division the items in `candidate_factors` become candidates again
    # and we can start a new iteration
  result = sorted(result)

proc isprimeImpl[T: SomeInteger | SomeFloat](n: T, candidate_factors: Tensor[int]): bool {.inline.} =
  ## Actual implementation of the isprime check
  # This function is optimized for speed in 2 ways:
  # 1. By first rejecting all non-whole float numbers and then performing the
  #    actual isprime check using integers (which is faster than using floats)
  # 2. By receiving a pre-calculated tensor of candidate_factors. This does not
  #    speed up the check of a single value, but it does speed up the check of
  #    a tensor of values. Note that because of #1 the candidate_factors must
  #    be a tensor of ints (even if the number being checked is a float)
  when T is SomeFloat:
    if floor(n) != n:
      return false
  # Convert for every input type (not only floats), so that the `mod` below
  # always operates on the element type of `candidate_factors`
  let whole = int(n)
  # `and` short-circuits: the tensor expression is not evaluated for n <= 1
  result = (whole > 1) and
    all(whole mod candidate_factors[candidate_factors <. whole])

proc isprime*[T: SomeInteger | SomeFloat](n: T): bool =
  ## Check whether the input is a prime number
  ##
  ## Only positive values higher than 1 can be primes (i.e. we exclude 1 and -1
  ## which are sometimes considered primes).
  ##
  ## Note that this function also works with floats, which are considered
  ## non-prime when they are not whole numbers.
  ##
  ## A ValueError is raised for 64-bit integer inputs larger than 2^53 - 1.
  # Note that we do here some checks that are repeated later inside of
  # `isprimeImpl`. This is done to avoid the unnecessary calculation of
  # the `candidate_factors` tensor in those cases
  if n <= 1:
    return false
  # See `factor`: `T is int64` would not match the default `int` type
  when T is SomeInteger and sizeof(T) == 8:
    if n > T(maximumConsecutiveFloat64Int):
      raise newException(ValueError,
        "Value (" & $n & ") is too large to be factorized")
  elif T is SomeFloat:
    if floor(n) != n:
      return false
  let candidate_factors = primes(int(ceil(sqrt(float(n)))))
  isprimeImpl(n, candidate_factors)

proc isprime*[T: SomeInteger | SomeFloat](t: Tensor[T]): Tensor[bool] =
  ## Element-wise check if the input values are prime numbers
  ##
  ## The result has the same shape as the input.
  result = zeros[bool](t.len)
  # Pre-calculate the list of primes that will be used for the element-wise
  # isprime check and then call isprimeImpl on each element
  # Note that the candidate_factors must be a tensor of ints (for performance
  # reasons)
  let largest = max(t.flatten())
  # If no element is greater than 1 there cannot be any prime and no
  # candidates are needed. This also avoids taking the square root of a
  # negative number (NaN) and converting it to an int
  let max_candidate =
    if largest > 1: int(ceil(sqrt(float(largest))))
    else: 0
  let candidate_factors = primes(max_candidate)
  for idx, val in t.enumerate():
    result[idx] = isprimeImpl(val, candidate_factors)
  return result.reshape(t.shape)

--------------------------------------------------------------------------------
/scinim/numpyarrays.nim:
--------------------------------------------------------------------------------
import std/[sequtils, strformat, tables, sugar]

import threading/smartptrs

import arraymancer

import nimpy {.all.}
import nimpy/[raw_buffers, py_types,
py_utils]
import nimpy/py_lib as lib

{.push gcsafe.}

proc dtype*(t: PyObject): PyObject =
  ## Returns the `dtype` attribute of the given Python object.
  nimpy.getAttr(t, "dtype")

proc pyprint*(o: PyObject) =
  ## Calls Python's builtin `print` on the given object.
  let py = pyBuiltinsModule()
  discard nimpy.callMethod(py, "print", o)

proc nptypes(name: string): PyObject =
  ## Returns the attribute `name` of the `numpy` module.
  let np = pyImport("numpy")
  nimpy.getAttr(np, name)

template dtype*(T: typedesc[int8]): PyObject = nptypes("byte")
template dtype*(T: typedesc[int16]): PyObject = nptypes("short")
template dtype*(T: typedesc[int32]): PyObject = nptypes("int32")
template dtype*(T: typedesc[int64]): PyObject = nptypes("int64")

template dtype*(T: typedesc[uint8]): PyObject = nptypes("ubyte")
template dtype*(T: typedesc[uint16]): PyObject = nptypes("ushort")
template dtype*(T: typedesc[uint32]): PyObject = nptypes("uint32")
template dtype*(T: typedesc[uint64]): PyObject = nptypes("uint64")

proc dtype*(T: typedesc[int]): PyObject =
  ## Maps Nim's platform dependent `int` to the numpy type of the same size.
  when sizeof(T) == sizeof(int64):
    dtype(int64)
  elif sizeof(T) == sizeof(int32):
    dtype(int32)
  else:
    {.error: "Unsupported sizeof(int)".}

proc dtype*(T: typedesc[uint]): PyObject =
  ## Maps Nim's platform dependent `uint` to the numpy type of the same size.
  when sizeof(T) == sizeof(uint64):
    dtype(uint64)
  elif sizeof(T) == sizeof(uint32):
    dtype(uint32)
  else:
    {.error: "Unsupported sizeof(uint)".}

proc dtype*(T: typedesc[bool]): PyObject = nptypes("bool")
proc dtype*(T: typedesc[char]): PyObject = nptypes("char")
proc dtype*(T: typedesc[float32]): PyObject = nptypes("single")
proc dtype*(T: typedesc[float64]): PyObject = nptypes("double")
proc dtype*(T: typedesc[Complex32]): PyObject = nptypes("csingle")
proc dtype*(T: typedesc[Complex64]): PyObject = nptypes("cdouble")

proc assertNumpyType[T](ndArray: PyObject) =
  ## Asserts that the `dtype` of `ndArray` is compatible with `T`: the item
  ## size must equal `sizeof(T)` and the `dtype.kind` character must match
  ## the category of `T` (float, signed int, unsigned int, bool or complex).
  ## Any other `T` is rejected.
  let
    dtype_sizeof = dtype(ndArray).itemsize.to(int)*sizeof(byte)
    dtype_kind = dtype(ndArray).kind.to(string)[0]

  if sizeof(T) != dtype_sizeof:
    raiseAssert(&"Error converting PyObject NDArray to Arraymancer Tensor. Type sizeof({$T})={sizeof(T)} not equal to numpy.dtype.itemsize ({dtype_sizeof}).")

  let msg = &"Error converting PyObject NDArray to Arraymancer Tensor. Type {$T} not compatible with numpy.dtype.kind {dtype_kind}."
  when T is SomeFloat:
    if dtype_kind != 'f':
      raiseAssert(msg)

  elif T is SomeSignedInt:
    if dtype_kind != 'i':
      raiseAssert(msg)

  elif T is SomeUnsignedInt:
    if dtype_kind != 'u':
      raiseAssert(msg)

  elif T is bool:
    if dtype_kind != 'b':
      raiseAssert(msg)

  elif T is Complex32 or T is Complex64:
    # numpy reports complex floating-point types with kind 'c'
    if dtype_kind != 'c':
      raiseAssert(msg)

  else:
    raiseAssert(msg)

type
  PyBuffer = object
    raw: RawPyBuffer

  NumpyArray*[T] = object
    # pyBuf: ptr RawPyBuffer # to keep track of the buffer so that we can release it
    # pyBuf: SharedPtr[RawPyBuffer] # to keep track of the buffer so that we can release it
    pyBuf: SharedPtr[PyBuffer] # to keep track of the buffer so that we can release it
    data*: ptr UncheckedArray[T] # this will be the raw data
    shape*: seq[int]
    strides*: seq[int]
    len*: int

proc release*(b: var PyBuffer) =
  ## Releases the wrapped raw Python buffer.
  b.raw.release()

proc `=destroy`*(b: var PyBuffer) =
  b.release()

proc raw(x: SharedPtr[PyBuffer]): RawPyBuffer =
  x[].raw

proc raw(x: var SharedPtr[PyBuffer]): var RawPyBuffer =
  x[].raw

proc obj*[T](x: NumpyArray[T]): PyObject =
  ## Returns the Python object the buffer of `x` was obtained from.
  pyValueToNim(x.pyBuf.raw.obj, result)

proc ndim*[T](ar: NumpyArray[T]) : cint {.noSideEffect, inline.} =
  ## Number of dimensions as stored in the raw Python buffer.
  ar.pyBuf.raw.ndim

proc dtype*[T](ar: NumpyArray[T]): PyObject =
  ## The numpy type corresponding to the element type `T`.
  return dtype(T)

proc nimValueToPy*[T](v: NumpyArray[T]): PPyObject {.inline.} =
  nimValueToPy(v.obj())

proc pyprint*[T](ar: NumpyArray[T]) =
  ## Short cut to call print() on a NumpyArray
  let py = pyBuiltinsModule()
  discard nimpy.callMethod(py, "print", ar)

proc toUnsafeView*[T](ndArray: NumpyArray[T]): ptr UncheckedArray[T] {.noSideEffect, inline.} =
  ## Returns the raw data pointer of the array.
  ndArray.data

proc check_c_contiguous*[T](ar: NumpyArray[T]) : bool =
  ## True if the underlying Python object reports C contiguous data.
  # The attribute has to be queried on the Python object: the `data` field of
  # `NumpyArray` is a raw Nim pointer without any Python attributes.
  ar.obj().data.c_contiguous.to(bool)

proc check_f_contiguous*[T](ar: NumpyArray[T]) : bool =
  ## True if the underlying Python object reports Fortran contiguous data.
  ar.obj().data.f_contiguous.to(bool)

proc initNumpyArray*[T](ar: sink PyObject): NumpyArray[T] =
  ## Wraps the Python object `ar` by acquiring its buffer (requested as
  ## writable and N-dimensional) and reading shape, strides and the data
  ## pointer from it. No type or contiguity check is performed here.
  result.pyBuf = newSharedPtr(PyBuffer())
  let f = sizeof(T) div sizeof(byte)
  # Python reports strides in bytes; store them in units of elements
  result.strides = ar.data.strides.to(seq[int]).map(x => (x div f))
  ar.getBuffer(result.pyBuf.raw, PyBUF_WRITABLE or PyBUF_ND)
  let shapear = cast[ptr UncheckedArray[Py_ssize_t]](result.pyBuf.raw.shape)
  for i in 0 ..< result.pyBuf.raw.ndim:
    let dimsize = shapear[i].int # py_ssize_t is csize
    result.shape.add dimsize

  result.len = result.shape.foldl(a * b, 1)
  result.data = cast[ptr UncheckedArray[T]](result.pyBuf.raw.buf)

proc pyValueToNim*[T: SomeNumber](v: PPyObject, o: var NumpyArray[T]) {.inline.} =
  var vv = newPyObject(v)
  o = initNumpyArray[T](vv)

proc isContiguous*[T](ar: NumpyArray[T]) : bool =
  ## True if the data is either C or Fortran contiguous.
  result = ar.check_c_contiguous() or ar.check_f_contiguous()

proc asContiguous*[T](ar: NumpyArray[T]) : NumpyArray[T] =
  ## Returns the result of `numpy.ascontiguousarray` for `ar`.
  let np = pyImport("numpy")
  # `pyValueToNim` fills a `var` argument and returns nothing, so the result
  # of the Python call is wrapped via `initNumpyArray` instead
  result = initNumpyArray[T](np.ascontiguousarray(ar))

proc numpyArrayToTensorView*[T](ndArray: NumpyArray[T]): Tensor[T] {.noSideEffect, inline.}=
  ## Creates a Tensor from the data buffer of `ndArray` via `fromBuffer`.
  var buf = cast[ptr T](toUnsafeView(ndArray))
  result = fromBuffer[T](buf, ndArray.shape)

proc asNumpyArray*[T](ar: sink PyObject): NumpyArray[T] =
  ## some PyObject that points to a numpy array
  ## User has to make sure that the data type of the array can be
  ## cast to `T` without loss of information!
  ##
  ## If the data is not C contiguous, the array is first passed through
  ## `numpy.ascontiguousarray`.
  assertNumpyType[T](ar)
  if not ar.data.c_contiguous.to(bool):
    let np = pyImport("numpy")
    var ar = np.ascontiguousarray(ar)
    return initNumpyArray[T](ar)
  else:
    return initNumpyArray[T](ar)

proc ndArrayFromPtr*[T](t: ptr T, shape: seq[int]): NumpyArray[T] =
  ## Creates a new numpy array of the given `shape` and copies
  ## `product(shape)` elements starting at `t` into it.
  let np = pyImport("numpy")
  let py_array_type = dtype(T)
  # Just a trick to force an initialization of a Numpy Array of the correct size
  result = asNumpyArray[T](
    nimpy.callMethod(np, "zeros", shape, py_array_type)
  )
  var bsizes = result.len*(sizeof(T) div sizeof(uint8))
  copyMem(addr(result.data[0]), t, bsizes)

proc ndArrayFromPtr*[T](t: ptr UncheckedArray[T], shape: seq[int]): NumpyArray[T] =
  result = ndArrayFromPtr[T](cast[ptr T](t), shape)

# Arraymancer only
proc numpyArrayToTensor[T](ndArray: NumpyArray[T]): Tensor[T] =
  ## Copies the data of `ndArray` into a newly allocated Tensor.
  result = newTensor[T](ndArray.shape)
  var buf = cast[ptr T](toUnsafeView(ndArray))
  copyFromRaw(result, buf, ndArray.len)

proc toTensor*[T](ndArray: NumpyArray[T]): Tensor[T] =
  result = numpyArrayToTensor[T](ndArray)

proc toTensor*[T](pyobj: PyObject): Tensor[T] =
  var ndArray = asNumpyArray[T](pyobj)
  result = numpyArrayToTensor[T](ndArray)

# Convert Tensor to RawPyBuffer
proc ndArrayFromTensor[T](t: Tensor[T]): NumpyArray[T] =
  ## Copies the (row major contiguous) data of `t` into a new numpy array.
  # Reshape PyObject to Arraymancer Tensor
  var shape = t.shape.toSeq()
  var t = asContiguous(t, rowMajor)
  var buf = cast[ptr T](toUnsafeView(t))
  result = ndArrayFromPtr[T](buf, shape)

proc toNdArray*[T](t: Tensor[T]): NumpyArray[T] =
  ndArrayFromTensor[T](t)

proc pyValueToNim*[T](ar: NumpyArray[T], o: var Tensor[T]) {.inline.} =
  o = toTensor(ar)

proc initNumpyArray*[T](shape: seq[int]) : NumpyArray[T] =
  ## init np array from shape
  let np = pyImport("numpy")
  let py_array_type = dtype(T)
  result = asNumpyArray[T](
    nimpy.callMethod(np, "zeros", shape, py_array_type)
  )

# Indexing
{.push noSideEffect, inline.}

func checkIndex[T](ndArray: NumpyArray[T], idx: varargs[int]) =
  ## Raises an `IndexDefect` if the number of indices differs from the number
  ## of dimensions or if any index is outside of the shape.
  if unlikely(idx.len != ndArray.ndim.int):
    raise newException(
      IndexDefect, "Number of arguments: " &
                  $(idx.len) &
                  ", is different from tensor ndim: " &
                  $(ndArray.ndim)
    )
  for i in 0 ..< ndArray.shape.len():
    if unlikely(not(0 <= idx[i] and idx[i] < ndArray.shape[i])):
      raise newException(
        IndexDefect, "Out-of-bounds access: " &
                    "Tensor of shape " & $ndArray.shape &
                    " being indexed by " & $idx
      )

func checkContiguousIndex[T](ndArray: NumpyArray[T], idx: int) =
  ## Raises an `IndexDefect` if `idx` is not within `0 ..< ndArray.len`.
  # `NumpyArray` stores its number of elements in `len` (it has no `size`)
  if unlikely(idx < 0 or idx >= ndArray.len):
    raise newException(IndexDefect, "Invalid contigous index: " &
                                    $idx &
                                    " while tensor size is " &
                                    $(ndArray.len))

250 | proc getIndex*[T](ndArray: NumpyArray[T], idx: varargs[int]): int = 251 | when compileOption("boundChecks"): 252 | ndArray.checkIndex(idx) 253 | 254 | result = 0 255 | # result = ndArray.offset # N/A we assume offset is 0.0 256 | for i in 0..