├── .gitignore ├── examples ├── lena.png ├── quickstart.nim └── visualize_filters.nim ├── src ├── arraymancer_vision │ ├── utils.nim │ ├── visdom.nim │ ├── scale.nim │ ├── filters.nim │ ├── imageio.nim │ └── transform.nim └── arraymancer_vision.nim ├── arraymancer_vision.nimble ├── README.md ├── tests └── all_tests.nim ├── LICENSE └── doc └── arraymancer_vision.html /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache/ 2 | .DS_Store 3 | TODO 4 | -------------------------------------------------------------------------------- /examples/lena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/edubart/arraymancer-vision/HEAD/examples/lena.png -------------------------------------------------------------------------------- /src/arraymancer_vision/utils.nim: -------------------------------------------------------------------------------- 1 | template unsafeAt[T](t: Tensor[T], x: int): Tensor[T] = 2 | t.unsafeSlice(x, _, _).unsafeReshape([t.shape[1], t.shape[2]]) 3 | -------------------------------------------------------------------------------- /arraymancer_vision.nimble: -------------------------------------------------------------------------------- 1 | ### Package 2 | version = "0.0.3" 3 | author = "Eduardo Barthel" 4 | description = "Image transformation and visualization utilities for arraymancer" 5 | license = "Apache License 2.0" 6 | 7 | ### Dependencies 8 | requires "nim >= 0.17.2", "arraymancer >= 0.2.0", "stb_image >= 1.6" 9 | 10 | ## Install files 11 | srcDir = "src" 12 | -------------------------------------------------------------------------------- /src/arraymancer_vision.nim: -------------------------------------------------------------------------------- 1 | import math, strutils, sequtils, random, typetraits, future, macros, os 2 | 3 | import stb_image/read as stbi 4 | import stb_image/write as stbiw 5 | import arraymancer 6 | 7 | 
include
  arraymancer_vision/utils,
  arraymancer_vision/imageio,
  arraymancer_vision/transform,
  arraymancer_vision/filters,
  arraymancer_vision/scale,
  arraymancer_vision/visdom

export arraymancer
-------------------------------------------------------------------------------- /examples/quickstart.nim: --------------------------------------------------------------------------------
import arraymancer_vision

# Read the sample image into a CxHxW Tensor[uint8]
var source = load("examples/lena.png")

# Preprocess: crop to the 128x128 center, mirror horizontally, rotate a
# quarter turn, sharpen, then upscale to 512x512 with bilinear interpolation
var processed = source.center_crop(128, 128).hflip().rot90(1)
processed = processed.filter_sharpen().scale(512, 512, ScaleBilinear)

# Push both the original and the processed image to a running visdom server
let viz = newVisdomClient()
viz.image(source)
viz.image(processed)

# Write the processed image back to disk
processed.save("examples/preprocessed_lena.png")
-------------------------------------------------------------------------------- /examples/visualize_filters.nim: --------------------------------------------------------------------------------
import arraymancer_vision

# Read the sample image into a CxHxW Tensor[uint8]
var img = load("examples/lena.png")

# Post the original plus every predefined filter to visdom, one window each
let viz = newVisdomClient()
viz.image(img, "Normal")
viz.image(img.filter_blur(), "Blur")
viz.image(img.filter_contour(), "Contour")
viz.image(img.filter_detail(), "Detail")
viz.image(img.filter_edge_enhance(), "Edge Enhance")
viz.image(img.filter_edge_enhance_more(), "Edge Enhance More")
viz.image(img.filter_emboss(), "Emboss")
viz.image(img.filter_smooth(), "Smooth")
viz.image(img.filter_smooth_more(), "Smooth More")
viz.image(img.filter_sharpen(), "Sharpen")
viz.image(img.filter_find_edges(), "Find Edges")
-------------------------------------------------------------------------------- /src/arraymancer_vision/visdom.nim: --------------------------------------------------------------------------------
import httpclient, json, base64

proc webEncodeData(data, mimeType: string): string =
  ## Wrap raw image bytes in an RFC 2397 data URI so they can be embedded
  ## directly in a JSON payload. `mimeType` is the FULL MIME type
  ## (e.g. "image/jpeg") and is inserted verbatim after "data:".
  ## Fix: the previous version prepended "data:image/" while callers already
  ## passed a full "image/..." type, yielding the malformed
  ## "data:image/image/jpg;base64,..." prefix.
  return "data:" & mimeType & ";base64," & base64.encode(data, newLine="")

proc postJson(url: string, params: JsonNode) =
  ## POST `params` serialized as JSON to `url`.
  ## Raises IOError on any non-200 response.
  let body = $params
  let client = newHttpClient()
  client.headers = newHttpHeaders({
    "content-type": "application/json",
    "content-length": $body.len
  })
  let response = client.request(url, httpMethod = HttpPost, body = body)
  if response.code != Http200:
    raise newException(IOError, "Failed to post json data")

type
  VisdomClient* = object
    ## Connection settings for a visdom server. Exported so callers can
    ## name the type returned by `newVisdomClient`.
    host: string
    port: int

proc newVisdomClient*(host: string = "localhost", port: int = 8097): VisdomClient =
  ## Prepare a visdom client for visualization.
  ## Defaults match visdom's standard local server (localhost:8097).
  result.host = host
  result.port = port

proc sendEvent(self: VisdomClient, opts: JsonNode, data: JsonNode, window: string) =
  ## Send a pane-update event to the visdom server under the "main"
  ## environment; a non-empty `window` targets an existing pane so the
  ## same pane can be refreshed instead of creating a new one.
  let params = %*{
    "eid": "main",
    "opts": opts,
    "data": data
  }

  if window.len > 0:
    params["win"] = % window

  let url = "http://" & self.host & ":" & $self.port & "/events"
  postJson(url, params)

proc image*(vis: VisdomClient,
            img: Tensor[uint8],
            window: string = "",
            caption: string = "",
            title: string = "") =
  ## Show image into visdom with the given title and specified window.
  ## The pane title falls back to the window name when `title` is empty.

  let opts = %*{
    "title": if title.len > 0: title else: window,
    "height": img.height,
    "width": img.width
  }

  let data = %*[{
    "content": {
      # "image/jpeg" is the registered MIME type (was the non-standard
      # "image/jpg"); webEncodeData inserts it verbatim after "data:"
      "src": img.toJPG().webEncodeData("image/jpeg"),
      "caption": caption
    },
    "type": "image"
  }]

  vis.sendEvent(opts, data, window)
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Arraymancer Vision (WIP) 2 | 3 | Simple library for image loading, preprocessing and visualization for working with arraymancer. 4 | 5 | ## Features 6 | 7 | * Loading image into tensors 8 | * Simple image transformations like flipping, rotation, scaling 9 | * Saving images 10 | * Image convolution filters like sharpen, edges 11 | * Visualization of images using [visdom](https://github.com/facebookresearch/visdom) 12 | 13 | # Quick Start 14 | 15 | ## Installation 16 | 17 | Install using nimble package manager: 18 | 19 | ```Bash 20 | nimble install arraymancer_vision 21 | ``` 22 | 23 | For visualizing you have to install visdom and run it: 24 | 25 | ```Bash 26 | pip install visdom 27 | python -m visdom.server 28 | ``` 29 | 30 | Then go to http://localhost:8097 31 | 32 | ## Usage example 33 | 34 | ```Nim 35 | import arraymancer_vision 36 | 37 | # Load image from file into a CxHxW Tensor[uint8] 38 | var origimage = load("examples/lena.png") 39 | 40 | # Do some preprocessing 41 | var image = origimage.center_crop(128, 128) 42 | image = image.hflip() 43 | image = image.rot90(1) 44 | image = image.filter_sharpen() 45 | image = image.scale(512, 512, ScaleBilinear) 46 | 47 | # Visualize it using visdom 48 | let vis = newVisdomClient() 49 | vis.image(origimage) 50 | vis.image(image) 51 | 52 | # Save it to a file 53 | image.save("examples/preprocessed_lena.png") 54 | ``` 55 | 56 | This quickstart example is inside examples directory, you can run it by 57 | cloning the repo and running with `nim c -r examples/quickstart.nim` 58 | 59 | You can visualize all predefined filters having visdom running and then 60 | running the filters example with `nim c -r examples/visualize_filters.nim` 61 | 62 | ## API 63 | 64 | Documentation of the completely available API is 
[here](https://rawgit.com/edubart/arraymancer-vision/master/doc/arraymancer_vision.html) 65 | 66 | ## Details 67 | 68 | The library operates all images as `Tensor[uint8]` with dimensions CxHxW, where C is in RGBA colorspace, note that other image libraries usually operates with images in HxWxC format, so remember this when using. This design choice is to optimize and facilitate operation on images in deep learning tasks. 69 | 70 | ## TODO 71 | 72 | * Simple drawing routines 73 | * Colorspace conversions 74 | -------------------------------------------------------------------------------- /src/arraymancer_vision/scale.nim: -------------------------------------------------------------------------------- 1 | type 2 | ScaleMode* = enum 3 | ScaleNearest = 0 4 | ScaleBilinear = 1 5 | 6 | proc round_pixel(a: float32, U: typedesc): U {.inline.} = 7 | when U is uint8: 8 | clamp(a + 0.5.float32, low(U).float32, high(U).float32).uint8 9 | elif U is float32: 10 | a.float32 11 | 12 | proc scale_nearest[T](src: Tensor[T], width, height: int): Tensor[T] {.inline.} = 13 | result = newTensor([src.channels, height, width], T) 14 | let 15 | step_x = src.height.float32 / height.float32 16 | step_y = src.width.float32 / width.float32 17 | for c in 0..= height or col >= width: 91 | case mode: 92 | of PadConstant: 93 | result[c, offset_col + w] = pad_constant 94 | of PadNearest: 95 | result[c, offset_col + w] = input[c_offset, clamp(row, 0, height-1), clamp(col, 0, width-1)] 96 | else: 97 | result[c, offset_col + w] = input[c_offset, row, col] 98 | 99 | proc correlate2d*[T,U](input: Tensor[T], weights: Tensor[U], pad: int = 0, mode: PadMode = PadConstant, cval: U = 0): Tensor[int] = 100 | ## Correlate an image with the given kernel weights, this is a convolution 101 | ## without flipping the kernel 102 | let ksize = weights.width 103 | 104 | assert input.rank == 3 105 | assert weights.rank == 3 106 | assert weights.width == weights.height 107 | assert ksize > 0 and ksize mod 2 == 1 108 | 
109 | let 110 | channels = input.channels 111 | height = input.height + (2 * pad) - ksize + 1 112 | width = input.width + (2 * pad) - ksize + 1 113 | channel_ksize = ksize*ksize 114 | 115 | var w = weights.reshape([channels, 1, ksize*ksize]) 116 | var x = im2col(input.astype(U), ksize, pad, mode, cval).unsafeReshape([channels, channel_ksize, height*width]) 117 | var res_channels = newSeq[Tensor[U]](channels) 118 | 119 | for c in 0.. 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Module arraymancer_vision 20 | 1165 | 1166 | 1167 | 1168 | 1193 | 1194 | 1195 | 1196 |
1197 |
1198 |

Module arraymancer_vision

1199 |
1200 |
1201 |
1202 | Search: 1204 |
1205 |
1206 | Group by: 1207 | 1211 |
1212 | 1325 | 1326 |
1327 |
1328 |
1329 |

1330 | 1335 |
1336 |

Types

1337 |
1338 |
PadMode = enum
1339 |   PadConstant = 0, PadNearest = 1
1340 |
1341 | 1342 | 1343 |
1344 |
ScaleMode = enum
1345 |   ScaleNearest = 0, ScaleBilinear = 1
1346 |
1347 | 1348 | 1349 |
1350 | 1351 |
1352 |
1353 |

Procs

1354 |
1355 |
proc channels[T](img: Tensor[T]): int {.
inline
.}
1356 |
1357 | Return number of channels of the image 1358 | 1359 |
1360 |
proc height[T](img: Tensor[T]): int {.
inline
.}
1361 |
1362 | Return height of the image 1363 | 1364 |
1365 |
proc width[T](img: Tensor[T]): int {.
inline
.}
1366 |
1367 | Return width of the image 1368 | 1369 |
1370 |
proc hwc2chw[T](img: Tensor[T]): Tensor[T]
1371 |
1372 | Convert image from HxWxC convention to the CxHxW convention, where C,W,H stand for channels, width, height; note that this library only works with CxHxW images for optimization and internal usage reasons; using CxHxW for images is also a common approach in deep learning 1373 | 1374 |
1375 |
proc chw2hwc[T](img: Tensor[T]): Tensor[T]
1376 |
1377 | Convert image from CxHxW convention to the HxWxC convention, where C,W,H stand for channels, width, height; note that this library only works with CxHxW images for optimization and internal usage reasons; using CxHxW for images is also a common approach in deep learning 1378 | 1379 |
1380 |
proc pixels(img: Tensor[uint8]): seq[uint8] {.
raises: [], tags: []
.}
1381 |
1382 | 1383 | 1384 |
1385 |
proc load(filename: string; desired_channels: int = 0): Tensor[uint8] {.
1386 | raises: [IOError], tags: []
.}
1387 |
1388 |

Load image from file, with the desired number of channels, into a contiguous CxHxW Tensor[uint8]. Desired channels defaults to 0 meaning that it will auto detect the number of channels, the returned image tensor will be in the CxHxW format even for images with a single channel.

1389 |

Supports PNG, JPG, BMP, TGA and HDR formats

1390 |

On error an IOError exception will be thrown

1391 | 1392 | 1393 |
1394 |
proc loadFromMemory(contents: string; desired_channels: int = 0): Tensor[uint8] {.
1395 | raises: [STBIException], tags: []
.}
1396 |
1397 | Like load but loads from memory, the contents must be a buffer for a supported image format 1398 | 1399 |
1400 |
proc loadFromDir(dir: string; desired_channels: int = 0): seq[Tensor[uint8]] {.
1401 | raises: [IOError, IOError, OSError], tags: [ReadDirEffect]
.}
1402 |
1403 | Load batch of images from a directory into a seq of tensors, the load is non recursive, throws an IOError exception on error. 1404 | 1405 |
1406 |
proc save(img: Tensor[uint8]; filename: string; jpeg_quality: int = 100) {.
1407 | raises: [IOError], tags: []
.}
1408 |
1409 | Save an image to a file, supports PNG, BMP, TGA and JPG. Argument jpeg_quality can be passed to inform the saving quality from a range 0 to 100, defaults to 100 1410 | 1411 |
1412 |
proc toPNG(img: Tensor[uint8]): string {.
raises: [Exception, IOError], 1413 | tags: [WriteIOEffect]
.}
1414 |
1415 | Convert an image to PNG into a string of bytes 1416 | 1417 |
1418 |
proc toBMP(img: Tensor[uint8]): string {.
raises: [Exception, IOError], 1419 | tags: [WriteIOEffect]
.}
1420 |
1421 | Convert an image to BMP into a string of bytes 1422 | 1423 |
1424 |
proc toTGA(img: Tensor[uint8]): string {.
raises: [Exception, IOError], 1425 | tags: [WriteIOEffect]
.}
1426 |
1427 | Convert an image to TGA into a string of bytes 1428 | 1429 |
1430 |
proc toJPG(img: Tensor[uint8]; quality: int = 100): string {.
1431 | raises: [Exception, IOError], tags: [WriteIOEffect]
.}
1432 |
1433 | Convert an image to JPG into a string of bytes. Argument jpeg_quality can be passed to inform the saving quality from a range 0 to 100, defaults to 100 1434 | 1435 |
1436 |
proc hflip(img: Tensor[uint8]): Tensor[uint8] {.
inline, raises: [IndexError], tags: []
.}
1437 |
1438 | Horizontal flips an image 1439 | 1440 |
1441 |
proc vflip(img: Tensor[uint8]): Tensor[uint8] {.
inline, raises: [IndexError], tags: []
.}
1442 |
1443 | Vertical flips an image 1444 | 1445 |
1446 |
proc vhflip(img: Tensor[uint8]): Tensor[uint8] {.
inline, raises: [IndexError], tags: []
.}
1447 |
1448 | Flip vertically and horizontally an image 1449 | 1450 |
1451 |
proc crop(img: Tensor[uint8]; x, y, width, height: int): Tensor[uint8] {.
inline, 1452 | raises: [IndexError], tags: []
.}
1453 |
1454 | Crop an image 1455 | 1456 |
1457 |
proc center_crop[T](img: Tensor[T]; width, height: int): Tensor[T] {.
inline
.}
1458 |
1459 | Crop an image to center 1460 | 1461 |
1462 |
proc random_crop[T](img: Tensor[T]; width, height: int): Tensor[T] {.
inline
.}
1463 |
1464 | Random crop an image 1465 | 1466 |
1467 |
proc rot90[T](img: Tensor[T]; k: int): Tensor[T]
1468 |
1469 | Rotate an image 90 degrees clockwise k times 1470 | 1471 |
1472 |
proc quantize_bytes[T: SomeReal](img: Tensor[T]; U: typedesc): Tensor[U]
1473 |
1474 | Quantize image bytes, from type T to U, useful for converting images from floats to ints 1475 | 1476 |
1477 |
proc quantize_bytes[T: SomeInteger](img: Tensor[T]; U: typedesc): Tensor[U]
1478 |
1479 | Quantize image bytes, from type T to U, useful for converting images from floats to ints 1480 | 1481 |
1482 |
proc im2col[T](input: Tensor[T]; ksize: int; pad: int = 0; mode: PadMode; pad_constant: int): Tensor[
1483 |     T]
1484 |
1485 | Convert blocks of an image into columns, useful for preprocessing an image before convolutions, pad mode. 1486 | 1487 |
1488 |
proc correlate2d[T, U](input: Tensor[T]; weights: Tensor[U]; pad: int = 0;
1489 |                      mode: PadMode = PadConstant; cval: U = 0): Tensor[int]
1490 |
1491 | Correlate an image with the given kernel weights, this is a convolution without flipping the kernel 1492 | 1493 |
1494 |
proc convolve2d(input: Tensor[uint8]; weights: Tensor[int]; pad: int;
1495 |                mode: PadMode = PadConstant; cval: int = 0): Tensor[int] {.
1496 | raises: [IndexError, ValueError, Exception], tags: [RootEffect]
.}
1497 |
1498 | Convolve an image with the given kernel weights, like correlate but it flips the kernel before. 1499 | 1500 |
1501 |
proc tile_collection(imgs: Tensor[uint8]; max_width: int = 0): Tensor[uint8] {.
1502 | raises: [IndexError, Exception, ValueError], tags: [RootEffect]
.}
1503 |
1504 | Combine multiple images into one big tiled image and returns it. The new generated image width will be at maximum the given max width if supplied, otherwise will be calculated to create a square image. 1505 | 1506 |
1507 |
proc kernel[T, U](img: Tensor[T]; kernel: Tensor[U]; scale: U = 1; offset: U = 0): Tensor[T]
1508 |
1509 |

Applies a kernel matrix to an image, divides the outputs by the scale factor and then sums the offset. For more information see https://en.wikipedia.org/wiki/Kernel_(image_processing)

1510 |

Implementation details: This functions does not flip the kernel, so it does image correlation instead of convolution. The padding borders of the image is replaced with the nearest neighbourhood border.

1511 | 1512 | 1513 |
1514 |
proc filter_blur[T](img: Tensor[T]): Tensor[T]
1515 |
1516 | Blur an image using a predefined kernel 1517 | 1518 |
1519 |
proc filter_contour[T](img: Tensor[T]): Tensor[T]
1520 |
1521 | Contour an image using a predefined kernel 1522 | 1523 |
1524 |
proc filter_detail[T](img: Tensor[T]): Tensor[T]
1525 |
1526 | Detail an image using a predefined kernel 1527 | 1528 |
1529 |
proc filter_edge_enhance[T](img: Tensor[T]): Tensor[T]
1530 |
1531 | Enhance edges of an image using a predefined kernel 1532 | 1533 |
1534 |
proc filter_edge_enhance_more[T](img: Tensor[T]): Tensor[T]
1535 |
1536 | Enhance edges of an image using a predefined kernel 1537 | 1538 |
1539 |
proc filter_emboss[T](img: Tensor[T]): Tensor[T]
1540 |
1541 | Emboss an image using a predefined kernel 1542 | 1543 |
1544 |
proc filter_sharpen[T](img: Tensor[T]): Tensor[T]
1545 |
1546 | Sharpen an image using a predefined kernel 1547 | 1548 |
1549 |
proc filter_smooth[T](img: Tensor[T]): Tensor[T]
1550 |
1551 | Smooth an image using a predefined kernel 1552 | 1553 |
1554 |
proc filter_find_edges[T](img: Tensor[T]): Tensor[T]
1555 |
1556 | Find edges of an image using a predefined kernel 1557 | 1558 |
1559 |
proc filter_smooth_more[T](img: Tensor[T]): Tensor[T]
1560 |
1561 | Smooth an image more using a predefined kernel 1562 | 1563 |
1564 |
proc scale[T](src: Tensor[T]; width, height: int; mode: ScaleMode = ScaleNearest): Tensor[
1565 |     T]
1566 |
1567 | Scale an image to a new size; supported modes are nearest and bilinear, defaults to nearest. 1568 | 1569 |
1570 |
proc newVisdomClient(host: string = "localhost"; port: int = 8097): VisdomClient {.
1571 | raises: [], tags: []
.}
1572 |
1573 | Prepare a visdom client for visualization 1574 | 1575 |
1576 |
proc image(vis: VisdomClient; img: Tensor[uint8]; window: string = "";
1577 |           caption: string = ""; title: string = "") {.
raises: [Exception, IOError, 1578 | ValueError, HttpRequestError, SslError, OSError, TimeoutError, ProtocolError, 1579 | KeyError, OverflowError], tags: [WriteIOEffect, ReadIOEffect, TimeEffect]
.}
1580 |
1581 | Show image into visdom with the given title and specified window 1582 | 1583 |
1584 | 1585 |
1586 | 1587 |
1588 |
1589 | 1590 |
1591 | 1596 |
1597 |
1598 |
1599 | 1600 | 1601 | 1602 | --------------------------------------------------------------------------------