├── ENet-SAD-Simple
├── LICENSE
├── ParallelCriterion2.lua
├── README.md
├── checkpoints.lua
├── dataloader.lua
├── datasets
│   ├── init.lua
│   ├── lane-gen.lua
│   ├── lane.lua
│   ├── laneTest-gen.lua
│   ├── laneTest.lua
│   └── transforms.lua
├── experiments
│   ├── test.sh
│   └── train.sh
├── list
│   ├── test.txt
│   ├── train_final.txt
│   └── val_final.txt
├── main.lua
├── models
│   ├── init.lua
│   └── init_test.lua
├── opts.lua
├── testLane.lua
└── train.lua

├── LICENSE
├── README.md
├── __init__.py
├── common
├── __init__.py
├── constants.py
└── helper_scripts.py
├── culane_metric
├── evaluate.py
└── requirements.txt
├── deeplab
├── README.md
├── __init__.py
├── deeplab_common.py
├── deeplab_tfrecords.py
├── deeplab_train.py
└── deeplab_vis.py
├── evaluation
├── README.md
├── __init__.py
├── evaluate_segmentation.py
├── fix_inference_output_names.py
└── segmentation_metrics.py
├── label_scripts
├── README.md
├── __init__.py
├── check_labels.py
├── dataset_constants.py
├── label_file_scripts.py
├── segmentation_labels.py
├── spline_creator.py
├── visualize_labels.py
└── visualize_labels_for_folder.py
├── lane_regression
├── README.md
├── evaluate.py
└── simple_mean_baseline.py
├── samples
├── sample_color.jpg
├── sample_gray.jpg
└── sample_labeled.jpg
└── simple_baseline
├── README.md
├── __init__.py
├── inference_segmentation.py
├── requirements.txt
├── segmentation_batch_reader.py
├── simple_net.py
├── train_binary.py
└── utils.py
--------------------------------------------------------------------------------
/ENet-SAD-Simple/LICENSE:
--------------------------------------------------------------------------------
1 | BSD License
2 | 
3 | For fb.resnet.torch software
4 | 
5 | Copyright (c) 2016, Facebook, Inc. All rights reserved.
6 | 
7 | Redistribution and use in source and binary forms, with or without modification,
8 | are permitted provided that the following conditions are met:
9 | 
10 |  * Redistributions of source code must retain the above copyright notice, this
11 |    list of conditions and the following disclaimer.
12 | 
13 |  * Redistributions in binary form must reproduce the above copyright notice,
14 |    this list of conditions and the following disclaimer in the documentation
15 |    and/or other materials provided with the distribution.
16 | 
17 |  * Neither the name Facebook nor the names of its contributors may be used to
18 |    endorse or promote products derived from this software without specific
19 |    prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/ParallelCriterion2.lua:
--------------------------------------------------------------------------------
1 | local ParallelCriterion2, parent = torch.class('nn.ParallelCriterion2', 'nn.Criterion')
2 | 
3 | function ParallelCriterion2:__init(repeatTarget)
4 |    parent.__init(self)
5 |    self.criterions = {}
6 |    self.weights = {}
7 |    self.gradInput = {}
8 |    self.repeatTarget = repeatTarget
9 | end
10 | 
11 | function ParallelCriterion2:add(criterion, weight)
12 |    assert(criterion, 'no criterion provided')
13 |    weight = weight or 1
14 |    table.insert(self.criterions, criterion)
15 |    table.insert(self.weights, weight)
16 |    return self
17 | end
18 | 
19 | function ParallelCriterion2:updateOutput(input, target)
20 |    self.output = 0
21 |    local output = {}
22 |    for i,criterion in ipairs(self.criterions) do
23 |       local target = self.repeatTarget and target or target[i]
24 |       local out = self.weights[i]*criterion:updateOutput(input[i],target) -- run each criterion's forward once and reuse it
25 |       self.output = self.output + out
26 |       table.insert(output, out)
27 |    end
28 |    return self.output, output
29 | end
30 | 
31 | function ParallelCriterion2:updateGradInput(input, target)
32 |    self.gradInput = nn.utils.recursiveResizeAs(self.gradInput, input)
33 |    nn.utils.recursiveFill(self.gradInput, 0)
34 |    for i,criterion in ipairs(self.criterions) do
35 |       local target = self.repeatTarget and target or target[i]
36 |       nn.utils.recursiveAdd(self.gradInput[i], self.weights[i], criterion:updateGradInput(input[i], target))
37 |    end
38 |    return self.gradInput
39 | end
40 | 
41 | function ParallelCriterion2:type(type, tensorCache)
42 |    self.gradInput = {}
43 |    return parent.type(self, type, tensorCache)
44 | end
45 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/README.md:
--------------------------------------------------------------------------------
1 | 
2 | ## Requirements
3 | - [Torch](http://torch.ch/docs/getting-started.html); please follow the installation instructions at [fb.resnet.torch](https://github.com/facebook/fb.resnet.torch).
4 | 
5 | ## Before You Start
6 | 
7 | Please follow [train_final.txt](./list/train_final.txt) and [val_final.txt](./list/val_final.txt) to place the LLAMAS dataset in the expected folder layout. We'll refer to the directory into which you cloned ENet-SAD-Simple as `$ENet_ROOT`.
8 | 
9 | ## Testing
10 | 1. Put your trained model into `./experiments/pretrained`
11 | ```Shell
12 | cd $ENet_ROOT/experiments/pretrained
13 | ```
14 | You can train the model yourself (see Training below) and then test it.
15 | 
16 | 2. Run the test script
17 | ```Shell
18 | cd $ENet_ROOT
19 | sh ./experiments/test.sh
20 | ```
21 | Testing results (probability maps of lane markings) are saved in `experiments/predicts/` by default.
22 | 
23 | 3. Submit the results to the server
24 | Please follow the LLAMAS instructions to submit your results.
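If you want to sanity-check the saved predictions before submitting, you can read them back with the same `image` package the pipeline uses. Below is a minimal sketch; the file name is a hypothetical example. Under the default settings, `testLane.lua` writes five maps per test frame: `<name>.json_0.png` for the background plus `<name>.json_1.png` to `<name>.json_4.png` for the four lane classes.

```lua
require 'image'
-- Load one predicted lane map; PNG masks come back as floats in [0, 1].
local pred = image.load('experiments/predicts/example.json_1.png', 1, 'float')
print(pred:size())        -- maps are padded back to the full 717 x 1276 frame
print(pred:gt(0.5):sum()) -- number of pixels marked as lane
```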
25 | The performance of our trained model is as follows (you can also find these results on the [official site](https://unsupervised-llamas.com/llamas/benchmark_multi)):
26 | 
27 |     0: {'auc': 0.9999917047921401,
28 |     'precision': 0.9973478736332135,
29 |     'recall': 0.9971669285812986,
30 |     'threshold': 0.8156624779014764},
31 | 
32 |     1: {'auc': 0.26641235441069483,
33 |     'precision': 0.16706499250242474,
34 |     'recall': 0.32421710675084453,
35 |     'threshold': 0.467389746285059},
36 | 
37 |     2: {'auc': 0.8964441173279966,
38 |     'precision': 0.6147108519863974,
39 |     'recall': 0.7864514156111525,
40 |     'threshold': 0.9866214343733576},
41 | 
42 |     3: {'auc': 0.8804212880439664,
43 |     'precision': 0.6146173378763694,
44 |     'recall': 0.7364950314056325,
45 |     'threshold': 0.9923073247646805},
46 | 
47 |     4: {'auc': 0.4982252894705332,
48 |     'precision': 0.3300198333296339,
49 |     'recall': 0.4491891521484192,
50 |     'threshold': 0.8424673897462851}
51 | 
52 | ## Training
53 | 1. Download the pre-trained model
54 | ```Shell
55 | cd $ENet_ROOT/experiments/models
56 | ```
57 | Download the pre-trained model [here](https://drive.google.com/open?id=1pIMThIsGn8z8rIs6WgSNzom1H8WVvP5Q) and move it to `$ENet_ROOT/experiments/models`.
58 | 2. Train the ENet-SAD-Simple model
59 | ```Shell
60 | cd $ENet_ROOT
61 | sh ./experiments/train.sh
62 | ```
63 | The training process will start, and trained models are saved to `experiments/models/ENet-SAD-Simple` by default.
64 | You can then test the trained model by following the Testing steps above. If you save the model under a different path or name, update the test script accordingly.
65 | 
66 | ## Citation
67 | 
68 | If you use this code, please cite the following publication:
69 | 
70 | ```
71 | @article{hou2019learning,
72 |   title={Learning Lightweight Lane Detection CNNs by Self Attention Distillation},
73 |   author={Hou, Yuenan and Ma, Zheng and Liu, Chunxiao and Loy, Chen Change},
74 |   journal={arXiv preprint arXiv:1908.00821},
75 |   year={2019}
76 | }
77 | ```
78 | 
79 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/checkpoints.lua:
--------------------------------------------------------------------------------
1 | --
2 | -- Copyright (c) 2016, Facebook, Inc.
3 | -- All rights reserved.
4 | --
5 | -- This source code is licensed under the BSD-style license found in the
6 | -- LICENSE file in the root directory of this source tree. An additional grant
7 | -- of patent rights can be found in the PATENTS file in the same directory.
8 | --
9 | local checkpoint = {}
10 | 
11 | local function deepCopy(tbl)
12 |    -- creates a copy of a network with new modules and the same tensors
13 |    local copy = {}
14 |    for k, v in pairs(tbl) do
15 |       if type(v) == 'table' then
16 |          copy[k] = deepCopy(v)
17 |       else
18 |          copy[k] = v
19 |       end
20 |    end
21 |    if torch.typename(tbl) then
22 |       torch.setmetatable(copy, torch.typename(tbl))
23 |    end
24 |    return copy
25 | end
26 | 
27 | function checkpoint.latest(opt)
28 |    if opt.resume == 'none' then
29 |       return nil
30 |    end
31 | 
32 |    local latestPath = paths.concat(opt.resume, 'latest.t7')
33 |    if not paths.filep(latestPath) then
34 |       return nil
35 |    end
36 | 
37 |    print('=> Loading checkpoint ' ..
latestPath) 38 | local latest = torch.load(latestPath) 39 | local optimState = torch.load(paths.concat(opt.resume, latest.optimFile)) 40 | 41 | return latest, optimState 42 | end 43 | 44 | function checkpoint.save(epoch, model, optimState, isBestModel, opt, iter, bestLoss) 45 | -- don't save the DataParallelTable for easier loading on other machines 46 | if torch.type(model) == 'nn.DataParallelTable' then 47 | model = model:get(1) 48 | end 49 | 50 | -- create a clean copy on the CPU without modifying the original network 51 | model = deepCopy(model):float():clearState() 52 | 53 | local modelFile = 'model_new.t7' 54 | local optimFile = 'optimState_new.t7' 55 | 56 | torch.save(paths.concat(opt.save, modelFile), model) 57 | torch.save(paths.concat(opt.save, optimFile), optimState) 58 | torch.save(paths.concat(opt.save, 'latest.t7'), { 59 | iter = iter, 60 | epoch = epoch, 61 | bestLoss = bestLoss, 62 | modelFile = modelFile, 63 | optimFile = optimFile, 64 | }) 65 | 66 | if isBestModel then 67 | torch.save(paths.concat(opt.save, 'model_best.t7'), model) 68 | end 69 | end 70 | 71 | return checkpoint 72 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/dataloader.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Multi-threaded data loader 10 | -- 11 | 12 | local datasets = require 'datasets/init' 13 | local Threads = require 'threads' 14 | Threads.serialization('threads.sharedserialize') 15 | 16 | local M = {} 17 | local DataLoader = torch.class('resnet.DataLoader', M) 18 | 19 | function DataLoader.create(opt) 20 | -- The train and val loader 21 | local loaders = {} 22 | local data 23 | if opt.dataset == 'lane' then 24 | data = {'train', 'val'} 25 | elseif opt.dataset == 'laneTest' then 26 | data = {'val'} 27 | else 28 | cmd:error('unknown dataset: ' .. opt.dataset) 29 | end 30 | for i, split in ipairs(data) do 31 | local dataset = datasets.create(opt, split) 32 | print("data created") 33 | loaders[i] = M.DataLoader(dataset, opt, split) 34 | print("data loaded") 35 | end 36 | 37 | return table.unpack(loaders) 38 | end 39 | 40 | function DataLoader:__init(dataset, opt, split) 41 | local manualSeed = opt.manualSeed 42 | local function init() 43 | require('datasets/' .. 
opt.dataset) 44 | end 45 | local function main(idx) 46 | if manualSeed ~= 0 then 47 | torch.manualSeed(manualSeed + idx) 48 | end 49 | torch.setnumthreads(1) 50 | _G.dataset = dataset 51 | _G.preprocess = dataset:preprocess() 52 | _G.preprocess_aug = dataset:preprocess_aug() 53 | return dataset:size() 54 | end 55 | 56 | local threads, sizes = Threads(opt.nThreads, init, main) 57 | -- self.nCrops = (split == 'val' and opt.tenCrop) and 10 or 1 58 | self.nCrops = 1 59 | self.threads = threads 60 | self.__size = sizes[1][1] 61 | self.batchSize = math.floor(opt.batchSize / self.nCrops) 62 | self.split = split 63 | self.dataset = opt.dataset 64 | end 65 | 66 | function DataLoader:size() 67 | return math.ceil(self.__size / self.batchSize) 68 | end 69 | 70 | function DataLoader:run() 71 | local threads = self.threads 72 | local size, batchSize = self.__size, self.batchSize 73 | local dataset = self.dataset 74 | --if self.split == 'val' then 75 | --batchSize = torch.round(batchSize / 2) 76 | --end 77 | local perm 78 | if self.split == 'val' then 79 | perm = torch.Tensor(size) 80 | for i = 1, size do 81 | perm[i] = i 82 | end 83 | else 84 | perm = torch.randperm(size) 85 | end 86 | 87 | local idx, sample = 1, nil 88 | local function enqueue() 89 | while idx <= size and threads:acceptsjob() do 90 | local indices = perm:narrow(1, idx, math.min(batchSize, size - idx + 1)) 91 | threads:addjob( 92 | function(indices, nCrops) 93 | local sz = indices:size(1) 94 | local batch, segLabels, exists, imgpaths 95 | for i, idx in ipairs(indices:totable()) do 96 | local sample = _G.dataset:get(idx) 97 | local input, segLabel, exist 98 | if dataset=='laneTest' then 99 | input = _G.preprocess(sample.input) 100 | elseif dataset=='lane' then 101 | input, segLabel, exist = _G.preprocess_aug(sample.input, sample.segLabel, sample.exist) 102 | segLabel:resize(segLabel:size(2),segLabel:size(3)) 103 | else 104 | cmd:error('unknown dataset: ' .. dataset) 105 | end 106 | if not batch then 107 | local imageSize = input:size():totable() 108 | local pathSize = sample.imgpath:size():totable() 109 | batch = torch.FloatTensor(sz, table.unpack(imageSize)) 110 | imgpaths = torch.CharTensor(sz, table.unpack(pathSize)) 111 | if dataset=='lane' then 112 | local labelSize = segLabel:size():totable() 113 | local existSize = exist:size():totable() 114 | segLabels = torch.FloatTensor(sz, table.unpack(labelSize)) 115 | exists = torch.FloatTensor(sz, table.unpack(existSize)) 116 | end 117 | end 118 | batch[i]:copy(input) 119 | imgpaths[i]:copy(sample.imgpath) 120 | if dataset=='lane' then 121 | segLabels[i]:copy(segLabel) 122 | exists[i]:copy(exist) 123 | end 124 | end 125 | local targets 126 | if dataset=='laneTest' then 127 | targets = nil 128 | elseif dataset=='lane' then 129 | targets = {segLabels, exists} 130 | else 131 | cmd:error('unknown dataset: ' .. 
dataset) 132 | end 133 | collectgarbage(); collectgarbage() 134 | 135 | return { 136 | input = batch, 137 | target = targets, 138 | imgpath = imgpaths, -- used in test 139 | } 140 | end, 141 | function(_sample_) 142 | sample = _sample_ 143 | end, 144 | indices, 145 | self.nCrops 146 | ) 147 | idx = idx + batchSize 148 | end 149 | end 150 | 151 | local n = 0 152 | local function loop() 153 | enqueue() 154 | if not threads:hasjob() then 155 | return nil 156 | end 157 | threads:dojob() 158 | if threads:haserror() then 159 | threads:synchronize() 160 | end 161 | enqueue() 162 | n = n + 1 163 | return n, sample 164 | end 165 | return loop 166 | end 167 | 168 | return M.DataLoader 169 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/datasets/init.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet and CIFAR-10 datasets 10 | -- 11 | 12 | local M = {} 13 | 14 | local function isvalid(opt, cachePath) 15 | local imageInfo = torch.load(cachePath) 16 | if imageInfo.basedir and imageInfo.basedir ~= opt.data then 17 | return false 18 | end 19 | return true 20 | end 21 | 22 | function M.create(opt, split) 23 | local cachePath = paths.concat(opt.gen, opt.dataset .. '.t7') 24 | if not paths.filep(cachePath) or not isvalid(opt, cachePath) then 25 | paths.mkdir('gen') 26 | 27 | local script = paths.dofile(opt.dataset .. '-gen.lua') 28 | script.exec(opt, cachePath) 29 | end 30 | local imageInfo = torch.load(cachePath) 31 | local Dataset = require('datasets/' .. opt.dataset) 32 | return Dataset(imageInfo, opt, split) 33 | end 34 | 35 | return M 36 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/datasets/lane-gen.lua: -------------------------------------------------------------------------------- 1 | local paths = require 'paths' 2 | local ffi = require 'ffi' 3 | 4 | local M = {} 5 | 6 | local function getPaths(file) 7 | local imagePath = torch.CharTensor() 8 | local labelPath = torch.CharTensor() 9 | 10 | local f = io.open(file, 'r') 11 | print('load file: ' .. 
file)
12 |    local imMaxLength = -1
13 |    local lbMaxLength = -1
14 |    local imagePaths = {}
15 |    local labelPaths = {}
16 |    local exists = {}
17 |    while true do
18 |       local line = f:read()
19 |       if line == nil then break end
20 |       local lineSplit = line:split(' ')
21 |       local impath = lineSplit[1]
22 |       local lbpath = lineSplit[2]
23 |       table.insert(imagePaths, impath)
24 |       table.insert(labelPaths, lbpath)
25 |       imMaxLength = math.max(imMaxLength, #impath + 1)
26 |       lbMaxLength = math.max(lbMaxLength, #lbpath + 1)
27 |       local exist = torch.Tensor(4):zero()
28 |       for i = 1, 4 do
29 |          exist[i] = lineSplit[i+2]
30 |       end
31 |       table.insert(exists, exist)
32 |    end
33 |    f:close() -- was 'f.close()', which never closed this handle
34 | 
35 |    local nImages = #imagePaths
36 |    local imagePath = torch.CharTensor(nImages, imMaxLength):zero()
37 |    local labelPath = torch.CharTensor(nImages, lbMaxLength):zero()
38 |    local Exist = torch.Tensor(nImages, 4):zero()
39 |    for i, path in ipairs(imagePaths) do
40 |       ffi.copy(imagePath[i]:data(), path)
41 |    end
42 |    for i, path in ipairs(labelPaths) do
43 |       ffi.copy(labelPath[i]:data(), path)
44 |    end
45 |    for i, data in ipairs(exists) do
46 |       Exist[i]:copy(data)
47 |    end
48 |    print("finish getPath")
49 |    return imagePath, labelPath, Exist
50 | end
51 | 
52 | local function getPerm(paths, seqLen, split) -- Permute data list order
53 |    local size = paths:size(1)
54 |    local perm
55 |    if split == 'train' then
56 |       perm = torch.randperm(size)
57 |    elseif split == 'val' then
58 |       perm = torch.Tensor(size)
59 |       for i = 1, size do
60 |          perm[i] = i
61 |       end
62 |    else
63 |       print('Wrong split: ' .. split)
64 |    end
65 |    local seqPerm = {}
66 |    for i = 1, size do
67 |       local id = perm[i]
68 |       if id <= size - seqLen + 1 then
69 |          local function videoName(impath)
70 |             local impath = string.sub(impath,2,-1)
71 |             local j = string.find(impath, '/')
72 |             if j then
73 |                local video = string.sub(impath, j+1, -11)
74 |                return video
75 |             else
76 |                return nil
77 |             end
78 |          end
79 |          local impath = ffi.string(paths[id]:data())
80 |          local impath2 = ffi.string(paths[id+seqLen-1]:data())
81 |          local video = videoName(impath)
82 |          local video2 = videoName(impath2)
83 |          if video == video2 then
84 |             table.insert(seqPerm, id)
85 |          end
86 |       end
87 |    end
88 |    local seqPermTensor = torch.Tensor(#seqPerm)
89 |    for i = 1, #seqPerm do
90 |       seqPermTensor[i] = seqPerm[i]
91 |    end
92 |    return seqPermTensor
93 | end
94 | 
95 | 
96 | function M.exec(opt, cacheFile)
97 |    local imagePath = torch.CharTensor() -- path to each image in dataset
98 |    local labelPath = torch.CharTensor() -- path to each label
99 | 
100 |    local listTrain = opt.train
101 |    local listVal = opt.val
102 | 
103 |    local trainImagePath, trainLabelPath, trainExist = getPaths(listTrain)
104 |    local valImagePath, valLabelPath, valExist = getPaths(listVal)
105 |    local trainPerm = getPerm(trainImagePath, opt.seqLen, 'train') -- set seqLen to 1 if you don't use rnn-based model
106 |    local valPerm = getPerm(valImagePath, 1, 'val')
107 |    print("create info")
108 |    local info = {
109 |       basedir = opt.data,
110 |       train = {
111 |          imagePath = trainImagePath,
112 |          labelPath = trainLabelPath,
113 |          Exist = trainExist,
114 |          perm = trainPerm,
115 |       },
116 |       val = {
117 |          imagePath = valImagePath,
118 |          labelPath = valLabelPath,
119 |          Exist = valExist,
120 |          perm = valPerm,
121 |       },
122 |    }
123 |    print(" | saving list of images to " ..
cacheFile) 124 | torch.save(cacheFile, info) 125 | return info 126 | end 127 | 128 | return M 129 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/datasets/lane.lua: -------------------------------------------------------------------------------- 1 | local image = require 'image' 2 | local paths = require 'paths' 3 | local t = require 'datasets/transforms' 4 | local ffi = require 'ffi' 5 | 6 | local M = {} 7 | local LaneDataset = torch.class('resnet.LaneDataset', M) 8 | 9 | function LaneDataset:__init(imageInfo, opt, split) 10 | self.imageInfo = imageInfo[split] 11 | self.opt = opt 12 | self.split = split 13 | self.dir = opt.data 14 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 15 | end 16 | 17 | function LaneDataset:get(i) 18 | local imgpath = ffi.string(self.imageInfo.imagePath[i]:data()) 19 | local lbpath = ffi.string(self.imageInfo.labelPath[i]:data()) 20 | --print(self.dir .. imgpath) 21 | local image = self:_loadImage(self.dir .. imgpath, 3, 'float') 22 | local label = self:_loadImage(self.dir .. lbpath, 1, 'byte') 23 | label:add(1) 24 | return { 25 | input = image, 26 | segLabel = label, 27 | exist = self.imageInfo.Exist[i], 28 | imgpath = self.imageInfo.imagePath[i], 29 | } 30 | end 31 | 32 | function LaneDataset:_loadImage(path, channel, ttype) 33 | local ok, input = pcall(function() 34 | return image.load(path, channel, ttype) 35 | end) 36 | 37 | if not ok then 38 | print("load image failed!") 39 | return -1 40 | end 41 | return input 42 | end 43 | 44 | function LaneDataset:size() 45 | return self.imageInfo.imagePath:size(1) 46 | end 47 | 48 | local meanstd = { 49 | mean = { 0.3598, 0.3653, 0.3662 }, 50 | std = { 0.2573, 0.2663, 0.2756 }, 51 | } 52 | 53 | function LaneDataset:preprocess() -- Don't use data augmentation for training RNN 54 | if self.split == 'train' then 55 | return t.Compose{ 56 | t.ScaleWH(640, 368), 57 | t.ColorNormalize(meanstd), 58 | } 59 | elseif self.split == 'val' then 60 | return t.Compose{ 61 | t.ScaleWH(640, 368), 62 | t.ColorNormalize(meanstd), 63 | } 64 | else 65 | error('invalid split: ' .. self.split) 66 | end 67 | end 68 | 69 | function LaneDataset:preprocess_aug() 70 | if self.split == 'train' then 71 | return t.Compose{ 72 | t.RandomScaleRatio(920, 1002, 274, 304), -- 760, 842, 274, 304 73 | t.ColorNormalize(meanstd), 74 | t.Rotation(2), 75 | --t.RandomCrop(800, 288), 76 | t.RandomCropLane(960, 288), -- 800, 288 77 | } 78 | elseif self.split == 'val' then 79 | return t.Compose{ 80 | t.ScaleWH(960, 288), -- 800, 288 81 | t.ColorNormalize(meanstd), 82 | } 83 | else 84 | error('invalid split: ' .. self.split) 85 | end 86 | end 87 | 88 | return M.LaneDataset 89 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/datasets/laneTest-gen.lua: -------------------------------------------------------------------------------- 1 | local paths = require 'paths' 2 | local ffi = require 'ffi' 3 | 4 | local M = {} 5 | 6 | local function getPaths(file) 7 | local imagePath = torch.CharTensor() 8 | 9 | local f = io.open(file, 'r') 10 | print('load file: ' .. 
file)
11 |    local imMaxLength = -1
12 |    local imagePaths = {}
13 |    while true do
14 |       local line = f:read()
15 |       if line == nil then break end
16 | 
17 |       local impath = line
18 |       table.insert(imagePaths, impath)
19 |       imMaxLength = math.max(imMaxLength, #impath + 1)
20 |    end
21 |    f:close() -- was 'f.close()', which never closed this handle
22 | 
23 |    local nImages = #imagePaths
24 |    local imagePath = torch.CharTensor(nImages, imMaxLength):zero()
25 |    for i, path in ipairs(imagePaths) do
26 |       ffi.copy(imagePath[i]:data(), path)
27 |    end
28 |    print("finish getPath")
29 |    return imagePath
30 | end
31 | 
32 | local function getPerm(paths, seqLen, split) -- Permute data list order
33 |    local size = paths:size(1)
34 |    local perm
35 |    if split == 'train' then
36 |       perm = torch.randperm(size)
37 |    elseif split == 'val' then
38 |       perm = torch.Tensor(size)
39 |       for i = 1, size do
40 |          perm[i] = i
41 |       end
42 |    else
43 |       print('Wrong split: ' .. split)
44 |    end
45 |    local seqPerm = {}
46 |    for i = 1, size do
47 |       local id = perm[i]
48 |       if id <= size - seqLen + 1 then
49 |          local function videoName(impath)
50 |             local impath = string.sub(impath,2,-1)
51 |             local j = string.find(impath, '/')
52 |             if j then
53 |                local video = string.sub(impath, j+1, -11)
54 |                return video
55 |             else
56 |                return nil
57 |             end
58 |          end
59 |          local impath = ffi.string(paths[id]:data())
60 |          local impath2 = ffi.string(paths[id+seqLen-1]:data())
61 |          local video = videoName(impath)
62 |          local video2 = videoName(impath2)
63 |          if video == video2 then
64 |             table.insert(seqPerm, id)
65 |          end
66 |       end
67 |    end
68 |    local seqPermTensor = torch.Tensor(#seqPerm)
69 |    for i = 1, #seqPerm do
70 |       seqPermTensor[i] = seqPerm[i]
71 |    end
72 |    return seqPermTensor
73 | end
74 | 
75 | 
76 | function M.exec(opt, cacheFile)
77 |    local imagePath = torch.CharTensor() -- path to each image in dataset
78 |    local labelPath = torch.CharTensor() -- path to each label
79 | 
80 |    local listVal = opt.val
81 | 
82 |    local valImagePath = getPaths(listVal)
83 |    local valPerm = getPerm(valImagePath, 1, 'val')
84 |    print("create info")
85 |    local info = {
86 |       basedir = opt.data,
87 |       train = nil,
88 |       val = {
89 |          imagePath = valImagePath,
90 |          perm = valPerm,
91 |       },
92 |    }
93 |    print(" | saving list of images to " .. cacheFile)
94 |    torch.save(cacheFile, info)
95 |    return info
96 | end
97 | 
98 | return M
99 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/datasets/laneTest.lua:
--------------------------------------------------------------------------------
1 | local image = require 'image'
2 | local paths = require 'paths'
3 | local t = require 'datasets/transforms'
4 | local ffi = require 'ffi'
5 | 
6 | local M = {}
7 | local LaneDataset = torch.class('resnet.LaneDataset', M)
8 | 
9 | function LaneDataset:__init(imageInfo, opt, split)
10 |    self.imageInfo = imageInfo[split]
11 |    self.opt = opt
12 |    self.split = split
13 |    self.dir = opt.data
14 |    assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir)
15 | end
16 | 
17 | function LaneDataset:get(i)
18 |    local imgpath = ffi.string(self.imageInfo.imagePath[i]:data())
19 |    local image = self:_loadImage(self.dir ..
imgpath, 3, 'float') 20 | return { 21 | input = image, 22 | imgpath = self.imageInfo.imagePath[i], 23 | } 24 | end 25 | 26 | function LaneDataset:_loadImage(path, channel, ttype) 27 | local ok, input = pcall(function() 28 | return image.load(path, channel, ttype) 29 | end) 30 | 31 | if not ok then 32 | print("load image failed!") 33 | return -1 34 | end 35 | return input 36 | end 37 | 38 | function LaneDataset:size() 39 | return self.imageInfo.imagePath:size(1) 40 | end 41 | 42 | local meanstd = { 43 | mean = { 0.3598, 0.3653, 0.3662 }, 44 | std = { 0.2573, 0.2663, 0.2756 }, 45 | } 46 | 47 | function LaneDataset:preprocess() 48 | if self.split == 'train' then 49 | return t.Compose{ 50 | --t.ScaleWH(640, 368), 51 | t.ScaleWH(960, 288), 52 | t.ColorNormalize(meanstd), 53 | } 54 | elseif self.split == 'val' then 55 | return t.Compose{ 56 | t.ScaleWH(960, 288), -- 800, 288 57 | t.ColorNormalize(meanstd), 58 | } 59 | else 60 | error('invalid split: ' .. self.split) 61 | end 62 | end 63 | 64 | function LaneDataset:preprocess_aug() 65 | if self.split == 'train' then 66 | return t.Compose{ 67 | t.RandomScaleRatio(735, 898, 267, 326), 68 | t.Rotation(2), 69 | t.RandomCrop(728, 264), 70 | t.ColorNormalize(meanstd), 71 | } 72 | elseif self.split == 'val' then 73 | return t.Compose{ 74 | t.ScaleWH(800, 288), 75 | t.ColorNormalize(meanstd), 76 | } 77 | else 78 | error('invalid split: ' .. self.split) 79 | end 80 | end 81 | 82 | return M.LaneDataset 83 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/datasets/transforms.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Image transforms for data augmentation and input normalization 10 | -- 11 | 12 | require 'image' 13 | 14 | local M = {} 15 | 16 | function M.Compose(transforms) 17 | return function(input, target, exist) 18 | for _, transform in ipairs(transforms) do 19 | input, target, exist = transform(input, target, exist) 20 | end 21 | return input, target, exist 22 | end 23 | end 24 | 25 | function M.ColorNormalize(meanstd) 26 | return function(img, target, exist) 27 | img = img:clone() 28 | for i=1,3 do 29 | img[i]:add(-meanstd.mean[i]) 30 | img[i]:div(meanstd.std[i]) 31 | end 32 | return img, target, exist 33 | end 34 | end 35 | 36 | -- Scales the smaller edge to size 37 | function M.Scale(size, interpolation) 38 | interpolation = interpolation or 'bicubic' 39 | return function(input) 40 | local w, h = input:size(3), input:size(2) 41 | if (w <= h and w == size) or (h <= w and h == size) then 42 | return input 43 | end 44 | if w < h then 45 | return image.scale(input, size, h/w * size, interpolation) 46 | else 47 | return image.scale(input, w/h * size, size, interpolation) 48 | end 49 | end 50 | end 51 | 52 | -- Added by PanXingang. 
Scales the width and height for input and target 53 | function M.ScaleWH(w, h, interpolation) 54 | interpolation = interpolation or 'bicubic' 55 | return function(input, target, exist) 56 | --- newly added part --- 57 | input = image.crop(input, 0, 333, 1276, 717) 58 | ------------------------ 59 | if target == nil then 60 | return image.scale(input, w, h, interpolation) 61 | else 62 | target = image.crop(target, 0, 333, 1276, 717) 63 | return image.scale(input, w, h, interpolation), image.scale(target, w, h, 'simple'), exist 64 | end 65 | end 66 | end 67 | 68 | -- Crop to centered rectangle 69 | function M.CenterCrop(w, h) 70 | return function(input, target) 71 | local w1 = math.ceil((input:size(3) - w)/2) 72 | local h1 = math.ceil((input:size(2) - h)/2) 73 | return image.crop(input, w1, h1, w1 + w, h1 + h), image.crop(target, w1, h1, w1 + w, h1 + h) -- center patch 74 | end 75 | end 76 | 77 | function M.RandomCrop(w, h, padding) 78 | padding = padding or 0 79 | 80 | return function(input, target) 81 | if padding > 0 then 82 | local temp = input.new(3, input:size(2) + 2*padding, input:size(3) + 2*padding) 83 | temp:zero() 84 | :narrow(2, padding+1, input:size(2)) 85 | :narrow(3, padding+1, input:size(3)) 86 | :copy(input) 87 | input = temp 88 | end 89 | 90 | local inputW, inputH = input:size(3), input:size(2) 91 | if inputW == w and inputH == h then 92 | return input, target 93 | end 94 | 95 | local x1, y1 = torch.random(0, inputW - w), torch.random(0, inputH - h) 96 | local out1 = image.crop(input, x1, y1, x1 + w, y1 + h) 97 | local out2 = image.crop(target, x1, y1, x1 + w, y1 + h) 98 | assert(out1:size(2) == h and out1:size(3) == w, 'wrong crop size') 99 | return out1, out2 100 | end 101 | end 102 | 103 | -- Random crop form larger image with optional zero padding 104 | function M.RandomCropLane(w, h, padding) 105 | padding = padding or 0 106 | 107 | return function(input, target, exist) 108 | if padding > 0 then 109 | local temp = input.new(3, input:size(2) + 2*padding, input:size(3) + 2*padding) 110 | temp:zero() 111 | :narrow(2, padding+1, input:size(2)) 112 | :narrow(3, padding+1, input:size(3)) 113 | :copy(input) 114 | input = temp 115 | end 116 | 117 | local inputW, inputH = input:size(3), input:size(2) 118 | if inputW == w and inputH == h then 119 | return input, target, exist 120 | end 121 | 122 | target:add(-1) 123 | if inputH < h then 124 | local pad = h - inputH 125 | local temp = input.new(3, h, input:size(3)) 126 | temp:zero() 127 | :narrow(2, pad+1, inputH) 128 | :copy(input) 129 | input = temp 130 | local temp2 = input.new(1, h, input:size(3)) 131 | temp2:zero() 132 | :narrow(2, pad+1, inputH) 133 | :copy(target) 134 | target = temp2 135 | end 136 | if inputW < w then 137 | local pad = torch.random(0, w - inputW) 138 | local temp = input.new(3, input:size(2), w) 139 | temp:zero() 140 | :narrow(3, pad+1, inputW) 141 | :copy(input) 142 | input = temp 143 | local temp2 = input.new(1, input:size(2), w) 144 | temp2:zero() 145 | :narrow(3, pad+1, inputW) 146 | :copy(target) 147 | target = temp2 148 | end 149 | local inputW, inputH = input:size(3), input:size(2) 150 | local x1, y1 = torch.random(0, inputW - w), torch.random(0, inputH - h) 151 | local out1 = image.crop(input, x1, y1, x1 + w, y1 + h) 152 | local out2 = image.crop(target, x1, y1, x1 + w, y1 + h) 153 | out2:add(1) 154 | assert(out1:size(2) == h and out1:size(3) == w, 'wrong crop size') 155 | return out1, out2, exist 156 | end 157 | end 158 | 159 | -- Resized with shorter side randomly sampled from [minSize, maxSize] 
(ResNet-style) 160 | function M.RandomScale(minSize, maxSize) 161 | return function(input, target) 162 | local w, h = input:size(3), input:size(2) 163 | 164 | local targetSz = torch.random(minSize, maxSize) 165 | local targetW, targetH = targetSz, targetSz 166 | if w < h then 167 | targetH = torch.round(h / w * targetW) 168 | else 169 | targetW = torch.round(w / h * targetH) 170 | end 171 | 172 | return image.scale(input, targetW, targetH, 'bicubic'), image.scale(target, targetW, targetH, 'simple') 173 | end 174 | end 175 | 176 | --Added by PanXingang. Resized with random scale and ratio 177 | function M.RandomScaleRatio(minW, maxW, minH, maxH) 178 | return function(input, target, exist) 179 | --- newly added part --- 180 | input = image.crop(input, 0, 333, 1276, 717) 181 | target = image.crop(target, 0, 333, 1276, 717) 182 | ------------------------ 183 | local w, h = input:size(3), input:size(2) 184 | 185 | local targetW = torch.random(minW, maxW) 186 | local targetH = torch.random(minH, maxH) 187 | 188 | return image.scale(input, targetW, targetH, 'bicubic'), image.scale(target, targetW, targetH, 'simple'), exist 189 | end 190 | end 191 | 192 | -- Random crop with size 8%-100% and aspect ratio 3/4 - 4/3 (Inception-style) 193 | function M.RandomSizedCrop(size) 194 | local scale = M.Scale(size) 195 | local crop = M.CenterCrop(size) 196 | 197 | return function(input) 198 | local attempt = 0 199 | repeat 200 | local area = input:size(2) * input:size(3) 201 | local targetArea = torch.uniform(0.08, 1.0) * area 202 | 203 | local aspectRatio = torch.uniform(3/4, 4/3) 204 | local w = torch.round(math.sqrt(targetArea * aspectRatio)) 205 | local h = torch.round(math.sqrt(targetArea / aspectRatio)) 206 | 207 | if torch.uniform() < 0.5 then 208 | w, h = h, w 209 | end 210 | 211 | if h <= input:size(2) and w <= input:size(3) then 212 | local y1 = torch.random(0, input:size(2) - h) 213 | local x1 = torch.random(0, input:size(3) - w) 214 | 215 | local out = image.crop(input, x1, y1, x1 + w, y1 + h) 216 | assert(out:size(2) == h and out:size(3) == w, 'wrong crop size') 217 | 218 | return image.scale(out, size, size, 'bicubic') 219 | end 220 | attempt = attempt + 1 221 | until attempt >= 10 222 | 223 | -- fallback 224 | return crop(scale(input)) 225 | end 226 | end 227 | 228 | function M.HorizontalFlip(prob) 229 | return function(input, target) 230 | if torch.uniform() < prob then 231 | input = image.hflip(input) 232 | target = image.hflip(target) 233 | end 234 | return input, target 235 | end 236 | end 237 | 238 | function M.Rotation(deg) 239 | return function(input, target, exist) 240 | if deg ~= 0 then 241 | local u = torch.uniform() 242 | input = image.rotate(input, (u - 0.5) * deg * math.pi / 180, 'bilinear') 243 | target:add(-1) 244 | target = image.rotate(target, (u - 0.5) * deg * math.pi / 180, 'simple') 245 | target:add(1) 246 | end 247 | return input, target, exist 248 | end 249 | end 250 | 251 | return M 252 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/experiments/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | data=./data 3 | #rm gen/lane.t7 4 | CUDA_VISIBLE_DEVICES="6,7" th testLane.lua \ 5 | -model experiments/pretrained/model_new.t7 \ 6 | -data ${data} \ 7 | -val ./list/test.txt \ 8 | -save experiments/predicts/ \ 9 | -dataset laneTest \ 10 | -shareGradInput true \ 11 | -nThreads 8 \ 12 | -nGPU 2 \ 13 | -batchSize 8 \ 14 | -smooth false 15 | 
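# A reduced single-GPU variant of the invocation above (a sketch; it assumes
# the same model and data layout, adjust -batchSize to fit your GPU memory):
#   CUDA_VISIBLE_DEVICES="0" th testLane.lua -model experiments/pretrained/model_new.t7 \
#     -data ./data -val ./list/test.txt -save experiments/predicts/ -dataset laneTest \
#     -shareGradInput true -nThreads 4 -nGPU 1 -batchSize 4 -smooth false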
--------------------------------------------------------------------------------
/ENet-SAD-Simple/experiments/train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | data=./data
3 | CUDA_VISIBLE_DEVICES="4,5,6,7" th main.lua \
4 | -data ${data} \
5 | -train ./list/train_final.txt \
6 | -val ./list/val_final.txt \
7 | -dataset lane \
8 | -save experiments/models/ENet-SAD-Simple \
9 | -retrain experiments/models/ENet-label-new.t7 \
10 | -shareGradInput true \
11 | -nThreads 8 \
12 | -nGPU 4 \
13 | -batchSize 12 \
14 | -maxIter 100000 \
15 | -LR 0.01 \
16 | -backWeight 0.4 \
17 | -nEpochs 100 \
18 | 2>&1|tee experiments/models/train.log
19 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/main.lua:
--------------------------------------------------------------------------------
1 | --
2 | -- Copyright (c) 2016, Facebook, Inc.
3 | -- All rights reserved.
4 | --
5 | -- This source code is licensed under the BSD-style license found in the
6 | -- LICENSE file in the root directory of this source tree. An additional grant
7 | -- of patent rights can be found in the PATENTS file in the same directory.
8 | --
9 | require 'torch'
10 | require 'paths'
11 | require 'optim'
12 | require 'nn'
13 | local opts = require 'opts'
14 | local opt = opts.parse(arg)
15 | local DataLoader = require 'dataloader'
16 | local Trainer = require 'train'
17 | local models = require 'models/init'
18 | local checkpoints = require 'checkpoints'
19 | 
20 | torch.setdefaulttensortype('torch.FloatTensor')
21 | torch.setnumthreads(1)
22 | 
23 | torch.manualSeed(opt.manualSeed)
24 | cutorch.manualSeedAll(opt.manualSeed)
25 | 
26 | -- Load previous checkpoint, if it exists
27 | local checkpoint, optimState = checkpoints.latest(opt)
28 | 
29 | -- Create model
30 | local model, criterion = models.setup(opt, checkpoint)
31 | 
32 | -- Data loading
33 | local trainLoader, valLoader = DataLoader.create(opt)
34 | 
35 | -- The trainer handles the training loop and evaluation on validation set
36 | local trainer = Trainer(model, criterion, opt, optimState, checkpoint)
37 | 
38 | --opt.testOnly = true
39 | 
40 | if opt.testOnly then
41 |    local Err, Acc, Rec, IOU = trainer:test(0, valLoader)
42 |    print(string.format(' * Results: Err: %.3f Acc: %.3f Rec: %.3f IOU: %.3f', Err, Acc, Rec, IOU))
43 |    return
44 | end
45 | 
46 | local startEpoch = checkpoint and checkpoint.epoch + 1 or opt.epochNumber
47 | local bestLoss = math.huge
48 | if checkpoint then
49 |    bestLoss = checkpoint.bestLoss or bestLoss -- was 'bestLoss or checkpoint.bestLoss', which always kept math.huge on resume
50 | end
51 | for epoch = startEpoch, opt.nEpochs do
52 |    -- Train for a single epoch
53 |    local trainLoss, finish = trainer:train(epoch, trainLoader)
54 |    print(string.format(' * TrainLoss: %.3f', trainLoss))
55 |    -- Run model on validation set
56 |    local valLoss, Acc, Rec, IOU = 0.0, 0.0, 0.0, 0.0
57 |    valLoss, Acc, Rec, IOU = trainer:test(epoch, valLoader)
58 | 
59 |    local bestModel = false
60 |    if valLoss < bestLoss then
61 |       bestModel = true
62 |       bestLoss = valLoss
63 |    end
64 | 
65 |    checkpoints.save(epoch, model, trainer.optimState, bestModel, opt, trainer.iter, bestLoss)
66 |    if finish then
67 |       break
68 |    end
69 | end
70 | 
71 | print(string.format(' * Finished Err: %6.3f', bestLoss))
72 | 
--------------------------------------------------------------------------------
/ENet-SAD-Simple/models/init.lua:
--------------------------------------------------------------------------------
1 | --
2 | -- Copyright (c) 2016, Facebook, Inc.
3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Generic model creating code. For the specific ResNet model see 10 | -- models/resnet.lua 11 | -- 12 | 13 | require 'nn' 14 | require 'cunn' 15 | require 'cudnn' 16 | require 'ParallelCriterion2' 17 | 18 | local M = {} 19 | 20 | function M.setup(opt, checkpoint) 21 | local model 22 | if checkpoint then 23 | local modelPath = paths.concat(opt.resume, checkpoint.modelFile) 24 | assert(paths.filep(modelPath), 'Saved model not found: ' .. modelPath) 25 | print('=> Resuming model from ' .. modelPath) 26 | model = torch.load(modelPath) 27 | if opt.softmax then 28 | model:add(cudnn.SpatialSoftMax()) 29 | print('Softmax added') 30 | end 31 | model:cuda() 32 | elseif opt.retrain ~= 'none' then -- For fine tuning CNN 33 | assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain) 34 | print('Loading model from file: ' .. opt.retrain) 35 | model = torch.load(opt.retrain):cuda() 36 | model.__memoryOptimized = nil 37 | elseif opt.model ~= 'none' then -- For testing CNN 38 | assert(paths.filep(opt.model), 'File not found: ' .. opt.model) 39 | print('Loading model from file: ' .. opt.model) 40 | model = torch.load(opt.model):cuda() 41 | else 42 | print('=> Creating model from file: models/' .. opt.netType .. '.lua') 43 | model = require('models/' .. opt.netType)(opt) 44 | end 45 | 46 | -- First remove any DataParallelTable 47 | if torch.type(model) == 'nn.DataParallelTable' then 48 | model = model:get(1) 49 | end 50 | 51 | model:get(19):get(1):get(4):get(1):get(2):get(6):remove(3) 52 | model:get(19):get(1):get(4):get(1):get(2):get(6):insert(nn.View(5400), 3) 53 | model:get(19):get(1):get(4):get(1):get(2):get(6):remove(4) 54 | model:get(19):get(1):get(4):get(1):get(2):get(6):insert(nn.Linear(5400, 128), 4) 55 | 56 | 57 | -- optnet is an general library for reducing memory usage in neural networks 58 | if opt.optnet then 59 | local optnet = require 'optnet' 60 | local imsize = opt.dataset == 'imagenet' and 224 or 32 61 | local sampleInput = torch.zeros(4,3,imsize,imsize):cuda() 62 | optnet.optimizeMemory(model, sampleInput, {inplace = false, mode = 'training'}) 63 | end 64 | 65 | -- This is useful for fitting ResNet-50 on 4 GPUs, but requires that all 66 | -- containers override backwards to call backwards recursively on submodules 67 | if opt.shareGradInput then 68 | M.shareGradInput(model) 69 | end 70 | 71 | -- For resetting the classifier when fine-tuning on a different Dataset 72 | if opt.resetClassifier and not checkpoint then 73 | print(' => Replacing classifier with ' .. opt.nClasses .. 
'-way classifier') 74 | 75 | local orig = model:get(#model.modules) 76 | assert(torch.type(orig) == 'nn.Linear', 77 | 'expected last layer to be fully connected') 78 | 79 | local linear = nn.Linear(orig.weight:size(2), opt.nClasses) 80 | linear.bias:zero() 81 | 82 | model:remove(#model.modules) 83 | model:add(linear:cuda()) 84 | end 85 | 86 | -- Set the CUDNN flags 87 | if opt.cudnn == 'fastest' then 88 | cudnn.fastest = true 89 | cudnn.benchmark = true 90 | elseif opt.cudnn == 'deterministic' then 91 | -- Use a deterministic convolution implementation 92 | model:apply(function(m) 93 | if m.setMode then m:setMode(1, 1, 1) end 94 | end) 95 | end 96 | 97 | -- Wrap the model with DataParallelTable, if using more than one GPU 98 | if opt.nGPU > 1 then 99 | local gpus = torch.range(1, opt.nGPU):totable() 100 | local fastest, benchmark = cudnn.fastest, cudnn.benchmark 101 | 102 | local dpt = nn.DataParallelTable(1, true, true) 103 | :add(model, gpus) 104 | :threads(function() 105 | local cudnn = require 'cudnn' 106 | cudnn.fastest, cudnn.benchmark = fastest, benchmark 107 | end) 108 | dpt.gradInput = nil 109 | 110 | model = dpt:cuda() 111 | end 112 | 113 | local weights = torch.Tensor(5) 114 | weights[1] = opt.backWeight 115 | weights[2] = 1 116 | weights[3] = 1 117 | weights[4] = 1 118 | weights[5] = 1 119 | local criterion 120 | if opt.dataset == 'lane' then 121 | local SCE = cudnn.SpatialCrossEntropyCriterion(weights):cuda() 122 | local BCE = nn.BCECriterion():cuda() 123 | local MSE_1 = nn.MSECriterion():cuda() 124 | local MSE_2 = nn.MSECriterion():cuda() 125 | criterion = nn.ParallelCriterion2():add(SCE, 1):add(BCE, 0.1):add(MSE_1, 0.0):add(MSE_2, 0.0) -- set the coefficients of MSE_1 and MSE_2 to be 0.1 if you want to use the distillation loss 126 | end 127 | print('Model:\n' .. model:__tostring()) 128 | return model, criterion 129 | end 130 | 131 | function M.shareGradInput(model) 132 | local function sharingKey(m) 133 | local key = torch.type(m) 134 | if m.__shareGradInputKey then 135 | key = key .. ':' .. m.__shareGradInputKey 136 | end 137 | return key 138 | end 139 | 140 | -- Share gradInput for memory efficient backprop 141 | local cache = {} 142 | model:apply(function(m) 143 | local moduleType = torch.type(m) 144 | if torch.isTensor(m.gradInput) and moduleType ~= 'nn.ConcatTable' then 145 | local key = sharingKey(m) 146 | if cache[key] == nil then 147 | cache[key] = torch.CudaStorage(1) 148 | end 149 | m.gradInput = torch.CudaTensor(cache[key], 1, 0) 150 | end 151 | end) 152 | for i, m in ipairs(model:findModules('nn.ConcatTable')) do 153 | if cache[i % 2] == nil then 154 | cache[i % 2] = torch.CudaStorage(1) 155 | end 156 | m.gradInput = torch.CudaTensor(cache[i % 2], 1, 0) 157 | end 158 | end 159 | 160 | return M 161 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/models/init_test.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Generic model creating code. 
For the specific ResNet model see 10 | -- models/resnet.lua 11 | -- 12 | 13 | require 'nn' 14 | require 'cunn' 15 | require 'cudnn' 16 | require 'ParallelCriterion2' 17 | 18 | local M = {} 19 | 20 | function M.setup(opt, checkpoint) 21 | local model 22 | if checkpoint then 23 | local modelPath = paths.concat(opt.resume, checkpoint.modelFile) 24 | assert(paths.filep(modelPath), 'Saved model not found: ' .. modelPath) 25 | print('=> Resuming model from ' .. modelPath) 26 | model = torch.load(modelPath) 27 | if opt.softmax then 28 | model:add(cudnn.SpatialSoftMax()) 29 | print('Softmax added') 30 | end 31 | model:cuda() 32 | elseif opt.retrain ~= 'none' then -- For fine tuning CNN 33 | assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain) 34 | print('Loading model from file: ' .. opt.retrain) 35 | model = torch.load(opt.retrain):cuda() 36 | model.__memoryOptimized = nil 37 | elseif opt.model ~= 'none' then -- For testing CNN 38 | assert(paths.filep(opt.model), 'File not found: ' .. opt.model) 39 | print('Loading model from file: ' .. opt.model) 40 | model = torch.load(opt.model):cuda() 41 | else 42 | print('=> Creating model from file: models/' .. opt.netType .. '.lua') 43 | model = require('models/' .. opt.netType)(opt) 44 | end 45 | 46 | -- First remove any DataParallelTable 47 | if torch.type(model) == 'nn.DataParallelTable' then 48 | model = model:get(1) 49 | end 50 | 51 | -- optnet is an general library for reducing memory usage in neural networks 52 | if opt.optnet then 53 | local optnet = require 'optnet' 54 | local imsize = opt.dataset == 'imagenet' and 224 or 32 55 | local sampleInput = torch.zeros(4,3,imsize,imsize):cuda() 56 | optnet.optimizeMemory(model, sampleInput, {inplace = false, mode = 'training'}) 57 | end 58 | 59 | -- This is useful for fitting ResNet-50 on 4 GPUs, but requires that all 60 | -- containers override backwards to call backwards recursively on submodules 61 | if opt.shareGradInput then 62 | M.shareGradInput(model) 63 | end 64 | 65 | -- For resetting the classifier when fine-tuning on a different Dataset 66 | if opt.resetClassifier and not checkpoint then 67 | print(' => Replacing classifier with ' .. opt.nClasses .. 
'-way classifier') 68 | 69 | local orig = model:get(#model.modules) 70 | assert(torch.type(orig) == 'nn.Linear', 71 | 'expected last layer to be fully connected') 72 | 73 | local linear = nn.Linear(orig.weight:size(2), opt.nClasses) 74 | linear.bias:zero() 75 | 76 | model:remove(#model.modules) 77 | model:add(linear:cuda()) 78 | end 79 | 80 | -- Set the CUDNN flags 81 | if opt.cudnn == 'fastest' then 82 | cudnn.fastest = true 83 | cudnn.benchmark = true 84 | elseif opt.cudnn == 'deterministic' then 85 | -- Use a deterministic convolution implementation 86 | model:apply(function(m) 87 | if m.setMode then m:setMode(1, 1, 1) end 88 | end) 89 | end 90 | 91 | -- Wrap the model with DataParallelTable, if using more than one GPU 92 | if opt.nGPU > 1 then 93 | local gpus = torch.range(1, opt.nGPU):totable() 94 | local fastest, benchmark = cudnn.fastest, cudnn.benchmark 95 | 96 | local dpt = nn.DataParallelTable(1, true, true) 97 | :add(model, gpus) 98 | :threads(function() 99 | local cudnn = require 'cudnn' 100 | cudnn.fastest, cudnn.benchmark = fastest, benchmark 101 | end) 102 | dpt.gradInput = nil 103 | 104 | model = dpt:cuda() 105 | end 106 | 107 | local weights = torch.Tensor(5) 108 | weights[1] = opt.backWeight 109 | weights[2] = 1 110 | weights[3] = 1 111 | weights[4] = 1 112 | weights[5] = 1 113 | local criterion 114 | if opt.dataset == 'lane' then 115 | local SCE = cudnn.SpatialCrossEntropyCriterion(weights):cuda() 116 | local BCE = nn.BCECriterion():cuda() 117 | local MSE_1 = nn.MSECriterion():cuda() 118 | local MSE_2 = nn.MSECriterion():cuda() 119 | criterion = nn.ParallelCriterion2():add(SCE, 1):add(BCE, 0.1):add(MSE_1, 0.1):add(MSE_2, 0.1) 120 | end 121 | print('Model:\n' .. model:__tostring()) 122 | return model, criterion 123 | end 124 | 125 | function M.shareGradInput(model) 126 | local function sharingKey(m) 127 | local key = torch.type(m) 128 | if m.__shareGradInputKey then 129 | key = key .. ':' .. m.__shareGradInputKey 130 | end 131 | return key 132 | end 133 | 134 | -- Share gradInput for memory efficient backprop 135 | local cache = {} 136 | model:apply(function(m) 137 | local moduleType = torch.type(m) 138 | if torch.isTensor(m.gradInput) and moduleType ~= 'nn.ConcatTable' then 139 | local key = sharingKey(m) 140 | if cache[key] == nil then 141 | cache[key] = torch.CudaStorage(1) 142 | end 143 | m.gradInput = torch.CudaTensor(cache[key], 1, 0) 144 | end 145 | end) 146 | for i, m in ipairs(model:findModules('nn.ConcatTable')) do 147 | if cache[i % 2] == nil then 148 | cache[i % 2] = torch.CudaStorage(1) 149 | end 150 | m.gradInput = torch.CudaTensor(cache[i % 2], 1, 0) 151 | end 152 | end 153 | 154 | return M 155 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/opts.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
8 | -- 9 | local M = { } 10 | 11 | function M.parse(arg) 12 | local cmd = torch.CmdLine() 13 | cmd:text() 14 | cmd:text('Torch-7 ResNet Training script') 15 | cmd:text('See https://github.com/facebook/fb.resnet.torch/blob/master/TRAINING.md for examples') 16 | cmd:text() 17 | cmd:text('Options:') 18 | ------------ General options -------------------- 19 | cmd:option('-data', '', 'Path to dataset') 20 | cmd:option('-train', '', 'Path to trainList') 21 | cmd:option('-val', '', 'Path to valList') 22 | cmd:option('-dataset', 'lane', 'Options: imagenet | cifar10 | cifar100 | lane | honda') 23 | cmd:option('-manualSeed', 0, 'Manually set RNG seed') 24 | cmd:option('-nGPU', 1, 'Number of GPUs to use by default') 25 | cmd:option('-backend', 'cudnn', 'Options: cudnn | cunn') 26 | cmd:option('-cudnn', 'fastest', 'Options: fastest | default | deterministic') 27 | cmd:option('-gen', 'gen', 'Path to save generated files') 28 | cmd:option('-softmax', 'false', 'Add softmax after the last layer (for test)') 29 | ------------- Data options ------------------------ 30 | cmd:option('-nThreads', 2, 'number of data loading threads') 31 | ------------- Training options -------------------- 32 | cmd:option('-nEpochs', 0, 'Number of total epochs to run') 33 | cmd:option('-epochNumber', 1, 'Manual epoch number (useful on restarts)') 34 | cmd:option('-maxIter', 60000, 'maxIter') 35 | cmd:option('-batchSize', 32, 'mini-batch size (1 = pure stochastic)') 36 | cmd:option('-testOnly', 'false', 'Run on validation set only') 37 | cmd:option('-tenCrop', 'false', 'Ten-crop testing') 38 | cmd:option('-backWeight', 1, 'Loss weight of background') 39 | cmd:option('-shuffle', 'false', 'Shuffle train data EVERY epoch') 40 | ------------- Test options ------------------------ 41 | cmd:option('-save', 'predict','Directory to save prediction') 42 | cmd:option('-model', 'none', 'Path to model for test') 43 | cmd:option('-smooth', 'false', 'Add smooth during test') 44 | ------------- Checkpointing options --------------- 45 | cmd:option('-save', 'checkpoints', 'Directory in which to save checkpoints') 46 | cmd:option('-resume', 'none', 'Resume from the latest checkpoint in this directory') 47 | ---------- Optimization options ---------------------- 48 | cmd:option('-LR', 0.1, 'initial learning rate') 49 | cmd:option('-LRpolicy', 'fix', 'learning rate decrease policy: fix | poly') 50 | cmd:option('-momentum', 0.9, 'momentum') 51 | cmd:option('-weightDecay', 1e-4, 'weight decay') 52 | ---------- Model options ---------------------------------- 53 | cmd:option('-netType', 'vgg', 'Options: segresnet | vgg_small | vgg | convlstm | holeconvlstm | convgru | convlstm_2c') 54 | cmd:option('-depth', 34, 'ResNet depth: 18 | 34 | 50 | 101 | ...', 'number') 55 | cmd:option('-shortcutType', '', 'Options: A | B | C') 56 | cmd:option('-retrain', 'none', 'Path to model to retrain with') 57 | cmd:option('-optimState', 'none', 'Path to an optimState to reload from') 58 | ---------- Model options ---------------------------------- 59 | cmd:option('-shareGradInput', 'false', 'Share gradInput tensors to reduce memory usage') 60 | cmd:option('-optnet', 'false', 'Use optnet to reduce memory usage') 61 | cmd:option('-resetClassifier', 'false', 'Reset the fully connected layer for fine-tuning') 62 | cmd:option('-nClasses', 0, 'Number of classes in the dataset') 63 | cmd:option('-nFeatures', 256, 'Number of features extracted by RNN') 64 | cmd:option('-seqLen', 1, 'Length of sequences fed into RNN') -- Used for training RNN 65 | cmd:text() 66 | 67 | local opt 
= cmd:parse(arg or {}) 68 | 69 | opt.softmax = opt.softmax ~= 'false' 70 | opt.testOnly = opt.testOnly ~= 'false' 71 | opt.tenCrop = opt.tenCrop ~= 'false' 72 | opt.shareGradInput = opt.shareGradInput ~= 'false' 73 | opt.optnet = opt.optnet ~= 'false' 74 | opt.resetClassifier = opt.resetClassifier ~= 'false' 75 | opt.shuffle = opt.shuffle ~= 'false' 76 | opt.smooth = opt.smooth ~= 'false' 77 | 78 | if not paths.dirp(opt.save) and not paths.mkdir(opt.save) then 79 | cmd:error('error: unable to create checkpoint directory: ' .. opt.save .. '\n') 80 | end 81 | 82 | if opt.dataset == 'imagenet' then 83 | -- Handle the most common case of missing -data flag 84 | local trainDir = paths.concat(opt.data, 'train') 85 | if not paths.dirp(opt.data) then 86 | cmd:error('error: missing ImageNet data directory') 87 | elseif not paths.dirp(trainDir) then 88 | cmd:error('error: ImageNet missing `train` directory: ' .. trainDir) 89 | end 90 | -- Default shortcutType=B and nEpochs=90 91 | opt.shortcutType = opt.shortcutType == '' and 'B' or opt.shortcutType 92 | opt.nEpochs = opt.nEpochs == 0 and 90 or opt.nEpochs 93 | elseif opt.dataset == 'cifar10' then 94 | -- Default shortcutType=A and nEpochs=164 95 | opt.shortcutType = opt.shortcutType == '' and 'A' or opt.shortcutType 96 | opt.nEpochs = opt.nEpochs == 0 and 164 or opt.nEpochs 97 | elseif opt.dataset == 'cifar100' then 98 | -- Default shortcutType=A and nEpochs=164 99 | opt.shortcutType = opt.shortcutType == '' and 'A' or opt.shortcutType 100 | opt.nEpochs = opt.nEpochs == 0 and 164 or opt.nEpochs 101 | elseif opt.dataset == 'lane' or opt.dataset == 'laneTest' then 102 | opt.nEpochs = opt.nEpochs == 0 and 90 or opt.nEpochs 103 | else 104 | cmd:error('unknown dataset: ' .. opt.dataset) 105 | end 106 | 107 | if opt.resetClassifier then 108 | if opt.nClasses == 0 then 109 | cmd:error('-nClasses required when resetClassifier is set') 110 | end 111 | end 112 | 113 | if opt.shareGradInput and opt.optnet then 114 | cmd:error('error: cannot use both -shareGradInput and -optnet') 115 | end 116 | 117 | return opt 118 | end 119 | 120 | return M 121 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/testLane.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'nn' 3 | require 'cunn' 4 | require 'cudnn' 5 | require 'lfs' 6 | require 'paths' 7 | local ffi = require 'ffi' 8 | image = require 'image' 9 | local models = require 'models/init_test' 10 | local opts = require 'opts' 11 | local DataLoader = require 'dataloader' 12 | local checkpoints = require 'checkpoints' 13 | 14 | opt = opts.parse(arg) 15 | show = false -- Set show to true if you want to visualize. In addition, you need to use qlua instead of th. 16 | 17 | checkpoint, optimState = checkpoints.latest(opt) 18 | model = models.setup(opt, checkpoint) 19 | offset = 0 20 | if opt.smooth then 21 | offset = 1 22 | end 23 | 24 | print(model) 25 | local valLoader = DataLoader.create(opt) 26 | print('data loaded') 27 | input = torch.CudaTensor() 28 | function copyInputs(sample) 29 | input = input or (opt.nGPU == 1 30 | and torch.CudaTensor() 31 | or cutorch.createCudaHostTensor()) 32 | input:resize(sample.input:size()):copy(sample.input) 33 | return input 34 | end 35 | 36 | function sleep(n) 37 | os.execute("sleep " .. 
tonumber(n)) 38 | end 39 | 40 | function process( scoremap ) 41 | local avg = nn.Sequential() 42 | avg:add(nn.SpatialSoftMax()) 43 | avg:add(nn.SplitTable(1, 3)) 44 | avg:add(nn.NarrowTable(2, 4)) 45 | local paral = nn.ParallelTable() 46 | local seq = nn.Sequential() 47 | seq:add(nn.Contiguous()) 48 | seq:add(nn.View(1, 288, 960):setNumInputDims(2)) 49 | local conv = nn.SpatialConvolution(1, 1, 9, 9, 1, 1, 4, 4) 50 | conv.weight:fill(1/81) 51 | conv.bias:fill(0) 52 | seq:add(conv) 53 | paral:add(seq) 54 | for i=1, 3 do 55 | paral:add(seq:clone('weight', 'bias','gradWeight','gradBias')) 56 | end 57 | avg:add(paral) 58 | avg:add(nn.JoinTable(1, 3)) 59 | avg:cuda() 60 | return avg:forward(scoremap) 61 | end 62 | 63 | model:evaluate() 64 | T = 0 65 | N = 0 66 | for n, sample in valLoader:run() do 67 | print(n) 68 | input = copyInputs(sample) 69 | local imgpath = sample.imgpath 70 | local timer = torch.Timer() 71 | output = model:forward(input) 72 | local t = timer:time().real 73 | print('time: ' .. t) 74 | local scoremap = output[1] --:double() 75 | if opt.smooth then 76 | scoremap = process(scoremap):float() 77 | else 78 | local softmax = nn.SpatialSoftMax():cuda() 79 | scoremap = softmax(scoremap):float() 80 | end 81 | if n > 1 then 82 | T = T + t 83 | N = N + 1 84 | print('avgtime: ' .. T/N) 85 | end 86 | timer:reset() 87 | local exist = output[2]:float() 88 | local outputn 89 | for b = 1, input:size(1) do 90 | print('img: ' .. ffi.string(imgpath[b]:data())) 91 | local img = image.load(opt.data .. ffi.string(imgpath[b]:data()), 3, 'float') 92 | outputn = scoremap[{b,{},{},{}}] 93 | --print(outputn:size()) 94 | local _, maxMap = torch.max(outputn, 1) 95 | local save_path = string.sub(ffi.string(imgpath[b]:data()), 15, -16) .. '.json_' 96 | for cnt = 1, 5 do 97 | out_img = maxMap:eq(cnt) -- outputn[{cnt, {}, {}}]--maxMap:eq(cnt) * 1 98 | out_img = image.scale(out_img, 1276, 384, 'simple') 99 | if cnt == 1 then 100 | out_img = torch.cat(torch.ones(1, 333, 1276):byte(), out_img, 2) 101 | else 102 | out_img = torch.cat(torch.zeros(1, 333, 1276):byte(), out_img, 2) 103 | end 104 | image.save(opt.save .. save_path .. (cnt -1) .. '.png', out_img:cuda()) 105 | end 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /ENet-SAD-Simple/train.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
8 | -- 9 | -- The training loop and learning rate schedule 10 | -- 11 | 12 | local optim = require 'optim' 13 | local models = require 'models/init' 14 | local checkpoints = require 'checkpoints' 15 | 16 | local M = {} 17 | local Trainer = torch.class('resnet.Trainer', M) 18 | 19 | function Trainer:__init(model, criterion, opt, optimState, checkpoint) 20 | print('init trainer') 21 | self.model = model 22 | self.criterion = criterion 23 | self.optimState = optimState or { 24 | learningRate = opt.LR, 25 | learningRateDecay = 0.0, 26 | momentum = opt.momentum, 27 | nesterov = true, 28 | dampening = 0.0, 29 | weightDecay = opt.weightDecay, 30 | } 31 | self.iter = 1 32 | if checkpoint then 33 | self.iter = checkpoint.iter 34 | end 35 | self.opt = opt 36 | self.params, self.gradParams = model:getParameters() 37 | self.finish = false 38 | end 39 | 40 | function Trainer:train(epoch, dataloader) 41 | -- Trains the model for a single epoch 42 | print('training') 43 | 44 | local timer = torch.Timer() 45 | local dataTimer = torch.Timer() 46 | 47 | local function feval() 48 | return self.criterion.output, self.gradParams 49 | end 50 | 51 | local trainSize = dataloader:size() 52 | local top1Sum, top5Sum, lossSum, lossSum2, lossSum3, lossSum4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 53 | local N = 0 54 | 55 | print('=> Training epoch # ' .. epoch) 56 | -- set the batch norm to training mode 57 | self.model:training() 58 | for n, sample in dataloader:run() do 59 | if self.iter>=self.opt.maxIter then 60 | self.finish = true 61 | break 62 | end 63 | self.optimState.learningRate = self:learningRate(epoch) 64 | local dataTime = dataTimer:time().real 65 | 66 | -- Copy input and target to the GPU 67 | self:copyInputs(sample) 68 | self.input = self.input:cuda() 69 | local output = self.model:forward(self.input) 70 | 71 | local tmp_target = {} 72 | table.insert(tmp_target, self.target[1]) 73 | table.insert(tmp_target, self.target[2]) 74 | table.insert(tmp_target, self.target[1]) 75 | table.insert(tmp_target, self.target[1]) 76 | 77 | self.target = tmp_target 78 | 79 | local batchSize = output[1]:size(1) 80 | local loss, Loss = self.criterion:forward(self.model.output, self.target) 81 | self.model:zeroGradParameters() 82 | self.criterion:backward(self.model.output, self.target) 83 | self.model:backward(self.input, self.criterion.gradInput) 84 | optim.sgd(feval, self.params, self.optimState) 85 | N = N + batchSize 86 | lossSum = lossSum + Loss[1]*batchSize -- loss for segmentation branch 87 | lossSum2 = lossSum2 + Loss[2]*batchSize -- loss for classification branch 88 | lossSum3 = lossSum3 + Loss[3]*batchSize -- losses for the auxiliary segmentation branches 89 | lossSum4 = lossSum4 + Loss[4]*batchSize -- (targets 3 and 4 repeat the segmentation target) 90 | print((' | Epoch: [%d][%d/%d][%d] Time %.2f LR %.5f Err1 %.5f (%.5f) Err2 %.5f (%.5f) Err3 %.5f (%.5f) Err4 %.5f (%.5f)'):format( 91 | epoch, n, trainSize, self.iter, timer:time().real, self.optimState.learningRate, Loss[1], lossSum / N, Loss[2], lossSum2 / N, Loss[3], lossSum3 / N, Loss[4], lossSum4 / N)) 92 | 93 | -- check that the storage didn't get changed due to an unfortunate getParameters call 94 | assert(self.params:storage() == self.model:parameters()[1]:storage()) 95 | if self.iter % 500 == 0 then 96 | checkpoints.save(epoch, self.model, self.optimState, false, self.opt, self.iter) 97 | end 98 | timer:reset() 99 | dataTimer:reset() 100 | self.iter = self.iter + 1 101 | end 102 | 103 | return lossSum / N, self.finish 104 | end 105 | 106 | function Trainer:test(epoch, dataloader) 107 | -- Computes loss, pixel accuracy, mean recall, and mean IoU on the validation set 108 | 109 | local
timer = torch.Timer() 110 | local dataTimer = torch.Timer() 111 | local size = dataloader:size() 112 | 113 | local nCrops = self.opt.tenCrop and 10 or 1 114 | local AccSum, RecSum, IOUSum, lossSum, lossSum2, lossSum3, lossSum4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 115 | local N = 0 116 | 117 | self.model:evaluate() 118 | for n, sample in dataloader:run() do 119 | local dataTime = dataTimer:time().real 120 | 121 | -- Copy input and target to the GPU 122 | self:copyInputs(sample) 123 | 124 | self.input = self.input:cuda() 125 | local output = self.model:forward(self.input) 126 | local accuracy, avgRecall, avgIOU 127 | local batchSize = 0 128 | 129 | local tmp_target = {} 130 | table.insert(tmp_target, self.target[1]) 131 | table.insert(tmp_target, self.target[2]) 132 | table.insert(tmp_target, self.target[1]) 133 | table.insert(tmp_target, self.target[1]) 134 | 135 | self.target = tmp_target 136 | 137 | batchSize = output[1]:size(1) 138 | accuracy, avgRecall, avgIOU = self:computeAccuracy(output[1]:float(), self.target[1]:float()) 139 | AccSum = AccSum + accuracy*batchSize 140 | RecSum = RecSum + avgRecall*batchSize 141 | IOUSum = IOUSum + avgIOU*batchSize 142 | 143 | local loss, Loss = self.criterion:forward(self.model.output, self.target) 144 | N = N + batchSize 145 | lossSum = lossSum + Loss[1]*batchSize 146 | lossSum2 = lossSum2 + Loss[2]*batchSize 147 | lossSum3 = lossSum3 + Loss[3]*batchSize 148 | lossSum4 = lossSum4 + Loss[4]*batchSize 149 | print((' | Test: [%d][%d/%d] Err1 %.5f (%.5f) Err2 %.5f (%.5f) Err3 %.5f (%.5f) Err4 %.5f (%.5f) Acc %.2f (%.3f) mRec %.2f (%.3f) mIOU %.2f (%.3f)'):format( 150 | epoch, n, size, Loss[1], lossSum / N, Loss[2], lossSum2 / N, Loss[3], lossSum3 / N, Loss[4], lossSum4 / N, accuracy, AccSum / N, avgRecall, RecSum / N, avgIOU, IOUSum / N)) 151 | 152 | timer:reset() 153 | dataTimer:reset() 154 | end 155 | self.model:training() 156 | 157 | return lossSum / N, AccSum / N, RecSum / N, IOUSum / N 158 | end 159 | 160 | function Trainer:copyInputs(sample) 161 | -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory, 162 | -- if using DataParallelTable. The target is always copied to a CUDA tensor 163 | self.input = self.input or (self.opt.nGPU == 1 164 | and torch.CudaTensor() 165 | or cutorch.createCudaHostTensor()) 166 | self.input:resize(sample.input:size()):copy(sample.input) 167 | self.segLabel = self.segLabel or (torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()) 168 | self.segLabel:resize(sample.target[1]:size()):copy(sample.target[1]) 169 | self.exist = self.exist or torch.CudaLongTensor() 170 | self.exist:resize(sample.target[2]:size()):copy(sample.target[2]) 171 | self.target = {self.segLabel:cuda(), self.exist:cuda()} 172 | end 173 | 174 | function Trainer:learningRate(epoch) 175 | -- Training schedule: poly decay for the 'lane' dataset. NOTE: the cifar branches below are leftovers from fb.resnet.torch's step schedule and do not match the poly formula in the return statement. 176 | local decay = 0 177 | if self.opt.dataset == 'lane' then 178 | decay = 1 - self.iter/self.opt.maxIter 179 | elseif self.opt.dataset == 'cifar10' then 180 | decay = epoch >= 122 and 2 or epoch >= 81 and 1 or 0 181 | elseif self.opt.dataset == 'cifar100' then 182 | decay = epoch >= 122 and 2 or epoch >= 81 and 1 or 0 183 | end 184 | return self.opt.LR * math.pow(decay, 0.9) 185 | end 186 | 187 | function Trainer:computeAccuracy( output, target ) 188 | -- This is not the final evaluation code. 189 | -- This only gives a preliminary evaluation for segmentation.
190 | local batchSize = output:size(1) 191 | local classNum = output:size(2) 192 | local h = output:size(3) 193 | local w = output:size(4) 194 | local accuracy, avgRecall, avgIOU = 0.0, 0.0, 0.0 195 | for i = 1, batchSize do 196 | local _, maxMap = torch.max(output[{i,{},{},{}}], 1) 197 | local target_i = target[{i,{},{}}]:long() 198 | -- accuracy 199 | accuracy = accuracy + torch.sum(torch.eq(maxMap, target_i)) / (h*w) 200 | -- recall, IOU 201 | local recall = 0.0 202 | local IOU = 0.0 203 | local numClass, numUnion = 0, 0 204 | for c = 1, classNum do 205 | local num_c = torch.sum(torch.eq(target_i, c)) 206 | local num_c_pred = torch.sum(torch.eq(maxMap, c)) 207 | local numTrue = torch.sum(torch.cmul(torch.eq(maxMap, c), torch.eq(target_i, c))) 208 | local unionSize = num_c + num_c_pred - numTrue 209 | if num_c > 0 or num_c_pred > 0 then 210 | IOU = IOU + numTrue / unionSize 211 | numUnion = numUnion + 1 212 | end 213 | if num_c > 0 then 214 | recall = recall + numTrue / num_c 215 | numClass = numClass + 1 216 | end 217 | end 218 | recall = recall / numClass 219 | avgRecall = avgRecall + recall 220 | IOU = IOU / numUnion 221 | avgIOU = avgIOU + IOU 222 | end 223 | accuracy = accuracy / batchSize 224 | avgRecall = avgRecall / batchSize 225 | avgIOU = avgIOU / batchSize 226 | return accuracy * 100, avgRecall * 100, avgIOU * 100 227 | end 228 | 229 | return M.Trainer 230 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Please note that this license only covers the code. The Unsupervised Llamas 2 | dataset comes with its own license for non-commercial use only. 3 | 4 | MIT License 5 | 6 | Copyright (c) 2019 Karsten Behrendt, Robert Bosch LLC 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unsupervised LLAMAS 2 | Code for the Unsupervised Labeled LAne MArkerS (LLAMAS) dataset. The dataset and more information are available at https://unsupervised-llamas.com. 3 | The leaderboard is available [here](https://unsupervised-llamas.com/llamas/benchmarks). Since lane markers and lane detection are evaluated based on multiple metrics, new metrics can be added to the benchmarks as well. 4 | 5 | All contributions are welcome, e.g.
sample scripts in different machine learning frameworks. You can even change the [website's code](https://github.com/karstenBehrendt/benchmarks_website/tree/master/benchmarks/llamas). 6 | 7 | ## Errors and Suggestions 8 | In case you encounter any issues with the data or scripts, please create an issue ticket, create a PR, or send me an email. 9 | For questions about training deep learning approaches for lane marker detection or segmentation in the different frameworks, please check out Stack Overflow. 10 | You can reach me at "llamas" + the at sign since this is an email + kbehrendt.com. 11 | 12 | ## Starter Code 13 | Make sure to check label_scripts/label_file_scripts.py for loading and using the annotations. It contains a few sample use cases and implementations; see the short loading sketch at the end of this README. 14 | 15 | The simple_baseline folder contains a simplistic baseline approach in TensorFlow which is supposed to be easy to understand. 16 | 17 | The **ENet-SAD-Simple** folder contains the **ENet-SAD** model, which achieves state-of-the-art performance on the TuSimple, CULane, and BDD100K datasets. It also achieves appealing performance on the LLAMAS dataset. Details can be found in the README in [ENet-SAD-Simple](/ENet-SAD-Simple) and in [this repo](https://github.com/cardwing/Codes-for-Lane-Detection). 18 | 19 | The deeplab folder offers some scripts to train deeplab models for the unsupervised LLAMAS dataset. 20 | 21 | All results for the leaderboards are calculated based on scripts in the evaluation folder. 22 | 23 | ## Video 24 | Make sure to check out the [YouTube video](https://youtu.be/kp0qz8PuXxA) with samples from the dataset and baseline approaches. 25 | 26 | ## Sample 27 | 28 | ![Sample Image Gray](https://github.com/karstenbehrendt/unsupervised_llamas/blob/master/samples/sample_gray.jpg) ![Sample Image Color](https://github.com/karstenbehrendt/unsupervised_llamas/blob/master/samples/sample_color.jpg) 29 | ![Sample Image Labeled](https://github.com/karstenbehrendt/unsupervised_llamas/blob/master/samples/sample_labeled.jpg) 30 | 3D points are available and spline interpolation on labels is possible.
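## Loading Labels (Sketch)
As a starting point, the snippet below is a minimal sketch of loading one annotation with the scripts in this repo. It assumes the repository's parent directory is on your `PYTHONPATH` and that `labels/valid` points at an extracted label split; adjust both for your setup.

```python
# Minimal loading sketch; the 'labels/valid' path is an assumption for this example.
from unsupervised_llamas.common import helper_scripts
from unsupervised_llamas.label_scripts import label_file_scripts
from unsupervised_llamas.label_scripts import spline_creator

# Collect all label files of one split
label_paths = helper_scripts.get_files_from_folder('labels/valid', '.json')

# Grayscale camera image that belongs to the first label
gray_image = label_file_scripts.read_image(label_paths[0], image_type='gray')

# One x value per image row for each of the four lanes (l1, l0, r0, r1);
# values are negative in rows where a lane has no marker
lanes = spline_creator.get_horizontal_values_for_four_lanes(label_paths[0])
print(gray_image.shape, len(lanes))
```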
31 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/__init__.py -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/common/__init__.py -------------------------------------------------------------------------------- /common/constants.py: -------------------------------------------------------------------------------- 1 | """ General dataset constants """ 2 | 3 | IMAGE_WIDTH = 1276 4 | IMAGE_HEIGHT = 717 5 | 6 | NUM_TRAIN_IMAGES = 58269 7 | NUM_VALID_IMAGES = 20844 8 | NUM_TEST_IMAGES = 20929 9 | -------------------------------------------------------------------------------- /common/helper_scripts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Random scripts that just don't fit anywhere 3 | """ 4 | 5 | import os 6 | 7 | from unsupervised_llamas.label_scripts import dataset_constants 8 | 9 | 10 | def get_files_from_folder(directory, extension=None): 11 | """Get all files within a folder that match the extension """ 12 | # NOTE Can be replaced by glob for newer python versions 13 | label_files = [] 14 | for root, _, files in os.walk(directory): 15 | for some_file in files: 16 | label_files.append(os.path.abspath(os.path.join(root, some_file))) 17 | if extension is not None: 18 | label_files = list(filter(lambda x: x.endswith(extension), label_files)) 19 | return label_files 20 | 21 | 22 | def get_label_base(label_path): 23 | """ Gets directory independent label path """ 24 | return '/'.join(label_path.split('/')[-2:]) 25 | 26 | 27 | def get_labels(split='test'): 28 | """ Gets label files of specified dataset split """ 29 | label_paths = get_files_from_folder( 30 | os.path.join(dataset_constants.LABELS, split), '.json') 31 | return label_paths 32 | 33 | 34 | def ir(some_value): 35 | """ Rounds and casts to int 36 | Useful for pixel values that cannot be floats 37 | 38 | Parameters 39 | ---------- 40 | some_value : float 41 | numeric value 42 | 43 | Returns 44 | -------- 45 | Rounded integer 46 | 47 | Raises 48 | ------ 49 | TypeError for non scalar types 50 | """ 51 | return int(round(some_value)) 52 | -------------------------------------------------------------------------------- /culane_metric/evaluate.py: -------------------------------------------------------------------------------- 1 | """ Evaluation script for the CULane metric on the LLAMAS dataset. 2 | 3 | This script will compute the F1, precision and recall metrics as described in the CULane benchmark. 4 | 5 | The prediction format is the same as the one used in the CULane benchmark. 6 | In summary, for every annotation file: 7 | labels/a/b/c.json 8 | There should be a prediction file: 9 | predictions/a/b/c.lines.txt 10 | Inside each .lines.txt file each line will contain a sequence of points (x, y) separated by spaces. 11 | For more information, please see https://xingangpan.github.io/projects/CULane.html 12 | 13 | This script uses two methods to compute the IoU: one using an image to draw the lanes (named `discrete` here) and 14 | another one that uses shapes with the shapely library (named `continuous` here).
The results achieved with the first 15 | method are very close to the official CULane implementation. Although the second should be a more exact method and is 16 | faster to compute, it deviates more from the official implementation. By default, the method closer to the official 17 | metric is used. 18 | """ 19 | 20 | import os 21 | import argparse 22 | import warnings 23 | from functools import partial 24 | 25 | import cv2 26 | import numpy as np 27 | from p_tqdm import t_map, p_map 28 | from scipy.interpolate import splprep, splev 29 | from scipy.optimize import linear_sum_assignment 30 | from shapely.geometry import LineString, Polygon 31 | import unsupervised_llamas.common.helper_scripts as helper_scripts 32 | import unsupervised_llamas.label_scripts.spline_creator as spline_creator 33 | from unsupervised_llamas.common.constants import IMAGE_HEIGHT, IMAGE_WIDTH 34 | 35 | LLAMAS_IMG_RES = (IMAGE_HEIGHT, IMAGE_WIDTH) 36 | 37 | 38 | def add_ys(xs): 39 | """For each x in xs, make a tuple with x and its corresponding y.""" 40 | xs = np.array(xs[300:]) 41 | valid = xs >= 0 42 | xs = xs[valid] 43 | assert len(xs) > 1 44 | ys = np.arange(300, 717)[valid] 45 | return list(zip(xs, ys)) 46 | 47 | 48 | def draw_lane(lane, img=None, img_shape=None, width=30): 49 | """Draw a lane (a list of points) on an image by drawing a line with width `width` through each 50 | pair of points i and i+1""" 51 | if img is None: 52 | img = np.zeros(img_shape, dtype=np.uint8) 53 | lane = lane.astype(np.int32) 54 | for p1, p2 in zip(lane[:-1], lane[1:]): 55 | cv2.line(img, tuple(p1), tuple(p2), color=(1,), thickness=width) 56 | return img 57 | 58 | 59 | def discrete_cross_iou(xs, ys, width=30, img_shape=LLAMAS_IMG_RES): 60 | """For each lane in xs, compute its Intersection Over Union (IoU) with each lane in ys by drawing the lanes on 61 | an image""" 62 | xs = [draw_lane(lane, img_shape=img_shape, width=width) > 0 for lane in xs] 63 | ys = [draw_lane(lane, img_shape=img_shape, width=width) > 0 for lane in ys] 64 | 65 | ious = np.zeros((len(xs), len(ys))) 66 | for i, x in enumerate(xs): 67 | for j, y in enumerate(ys): 68 | # IoU by the definition: sum all intersections (binary and) and divide by the sum of the union (binary or) 69 | ious[i, j] = (x & y).sum() / (x | y).sum() 70 | return ious 71 | 72 | 73 | def continuous_cross_iou(xs, ys, width=30): 74 | """For each lane in xs, compute its Intersection Over Union (IoU) with each lane in ys using the area between each 75 | pair of points""" 76 | h, w = IMAGE_HEIGHT, IMAGE_WIDTH 77 | image = Polygon([(0, 0), (0, h - 1), (w - 1, h - 1), (w - 1, 0)]) 78 | xs = [LineString(lane).buffer(distance=width / 2., cap_style=1, join_style=2).intersection(image) for lane in xs] 79 | ys = [LineString(lane).buffer(distance=width / 2., cap_style=1, join_style=2).intersection(image) for lane in ys] 80 | 81 | ious = np.zeros((len(xs), len(ys))) 82 | for i, x in enumerate(xs): 83 | for j, y in enumerate(ys): 84 | ious[i, j] = x.intersection(y).area / x.union(y).area 85 | 86 | return ious 87 | 88 | 89 | def remove_consecutive_duplicates(x): 90 | """Remove consecutive duplicates""" 91 | y = [] 92 | for t in x: 93 | if len(y) > 0 and y[-1] == t: 94 | warnings.warn('Removed consecutive duplicate point ({}, {})!'.format(t[0], t[1])) 95 | continue 96 | y.append(t) 97 | return y 98 | 99 | 100 | def interpolate_lane(points, n=50): 101 | """Spline interpolation of a lane.
Used on the predictions""" 102 | # Consecutive duplicates (can happen with parametric curves) 103 | # cause internal error for scipy's splprep: 104 | # https://stackoverflow.com/a/47949170/15449902 105 | points = remove_consecutive_duplicates(points) 106 | 107 | # B-Spline interpolate 108 | x = [x for x, _ in points] 109 | y = [y for _, y in points] 110 | tck, _ = splprep([x, y], s=0, t=n, k=min(3, len(points) - 1)) 111 | 112 | u = np.linspace(0., 1., n) 113 | return np.array(splev(u, tck)).T 114 | 115 | 116 | def culane_metric(pred, anno, width=30, iou_threshold=0.5, unofficial=False, img_shape=LLAMAS_IMG_RES): 117 | """Computes CULane's metric for a single image""" 118 | if len(pred) == 0: 119 | return 0, 0, len(anno) 120 | if len(anno) == 0: 121 | return 0, len(pred), 0 122 | interp_pred = np.array([interpolate_lane(pred_lane, n=50) for pred_lane in pred]) # (4, 50, 2) 123 | anno = np.array([np.array(anno_lane) for anno_lane in anno], dtype=object) 124 | 125 | if unofficial: 126 | ious = continuous_cross_iou(interp_pred, anno, width=width) 127 | else: 128 | ious = discrete_cross_iou(interp_pred, anno, width=width, img_shape=img_shape) 129 | 130 | row_ind, col_ind = linear_sum_assignment(1 - ious) 131 | tp = int((ious[row_ind, col_ind] > iou_threshold).sum()) 132 | fp = len(pred) - tp 133 | fn = len(anno) - tp 134 | return tp, fp, fn 135 | 136 | 137 | def load_prediction(path): 138 | """Loads an image's predictions 139 | Returns a list of lanes, where each lane is a list of points (x,y) 140 | """ 141 | with open(path, 'r') as data_file: 142 | img_data = data_file.readlines() 143 | img_data = [line.split() for line in img_data] 144 | img_data = [list(map(float, lane)) for lane in img_data] 145 | img_data = [[(lane[i], lane[i + 1]) for i in range(0, len(lane), 2)] for lane in img_data] 146 | img_data = [lane for lane in img_data if len(lane) >= 2] 147 | 148 | return img_data 149 | 150 | 151 | def load_prediction_list(label_paths, pred_dir): 152 | return [load_prediction(os.path.join(pred_dir, path.replace('.json', '.lines.txt'))) for path in label_paths] 153 | 154 | 155 | def load_labels(label_dir): 156 | """Loads the annotations and their paths 157 | Each annotation is converted to a list of points (x, y) 158 | """ 159 | label_paths = helper_scripts.get_files_from_folder(label_dir, '.json') 160 | annos = [[add_ys(xs) for xs in spline_creator.get_horizontal_values_for_four_lanes(label_path) if 161 | (np.array(xs) >= 0).sum() > 1] # lanes annotated with a single point are ignored 162 | for label_path in label_paths] 163 | label_paths = [ 164 | helper_scripts.get_label_base(p) for p in label_paths 165 | ] 166 | return np.array(annos, dtype=object), np.array(label_paths, dtype=object) 167 | 168 | 169 | def eval_predictions(pred_dir, anno_dir, width=30, unofficial=False, sequential=False):  # default False so the official method is used, as documented above 170 | """Evaluates the predictions in pred_dir and returns CULane's metrics (precision, recall, F1 and its components)""" 171 | print(f'Loading annotation data ({anno_dir})...') 172 | annotations, label_paths = load_labels(anno_dir) 173 | print(f'Loading prediction data ({pred_dir})...') 174 | predictions = load_prediction_list(label_paths, pred_dir) 175 | print('Calculating metric {}...'.format('sequentially' if sequential else 'in parallel')) 176 | if sequential: 177 | results = t_map(partial(culane_metric, width=width, unofficial=unofficial, img_shape=LLAMAS_IMG_RES), 178 | predictions, 179 | annotations) 180 | else: 181 | results = p_map(partial(culane_metric, width=width, unofficial=unofficial,
img_shape=LLAMAS_IMG_RES), 182 | predictions, 183 | annotations) 184 | total_tp = sum(tp for tp, _, _ in results) 185 | total_fp = sum(fp for _, fp, _ in results) 186 | total_fn = sum(fn for _, _, fn in results) 187 | if total_tp == 0: 188 | precision = 0 189 | recall = 0 190 | f1 = 0 191 | else: 192 | precision = float(total_tp) / (total_tp + total_fp) 193 | recall = float(total_tp) / (total_tp + total_fn) 194 | f1 = 2 * precision * recall / (precision + recall) 195 | 196 | return {'TP': total_tp, 'FP': total_fp, 'FN': total_fn, 'Precision': precision, 'Recall': recall, 'F1': f1} 197 | 198 | 199 | def parse_args(): 200 | parser = argparse.ArgumentParser(description="Measure CULane's metric on the LLAMAS dataset") 201 | parser.add_argument("--pred_dir", help="Path to directory containing the predicted lanes", required=True) 202 | parser.add_argument("--anno_dir", help="Path to directory containing the annotated lanes", required=True) 203 | parser.add_argument("--width", type=int, default=30, help="Width of the lane") 204 | parser.add_argument("--sequential", action='store_true', help="Run sequentially instead of in parallel") 205 | parser.add_argument("--unofficial", action='store_true', help="Use a faster but unofficial algorithm") 206 | 207 | return parser.parse_args() 208 | 209 | 210 | def main(): 211 | args = parse_args() 212 | results = eval_predictions(args.pred_dir, 213 | args.anno_dir, 214 | width=args.width, 215 | unofficial=args.unofficial, 216 | sequential=args.sequential) 217 | 218 | header = '=' * 20 + ' Results' + '=' * 20 219 | print(header) 220 | for metric, value in results.items(): 221 | if isinstance(value, float): 222 | print('{}: {:.4f}'.format(metric, value)) 223 | else: 224 | print('{}: {}'.format(metric, value)) 225 | print('=' * len(header)) 226 | 227 | 228 | if __name__ == '__main__': 229 | main() 230 | -------------------------------------------------------------------------------- /culane_metric/requirements.txt: -------------------------------------------------------------------------------- 1 | opencv_python>=4.2.0 2 | Shapely>=1.7.0 3 | scipy>=1.4.1 4 | p_tqdm>=1.3.3 5 | tqdm>=4.43.0 6 | scikit_learn>=0.23.2 -------------------------------------------------------------------------------- /deeplab/README.md: -------------------------------------------------------------------------------- 1 | # tf models deeplab example 2 | 3 | There are a few things that need to be added before the unsupervised llamas dataset can smoothly be used with the deeplab framework. 4 | 5 | See deeplab_train.py for some more information. 6 | Make sure to set the dataset and workspace paths in deeplab_common.py and label_scripts/dataset_constants.py 7 | 8 | This is an example script and config. It is only supposed to be used as a reference. 9 | 10 | First create tfrecords files using the deeplab_tfrecords.py script. Read the docstring. 
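A hedged example of what those calls could look like (flag names are taken from the script itself; the paths in label_scripts/dataset_constants.py must be set and tensorflow/models/research must be on PYTHONPATH):

```
# Grayscale input, binary marker / no-marker labels (the defaults)
python deeplab_tfrecords.py
# Grayscale input plus location channels, per-lane labels
python deeplab_tfrecords.py --location_gradients --multi_class
# Color input, per-lane labels
python deeplab_tfrecords.py --color_input --multi_class
```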
11 | 12 | ```python 13 | Before using, add: 14 | # ----------------------------------------------------------------------- 15 | _UNSUPERVISED_LLAMAS_INFORMATION = DatasetDescriptor( 16 | splits_to_sizes={ 17 | 'train': 58269, 18 | 'valid': 20844, 19 | 'test': 20929, 20 | }, 21 | num_classes=5, 22 | ignore_label=255, 23 | ) 24 | 25 | _BINARY_UNSUPERVISED_LLAMAS_INFORMATION = DatasetDescriptor( 26 | splits_to_sizes={ 27 | 'train': 58269, 28 | 'valid': 20844, 29 | 'test': 20929, 30 | }, 31 | num_classes=2, 32 | ignore_label=255, 33 | ) 34 | # ----------------------------------------------------------------------- 35 | into tensorflow/models/research/deeplab/datasets/data_generator.py 36 | 37 | 38 | Replace: 39 | ------------------------------------------------------------------------- 40 | # Set to False if one does not want to re-use the trained classifier weights. 41 | flags.DEFINE_boolean('initialize_last_layer', False, 42 | 'Initialize the last layer.') 43 | 44 | flags.DEFINE_boolean('last_layers_contain_logits_only', True, 45 | 'Only consider logits as last layers or not.') 46 | ------------------------------------------------------------------------- 47 | 48 | and add the datasets to the dataset information 49 | _DATASETS_INFORMATION = { 50 | 'cityscapes': _CITYSCAPES_INFORMATION, 51 | 'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION, 52 | 'ade20k': _ADE20K_INFORMATION, 53 | 'unsupervised_llamas': _UNSUPERVISED_LLAMAS_INFORMATION, 54 | 'binary_unsupervised_llamas': _BINARY_UNSUPERVISED_LLAMAS_INFORMATION, 55 | } 56 | 57 | ``` 58 | 59 | This sample may not be up to date with the current deeplab implementation. -------------------------------------------------------------------------------- /deeplab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/deeplab/__init__.py -------------------------------------------------------------------------------- /deeplab/deeplab_common.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collection of small helper functions 3 | """ 4 | 5 | 6 | import os 7 | from unsupervised_llamas.label_scripts import dataset_constants as dc 8 | 9 | # Only needed for deeplab training: 10 | # TODO set paths 11 | DEEPLAB_DIR = '/TODO/TODO/tensorflow/models/research/deeplab/' 12 | # TODO NOTE This one is very specific to the base model you are training 13 | PRETRAINED_PATH = ('TODO/TODO/' 14 | 'deeplabv3_pascal_trainval/model.ckpt.index') 15 | 16 | 17 | def tfrecords_dir(settings): 18 | """ Creates names for tfrecord folders according to settings """ 19 | prefix1 = settings['input_type'] 20 | prefix1 = prefix1.replace('location', 'gray_grads') 21 | prefix2 = settings['problem'] 22 | tfrecords_folder = os.path.join(dc.TFRECORDS_FOLDER, '{}_{}'.format(prefix1, prefix2)) 23 | return tfrecords_folder 24 | 25 | 26 | def segmentation_set_name(settings): 27 | """ Deeplab dataset name """ 28 | return 'unsupervised_llamas' if settings['problem'] == 'multi'\ 29 | else 'binary_unsupervised_llamas' 30 | 31 | 32 | def checkpoint_dir(settings): 33 | checkpoint = settings['checkpoint_dir'] 34 | if checkpoint.endswith('/'): 35 | checkpoint = checkpoint[:-1] 36 | return checkpoint 37 | -------------------------------------------------------------------------------- /deeplab/deeplab_tfrecords.py: -------------------------------------------------------------------------------- 1 |
#!/usr/bin/env python3 2 | """ 3 | Segmentation dataset specific to the tf models deeplab implementation 4 | https://github.com/tensorflow/models/tree/master/research/deeplab 5 | 6 | Needs paths configured in label_scripts/dataset_constants.py 7 | 8 | Usage: 9 | python deeplab_tfrecords.py 10 | 11 | Parameters / Flags: 12 | --color_input switches input images to color images instead of grayscale 13 | The driver assistance camera focuses on specific colors with an RCCB pattern. 14 | Color images may not look like your typical video. The grayscale 15 | images may look nicer. The dataset contains both. 16 | --multi_class switches pixel-level annotations from binary to lane specific annotations 17 | The binary problem marks every pixel as either 0 for not being part of a marker 18 | or 1 for marker pixels. The multi-class problem adds the information which lane, 19 | relative to the vehicle, this marker pixel belongs to. 20 | The annotation files allow you to add even more information! 21 | --location_gradients Adds location information as extra channels to the input channels. 22 | Lane markers are location dependent. Location information can be 23 | useful because of limited receptive fields and crops. 24 | Implemented by scaling each pixel location to a relative coordinate in the 25 | respective dimension. --> x: 0, 0.03, 0.06, ...., 0.97, 1.0 26 | (There are no gradients stored in the images; they are just called 27 | gradient images because their values increase linearly. There 28 | could be better names.) 29 | """ 30 | # NOTE Could use a few sanity checks 31 | # NOTE Could use some output on the current flags 32 | 33 | import os 34 | from random import shuffle 35 | import sys 36 | 37 | import cv2 38 | from deeplab.datasets import build_data # requires models/research in PYTHONPATH 39 | import numpy 40 | import tensorflow as tf 41 | import tqdm 42 | 43 | from unsupervised_llamas.common import constants 44 | from unsupervised_llamas.common import helper_scripts 45 | from unsupervised_llamas.label_scripts import dataset_constants as dc 46 | from unsupervised_llamas.label_scripts import label_file_scripts 47 | from unsupervised_llamas.label_scripts import segmentation_labels 48 | from unsupervised_llamas.label_scripts import visualize_labels 49 | 50 | 51 | # NOTE Also using tf.app.flags because build_data complains about other args 52 | FLAGS = tf.app.flags.FLAGS 53 | tf.app.flags.DEFINE_boolean('multi_class', False, 'Multi class segmentation, otherwise binary') 54 | tf.app.flags.DEFINE_boolean('color_input', False, 'Color input, otherwise gray images') 55 | tf.app.flags.DEFINE_boolean('location_gradients', False, 'Add gradient images to grayscale image') 56 | 57 | 58 | def create_deeplab_tfrecords(input_folder, tfrecord_file): 59 | """Creates a tfrecord file for a given folder 60 | 61 | Parameters: 62 | input_folder: str, path to samples for a given dataset 63 | tfrecord_file: str, path to tfrecord that will be created 64 | 65 | Flags: 66 | See docstring for more information 67 | color_input: whether to use gray or color images 68 | multi_class: binary or multi-class segmentation 69 | location_gradients: location information as extra channels 70 | """ 71 | label_paths = helper_scripts.get_files_from_folder(input_folder, '.json') 72 | shuffle(label_paths) 73 | print('{} label files in {}'.format(len(label_paths), input_folder)) 74 | 75 | loc_grad_x = list(map(lambda z: z / constants.IMAGE_WIDTH * 255, range(constants.IMAGE_WIDTH))) 76 | loc_grad_y = list(map(lambda z: z / constants.IMAGE_HEIGHT *
255, range(constants.IMAGE_HEIGHT))) 77 | loc_grad_x = numpy.asarray([loc_grad_x] * constants.IMAGE_HEIGHT) 78 | loc_grad_y = numpy.asarray([loc_grad_y] * constants.IMAGE_WIDTH).transpose() 79 | loc_grad_x = numpy.round(loc_grad_x).astype(numpy.uint8) 80 | loc_grad_y = numpy.round(loc_grad_y).astype(numpy.uint8) 81 | 82 | os.makedirs(os.path.dirname(tfrecord_file), exist_ok=True) 83 | with tf.python_io.TFRecordWriter(tfrecord_file) as writer: 84 | for label_path in tqdm.tqdm(label_paths, total=len(label_paths), 85 | desc='Creating ' + tfrecord_file): 86 | 87 | image_name = os.path.basename(label_path).replace('.json', '') 88 | if FLAGS.color_input: 89 | image_data = label_file_scripts.read_image(label_path, image_type='color') 90 | else: 91 | image_data = label_file_scripts.read_image(label_path, image_type='gray') 92 | if FLAGS.location_gradients: 93 | image_data = numpy.stack([image_data, loc_grad_x, loc_grad_y], -1) 94 | image_data = cv2.imencode('.png', image_data)[1].tostring() 95 | 96 | if FLAGS.multi_class: 97 | segmentation_label = segmentation_labels.create_multi_class_segmentation_label( 98 | label_path) 99 | segmentation = numpy.zeros(segmentation_label.shape[0:2], numpy.uint8) 100 | for class_index in range(1, 5): 101 | segmentation[segmentation_label[:, :, class_index] > 0] = class_index 102 | else: 103 | segmentation = visualize_labels.create_segmentation_image( 104 | label_path, image='blank') 105 | segmentation = cv2.cvtColor(segmentation, cv2.COLOR_BGR2GRAY) 106 | segmentation = segmentation > 0 107 | segmentation = segmentation.astype(numpy.uint8) 108 | 109 | segmentation = cv2.imencode('.png', segmentation)[1].tostring() 110 | 111 | example = build_data.image_seg_to_tfexample( 112 | image_data, image_name, constants.IMAGE_HEIGHT, 113 | constants.IMAGE_WIDTH, segmentation) 114 | 115 | writer.write(example.SerializeToString()) 116 | 117 | 118 | def create_deeplab_sets(): 119 | """ Create the individual sets based on command line arguments 120 | Sets output names based on flags and calls tfrecord creation for the individual datasets 121 | """ 122 | if FLAGS.color_input and FLAGS.location_gradients: 123 | print('Cannot add location gradients to color image') 124 | sys.exit() 125 | 126 | prefix1 = 'color' if FLAGS.color_input else 'gray' 127 | prefix1 = prefix1 + '_grads' if FLAGS.location_gradients else prefix1 128 | prefix2 = 'multi' if FLAGS.multi_class else 'binary' 129 | tfrecords_folder = os.path.join(dc.TFRECORDS_FOLDER, '{}_{}'.format(prefix1, prefix2)) 130 | os.makedirs(tfrecords_folder, exist_ok=True) 131 | 132 | # TODO test folder not available 133 | # TODO Adapt test to handle missing annotations or remove test 134 | osj = os.path.join 135 | # NOTE the dash after 'train', 'valid', and 'test' terminates set names in deeplab 136 | for dataset in [(osj(dc.LABELS, 'train'), osj(tfrecords_folder, 'train-set.tfrecords')), 137 | (osj(dc.LABELS, 'valid'), osj(tfrecords_folder, 'valid-set.tfrecords')), 138 | (osj(dc.LABELS, 'test'), osj(tfrecords_folder, 'test-set.tfrecords'))]: 139 | # not multithreaded anymore. 
Call without loop possible 140 | create_deeplab_tfrecords(*dataset) 141 | 142 | 143 | if __name__ == '__main__': 144 | # READ THE DOCSTRING BEFORE RUNNING THIS 145 | create_deeplab_sets() 146 | -------------------------------------------------------------------------------- /deeplab/deeplab_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Train deeplab models on the unsupervised llamas dataset 4 | 5 | Before using, add: 6 | # ----------------------------------------------------------------------- 7 | _UNSUPERVISED_LLAMAS_INFORMATION = DatasetDescriptor( 8 | splits_to_sizes={ 9 | 'train': 58269, 10 | 'valid': 20844, 11 | 'test': 20929, 12 | }, 13 | num_classes=5, 14 | ignore_label=255, 15 | ) 16 | 17 | _BINARY_UNSUPERVISED_LLAMAS_INFORMATION = DatasetDescriptor( 18 | splits_to_sizes={ 19 | 'train': 58269, 20 | 'valid': 20844, 21 | 'test': 20929, 22 | }, 23 | num_classes=2, 24 | ignore_label=255, 25 | ) 26 | # ----------------------------------------------------------------------- 27 | into tensorflow/models/research/deeplab/datasets/data_generator.py 28 | 29 | 30 | Replace: 31 | ------------------------------------------------------------------------- 32 | # Set to False if one does not want to re-use the trained classifier weights. 33 | flags.DEFINE_boolean('initialize_last_layer', False, 34 | 'Initialize the last layer.') 35 | 36 | flags.DEFINE_boolean('last_layers_contain_logits_only', True, 37 | 'Only consider logits as last layers or not.') 38 | ------------------------------------------------------------------------- 39 | 40 | and add the datasets to the dataset information 41 | _DATASETS_INFORMATION = { 42 | 'cityscapes': _CITYSCAPES_INFORMATION, 43 | 'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION, 44 | 'ade20k': _ADE20K_INFORMATION, 45 | 'unsupervised_llamas': _UNSUPERVISED_LLAMAS_INFORMATION, 46 | 'binary_unsupervised_llamas': _BINARY_UNSUPERVISED_LLAMAS_INFORMATION, 47 | } 48 | 49 | 50 | in deeplab/train.py 51 | 52 | Usage: 53 | python3 deeplab_train.py 54 | --help for arguments 55 | Needs to have existing tfrecords from the unsupervised LLAMAS dataset 56 | 57 | This is only an example. I don't recommend training based on this or even using this.
58 | """ 59 | 60 | import argparse 61 | import os 62 | import subprocess 63 | 64 | from unsupervised_llamas.label_scripts import dataset_constants as dc 65 | from unsupervised_llamas.deeplab import deeplab_common 66 | 67 | 68 | def train_deeplab(settings): 69 | """ Prepares variables to call tensorflow/models/research/deeplab's training function """ 70 | train_dir = '{}_deeplab_{}_{}'.format(dc.TRAIN_DIRECTORY, 71 | settings['input_type'], 72 | settings['problem']) 73 | os.makedirs(train_dir, exist_ok=True) 74 | 75 | env = os.environ.copy() 76 | env['CUDA_VISIBLE_DEVICES'] = str(settings['gpu']) 77 | train_call = [ 78 | 'python', 79 | os.path.join(deeplab_common.DEEPLAB_DIR, 'train.py'), 80 | 81 | # TODO FIXME There is an issue here and those are set manually in deeplab/train.py 82 | # '--noinitialize_last_layer' 83 | # '--last_layers_contain_logits_only', 84 | 85 | '--logtostderr', 86 | '--train_split=train', 87 | # NOTE Add dataset into deeplab/datasets/segmentation_dataset 88 | '--dataset={}'.format(deeplab_common.segmentation_set_name(settings)), 89 | '--model_variant=xception_65', 90 | '--atrous_rates=6', 91 | '--atrous_rates=12', 92 | '--atrous_rates=18', 93 | '--output_stride=16', 94 | '--decoder_output_stride=4', 95 | '--train_crop_size=513', 96 | '--train_crop_size=513', 97 | '--save_interval_secs=3600', 98 | '--train_batch_size=4', 99 | '--training_number_of_steps={}'.format(settings['num_iterations']), 100 | '--fine_tune_batch_norm=true', 101 | '--tf_initial_checkpoint={}'.format(deeplab_common.PRETRAINED_PATH), 102 | '--train_logdir={}'.format(train_dir), 103 | '--dataset_dir={}'.format(deeplab_common.tfrecords_dir(settings))] 104 | 105 | subprocess.call(train_call, env=env) 106 | 107 | 108 | def parse_args(): 109 | """ Defines defaults and command line parser """ 110 | parser = argparse.ArgumentParser(description=__doc__) 111 | parser.add_argument('--num_iterations', type=int, default=10**6, help='Number of iterations') 112 | parser.add_argument('--input_type', type=str, default='gray', help='gray, location or color') 113 | parser.add_argument('--problem', type=str, default='multi', help='binary or multi') 114 | parser.add_argument('--gpu', type=int, default=0, help='0 to n, n being your number of GPUs') 115 | 116 | return vars(parser.parse_args()) 117 | 118 | 119 | if __name__ == '__main__': 120 | train_deeplab(parse_args()) 121 | -------------------------------------------------------------------------------- /deeplab/deeplab_vis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Visualize deeplab models on the unsupervised lamas dataset. 4 | See training file with changes needed in models/.../deeplab 5 | 6 | Usage: 7 | python3 deeplab_vis.py 8 | --help for arguments 9 | Needs to have existing tfrecords from unsupervised Lamas dataset 10 | 11 | This is only an example. Not a good one either. It is better to implement your own scripts. 
12 | """ 13 | 14 | import argparse 15 | import os 16 | import subprocess 17 | 18 | from unsupervised_llamas.deeplab import deeplab_common 19 | 20 | 21 | def vis_deeplab(settings): 22 | """ Draws segmentations based on trained deeplab models """ 23 | checkpoint_dir = deeplab_common.checkpoint_dir(settings) 24 | 25 | env = os.environ.copy() 26 | env['CUDA_VISIBLE_DEVICES'] = str(settings['gpu']) 27 | dataset = deeplab_common.segmentation_set_name(settings) 28 | dataset_dir = deeplab_common.tfrecords_dir(settings) 29 | vis_call = [ 30 | 'python3', 31 | os.path.join(deeplab_common.DEEPLAB_DIR, 'vis.py'), 32 | 33 | '--logtostderr', 34 | '--vis_split=valid', 35 | '--dataset={}'.format(dataset), 36 | # '--dataset_dir={}'.format(settings['dataset_dir']), 37 | '--model_variant=xception_65', 38 | '--atrous_rates=6', 39 | '--atrous_rates=12', 40 | '--atrous_rates=18', 41 | '--output_stride=16', 42 | '--decoder_output_stride=4', 43 | 44 | '--vis_crop_size=1276,717', # May need to be changed # 513,513 45 | '--checkpoint_dir={}'.format(checkpoint_dir), 46 | '--vis_logdir={}_vis'.format(checkpoint_dir), 47 | '--dataset_dir={}'.format(dataset_dir), 48 | '--also_save_raw_predictions'] 49 | 50 | subprocess.call(vis_call, env=env) 51 | 52 | 53 | def parse_args(): 54 | """ Defines defaults and command line parser """ 55 | parser = argparse.ArgumentParser(description=__doc__) 56 | parser.add_argument('--num_iterations', type=int, default=10**6, help='Number of iterations') 57 | parser.add_argument('--input_type', type=str, default='gray', help='gray, location, or color') 58 | parser.add_argument('--problem', type=str, default='multi', help='binary or multi') 59 | parser.add_argument('--gpu', type=int, default=0, help='0 to n, n being your number of GPUs') 60 | parser.add_argument('--checkpoint_dir', type=str, required=True, 61 | help='Training directory with checkpoint') # Define single checkpoint? 62 | 63 | return vars(parser.parse_args()) 64 | 65 | 66 | if __name__ == '__main__': 67 | vis_deeplab(parse_args()) 68 | -------------------------------------------------------------------------------- /evaluation/README.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | This folder contains the evaluation scripts for the leaderboard. 3 | Additional scripts and metrics may be added. 4 | 5 | ## Submission Format 6 | For segmentatation approaches, the results have to be submitted as png images for each image in the test set. 7 | ``` 8 | The script expects all images to be named according to the label files, i.e., 9 | recording_folder/label_file.json + '_' + {class integer} + '.png' 10 | 11 | The class integers / enums are: 12 | 0: background 13 | 1: l1 14 | 2: l0 15 | 3: r0 16 | 4: r1 17 | In the binary case 1 is enough for the evaluation. 18 | 19 | An example image path for r0 (first marker to the right) is: 20 | /PATH_TO_FOLDER/llamas/trained_nets/2019_03_03__17_53_39_multi_marker_net_gradients/ 21 | markers-1456725_test/images-2014-12-22-13-22-35_mapping_280S_2nd_lane/ 22 | 1419283521_0744236000.json_3.png 23 | ``` 24 | 25 | Make sure to see evaluate_segmentation.py and test your submission format for the validation set before submitting data. 26 | 27 | ## Leaderboards 28 | Benchmark results are displayed on the unsupervised LLAMAS website [here](https://unsupervised-llamas.com/llamas/benchmarks). 
29 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/evaluation/__init__.py -------------------------------------------------------------------------------- /evaluation/evaluate_segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ The evaluation script for the segmentation part of the unsupervised 3 | llamas dataset. 4 | 5 | It calculates AUC and the best precision-recall combination for each class. 6 | 7 | The script expects all images to be named according to the label files, i.e., 8 | recording_folder/label_file.json + '_' + {class integer} + '.png' 9 | 10 | The class integers / enums are: 11 | 0: background 12 | 1: l1 13 | 2: l0 14 | 3: r0 15 | 4: r1 16 | In the binary case 1 is enough for the evaluation. 17 | 18 | An example image path for r0 (first marker to the right) is: 19 | /PATH_TO_FOLDER/llamas/trained_nets/2019_03_03__17_53_39_multi_marker_net_gradients/ 20 | markers-1456725_test/images-2014-12-22-13-22-35_mapping_280S_2nd_lane/ 21 | 1419283521_0744236000.json_3.png 22 | 23 | Use png files for lossless compression. 24 | Files are stored for individual channels because it's easy. Four channel images 25 | would not be an issue but after that it may not be too straightforward. 26 | 27 | Make sure to scale predictions from 0 to 255 when storing as image. 28 | cv2.imwrite may write only zeros and ones when given a float array with values 29 | between 0 and 1, even though cv2.imshow visualizes it correctly. 30 | 31 | Usage: 32 | python3 evaluate_segmentation.py \ 33 | --inference_folder folder_with_stored_inference_images 34 | --multi_class (optional if it is not binary) 35 | """ 36 | # TODO Needs to be tested 37 | # TODO The binary and multi_class evaluation can probably be combined 38 | # by just checking which files exist 39 | # TODO The multithreading call can be implemented in a cleaner way 40 | 41 | import argparse 42 | import concurrent.futures 43 | import os 44 | import pprint 45 | 46 | import cv2 47 | import tqdm 48 | 49 | from unsupervised_llamas.common import helper_scripts 50 | from unsupervised_llamas.evaluation import segmentation_metrics 51 | from unsupervised_llamas.label_scripts import dataset_constants 52 | from unsupervised_llamas.label_scripts import segmentation_labels 53 | 54 | 55 | def binary_eval_single_image(inputs): 56 | # Single argument call for the threaded function. 57 | # This can probably be implemented in a cleaner way. 58 | return single_threaded_binary_eval_single_image(inputs[0], inputs[1]) 59 | 60 | 61 | def multi_eval_single_image(inputs): 62 | # Single argument call for the threaded function. 63 | # This can probably be implemented in a cleaner way.
64 | return single_threaded_multi_eval_single_image(inputs[0], inputs[1]) 65 | 66 | 67 | def single_threaded_multi_eval_single_image(label_path, segmentation_folder): 68 | target = segmentation_labels.create_multi_class_segmentation_label(label_path) 69 | 70 | results = {} 71 | for i in range(5): 72 | # TODO Needs to be adapted for more classes / farther lanes 73 | # Currently (in order) background, l1, l0, r0, r1 74 | segmentation_path = os.path.join( 75 | segmentation_folder, 76 | helper_scripts.get_label_base(label_path)) + '_{}.png'.format(i) 77 | 78 | segmentation = cv2.imread(segmentation_path, cv2.IMREAD_GRAYSCALE) 79 | if segmentation is None:  # cv2.imread returns None on failure, so check before converting 80 | raise IOError('Could not read image. Is this label path correct?', label_path) 81 | results[i] = segmentation_metrics.binary_approx_auc(segmentation.astype(float) / 255, target[:, :, i]) 82 | 83 | return results 84 | 85 | 86 | def single_threaded_binary_eval_single_image(label_path, segmentation_folder): 87 | target = segmentation_labels.create_binary_segmentation_label(label_path) 88 | 89 | segmentation_path = os.path.join( 90 | segmentation_folder, 91 | helper_scripts.get_label_base(label_path)) + '_1.png' 92 | segmentation = cv2.imread(segmentation_path, cv2.IMREAD_GRAYSCALE) 93 | assert segmentation is not None, 'Could not read image: ' + segmentation_path 94 | results = {} 95 | results[1] = segmentation_metrics.binary_approx_auc(segmentation.astype(float) / 255, target) 96 | return results 97 | 98 | 99 | def evaluate_set(segmentation_folder, eval_function, dataset_split='test', max_workers=8): 100 | """ Runs evaluation for a given image folder 101 | 102 | Parameters 103 | ---------- 104 | segmentation_folder : str 105 | folder with predictions / inference images according to docstring 106 | eval_function : function 107 | Currently the binary or multi-class evaluation function 108 | dataset_split : str 109 | 'train', 'valid', or 'test'. Calculates metrics for that split. 110 | max_workers : int 111 | Number of threads to use 112 | 113 | Returns 114 | ------- 115 | Dictionary with AP for each class and best precision-recall combination 116 | 117 | Raises 118 | ------ 119 | IOError if inference image does not exist for a sample in the defined split 120 | 121 | Notes 122 | ----- 123 | Use max_workers=1 for single threaded call. This makes debugging a lot easier. 124 | """ 125 | label_folder = os.path.join(dataset_constants.LABELS, dataset_split) 126 | if not os.path.isdir(label_folder): 127 | raise IOError('Could not find labels for split {} at {}'.format( 128 | dataset_split, label_folder)) 129 | label_paths = helper_scripts.get_labels(dataset_split) 130 | 131 | if not os.path.isdir(segmentation_folder): 132 | raise IOError('Could not find segmentation folder at', segmentation_folder) 133 | 134 | # This still takes a couple of hours. 135 | eval_dicts = {} 136 | if max_workers > 1: 137 | with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor: 138 | for label_path, single_eval in tqdm.tqdm( 139 | zip(label_paths, executor.map( 140 | eval_function, zip(label_paths, [segmentation_folder] * len(label_paths)))), 141 | desc='Scoring test samples', total=len(label_paths)): 142 | eval_dicts[label_path] = single_eval 143 | else: # mainly for debugging 144 | for label_path in tqdm.tqdm( 145 | label_paths, desc='Scoring test samples', total=len(label_paths)): 146 | eval_dicts[label_path] = eval_function((label_path, segmentation_folder)) 147 | 148 | # The reduce step.
Calculates averages 149 | label_paths = list(eval_dicts.keys()) 150 | lanes = list(eval_dicts[label_paths[0]].keys()) 151 | metrics = list(eval_dicts[label_paths[0]][lanes[0]].keys()) 152 | 153 | mean_results = {} 154 | for lane in lanes: 155 | mean_results[lane] = {} 156 | for metric in metrics: 157 | mean = 0 158 | for label_path in label_paths: 159 | mean += eval_dicts[label_path][lane][metric] 160 | mean /= len(label_paths) 161 | mean_results[lane][metric] = mean 162 | 163 | pprint.pprint(segmentation_folder) 164 | pprint.pprint(mean_results) 165 | return mean_results 166 | 167 | 168 | def parse_args(): 169 | parser = argparse.ArgumentParser(description=__doc__) 170 | parser.add_argument('--inference_folder', type=str, required=True, 171 | help='Folder of inference images, see docstring') 172 | parser.add_argument('--multi_class', action='store_true') 173 | parser.add_argument('--max_workers', type=int, default=8) 174 | parser.add_argument( 175 | '--split', type=str, required=False, default='test', 176 | help="('train' | 'valid' | 'test') to select the split to evaluate") 177 | return parser.parse_args() 178 | 179 | 180 | if __name__ == '__main__': 181 | args = parse_args() 182 | eval_function = multi_eval_single_image if args.multi_class else binary_eval_single_image 183 | evaluate_set(args.inference_folder, eval_function, dataset_split=args.split, 184 | max_workers=args.max_workers) 185 | -------------------------------------------------------------------------------- /evaluation/fix_inference_output_names.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | A quick script to adapt to the naming schema of the evaluation scripts. 4 | Not needed if files are named according to the evaluation scripts. 5 | """ 6 | import argparse 7 | import os 8 | 9 | import tqdm 10 | 11 | from unsupervised_llamas.common import helper_scripts 12 | 13 | 14 | def fix_names(input_folder, input_string, output_string): 15 | """ Changes all names within folder according to parameters 16 | 17 | Parameters 18 | ---------- 19 | input_folder : str 20 | folder containing inference images 21 | input_string : str 22 | substring to be replaced within each image path 23 | output_string : str 24 | what the input_string should be replaced with 25 | 26 | Notes 27 | ----- 28 | This function is only needed if the scripts don't follow the 29 | expected naming conventions in the first place.
30 | """ 31 | segmentation_images = helper_scripts.get_files_from_folder(input_folder, '.png') 32 | for segmentation_image in tqdm.tqdm(segmentation_images, desc='renaming images'): 33 | output_path = segmentation_image.replace(input_string, output_string) 34 | os.rename(segmentation_image, output_path) 35 | 36 | 37 | def parse_args(): 38 | parser = argparse.ArgumentParser(description=__doc__) 39 | parser.add_argument('--input_folder', type=str, required=True) 40 | parser.add_argument('--input_string', type=str, required=True) 41 | parser.add_argument('--output_string', type=str, required=True) 42 | return parser.parse_args() 43 | 44 | 45 | if __name__ == '__main__': 46 | args = parse_args() 47 | fix_names(args.input_folder, args.input_string, args.output_string) 48 | -------------------------------------------------------------------------------- /evaluation/segmentation_metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Calculates 4 | true positives (tp) 5 | false positives (fp) 6 | true negatives (tn) 7 | false negatives (fn) 8 | precision 9 | recall 10 | average precision / AUC / PR curves 11 | 12 | Additional metrics are welcome 13 | One problem with lane marker segmentation is that the absolute number of correctly 14 | classified pixels often is not helpful because background pixels far outweigh 15 | the lane marker pixels. In absolute terms, marking all pixels as background likely 16 | is the best solution but not helpful for the problem at hand. 17 | 18 | Notes 19 | ----- 20 | Don't use Python2. There may be integer divisions that I missed. 21 | 22 | Options for calculating AUC / Precision Recall curve 23 | 1) 24 | It may be faster to sort (prediction, label) pixels by probability and 25 | go through those. O(n log n) in the amount of pixels per image. 26 | Sorting takes about .36 seconds on my current system. 27 | Expected speedup should be about 50% 28 | 29 | 2) 30 | Bucket sort is possible as well. O(n) to put probabilities into k buckets. 31 | o(n) to calculate the poc / auc. May be faster than using sort(). 32 | Sort however may be implemented in C. Still an approximation, as 3). 33 | 34 | 3) * current implementation. It was easy and can be replaced any time. 35 | O(k * n), k being the amount of threshold steps, 36 | which is not as accurate but may leverage the c/c++ numpy backend. 
37 | tp/tn/fp/fn take about one second to calculate 38 | """ 39 | # NOTE There should be tests 40 | 41 | import numpy 42 | 43 | 44 | def _debug_view(prediction, label): 45 | """ Shows prediction and label for visual debugging """ 46 | prediction = (prediction * 255).astype(numpy.uint8) 47 | label = (label * 255).astype(numpy.uint8) 48 | c = numpy.zeros((717, 1276), dtype=numpy.uint8) 49 | 50 | debug_image = numpy.stack((prediction, label, c), axis=-1) 51 | import cv2 # Not forcing cv2 dependency for metrics 52 | cv2.imshow('debug_image', debug_image) 53 | cv2.waitKey(1000) 54 | 55 | 56 | def thresholded_binary(prediction, threshold): 57 | """ Thresholds prediction to 0 and 1 according to threshold """ 58 | return (prediction >= threshold).astype(int) 59 | 60 | 61 | def true_positive(prediction, label): 62 | """ Calculates number of correctly classified foreground pixels """ 63 | num_tp = numpy.sum(numpy.logical_and(label != 0, prediction == label)) 64 | return num_tp 65 | 66 | 67 | def false_positive(prediction, label): 68 | """ Calculates number of incorrectly predicted foreground pixels """ 69 | num_fp = numpy.sum(numpy.logical_and(label == 0, prediction != 0)) 70 | return num_fp 71 | 72 | 73 | def true_negative(prediction, label): 74 | """ Calculates number of correctly identified background pixels """ 75 | num_tn = numpy.sum(numpy.logical_and(label == 0, prediction == label)) 76 | return num_tn 77 | 78 | 79 | def false_negative(prediction, label): 80 | """ Calculates number of missed foreground pixels """ 81 | num_fn = numpy.sum(numpy.logical_and(label != 0, prediction == 0)) 82 | return num_fn 83 | 84 | 85 | def binary_approx_auc(prediction, label): 86 | """ Calculates approximated AUC and best precision-recall combination 87 | 88 | Parameters 89 | ---------- 90 | prediction : numpy.ndarray 91 | raw prediction output in [0, 1] 92 | label : numpy.ndarray 93 | target / label, values are either 0 or 1 94 | 95 | Returns 96 | ------- 97 | Dict of approximate AUC, "corner" precision, "corner" recall, and threshold 98 | {'precision', 'recall', 'threshold', 'auc'} 99 | 100 | Notes 101 | ----- 102 | See docstring for alternative implementation options 103 | Approximated by 100 uniform thresholds between 0 and 1 104 | """ 105 | # NOTE May achieve speedup by checking if label is all zeros 106 | num_steps = 100 107 | auc_value = 0 108 | 109 | # Most upper right precision, recall point 110 | corner_precision = 0 111 | corner_recall = 0 112 | corner_auc = 0 113 | corner_threshold = 0 114 | 115 | precisions = [1] 116 | recalls = [0] 117 | 118 | # Individual precision recall evaluation for those steps 119 | for i in range(num_steps + 1): 120 | threshold = (num_steps - i) / num_steps 121 | thresholded_prediction = thresholded_binary(prediction, threshold) 122 | 123 | # tn = true_negative(thresholded_prediction, label) 124 | tp = true_positive(thresholded_prediction, label) 125 | fn = false_negative(thresholded_prediction, label) 126 | fp = false_positive(thresholded_prediction, label) 127 | 128 | precision = 0 if (tp + fp) == 0 else tp / (tp + fp) 129 | recall = 0 if (tp + fn) == 0 else tp / (tp + fn) 130 | 131 | if (precision * recall) > corner_auc: 132 | corner_auc = precision * recall 133 | corner_precision = precision 134 | corner_recall = recall 135 | corner_threshold = threshold 136 | 137 | precisions.append(precision) 138 | recalls.append(recall) 139 | 140 | auc_value += (recalls[-1] - recalls[-2]) * precisions[-2] 141 | 142 | return {'recall': corner_recall, 'precision': corner_precision, 143 | 'threshold': 
corner_threshold, 'auc': auc_value} 144 | -------------------------------------------------------------------------------- /label_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Label scripts 2 | 3 | Everything related to processing the label files that is framework independent. 4 | 5 | - label_file_scripts.py is about "low level" label file handling, e.g. projecting markers into image space and opening label files 6 | - visualize_labels.py draws labels into images 7 | - spline_creator.py and segmentation_labels.py create outputs that may directly be used for training 8 | 9 | See the deeplab sample script for an example of how to quickly use these scripts. There is a lot that can be improved. 10 | -------------------------------------------------------------------------------- /label_scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/label_scripts/__init__.py -------------------------------------------------------------------------------- /label_scripts/check_labels.py: -------------------------------------------------------------------------------- 1 | """ Scripts to check labels 2 | 3 | Will be extended with each bug report 4 | 5 | Usage: 6 | python check_labels.py some_label_folder 7 | """ 8 | import json 9 | import sys 10 | 11 | import tqdm 12 | 13 | from unsupervised_llamas.common import helper_scripts 14 | 15 | 16 | def check_labels(input_folder): 17 | """ Checks if labels within folder are readable """ 18 | label_files = helper_scripts.get_files_from_folder(input_folder, 'json') 19 | for label_file in tqdm.tqdm(label_files, desc='checking labels'): 20 | with open(label_file, 'r') as lf: 21 | json.load(lf) # Just to check if json syntax is correct 22 | 23 | 24 | if __name__ == '__main__': 25 | if len(sys.argv) != 2: 26 | print(__doc__) 27 | sys.exit() 28 | check_labels(sys.argv[1]) 29 | -------------------------------------------------------------------------------- /label_scripts/dataset_constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains paths to dataset, working directory, and tfrecords 3 | """ 4 | import os 5 | import time 6 | 7 | 8 | # TODO Set path 9 | COLOR_IMAGES = "..../unsupervised_llamas/color_images" 10 | GRAYSCALE_IMAGES = "..../unsupervised_llamas/grayscale_images" 11 | LABELS = "..../unsupervised_llamas/labels" 12 | 13 | # TODO set path 14 | WORKING_DIRECTORY = ".../some_path/markers/" 15 | TFRECORDS_FOLDER = os.path.join(WORKING_DIRECTORY, 'processed_data/tfrecords') 16 | TRAINED_NETS = os.path.join(WORKING_DIRECTORY, 'trained_nets') 17 | 18 | # A specific training directory based on time 19 | TRAIN_DIRECTORY = os.path.join(TRAINED_NETS, time.strftime('%Y_%m_%d__%H_%M_%S', time.localtime())) 20 | 21 | NUM_TRAIN_IMAGES = 58269 22 | NUM_VALID_IMAGES = 20844 23 | NUM_TEST_IMAGES = 20929 24 | 25 | # Multi-class segmentation colors for the individual lanes 26 | # The names are based on the camera location, e.g. 
the markers 27 | # from r2 divide the first lane to the right from the second to the right 28 | DCOLORS = [(110, 30, 30), (75, 25, 230), (75, 180, 60), (200, 130, 0), (48, 130, 245), (180, 30, 145), 29 | (0, 0, 255), (24, 140, 34), (255, 0, 0), (0, 255, 255), # the main ones 30 | (40, 110, 170), (200, 250, 255), (255, 190, 230), (0, 0, 128), (195, 255, 170), 31 | (0, 128, 128), (195, 255, 170), (75, 25, 230)] 32 | LANE_NAMES = ['l7', 'l6', 'l5', 'l4', 'l3', 'l2', 33 | 'l1', 'l0', 'r0', 'r1', 34 | 'r2', 'r3', 'r4', 'r5', 35 | 'r6', 'r7', 'r8'] 36 | DICT_COLORS = dict(zip(LANE_NAMES, DCOLORS)) 37 | -------------------------------------------------------------------------------- /label_scripts/label_file_scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Reads and preprocesses label files, i.e. returns clean dicts 4 | """ 5 | 6 | import json 7 | import os 8 | 9 | import cv2 10 | import numpy 11 | 12 | from unsupervised_llamas.label_scripts import dataset_constants as dc 13 | 14 | 15 | def project_point(point, projection_matrix): 16 | """Projects 3D point into image coordinates 17 | 18 | Parameters 19 | ---------- 20 | point: iterable 21 | (x, y, z), 3D point to project 22 | projection_matrix: numpy.array, shape=(3, 3) 23 | projects a 3D location into image space 24 | 25 | Returns 26 | ------- 27 | numpy.array 28 | normalized (x, y, 1) in image coordinates 29 | 30 | """ 31 | point = numpy.asarray(point) 32 | projection_matrix = numpy.asarray(projection_matrix) 33 | 34 | point_projected = projection_matrix.dot(point) 35 | point_projected /= point_projected[2] 36 | 37 | return point_projected 38 | 39 | 40 | def project_lane_marker(p1, p2, width, projection_matrix, color, img): 41 | """ Draws a marker by two 3D points (p1, p2) in 2D image space 42 | 43 | p1 and p2 are projected into the image space using a given projection_matrix. 44 | The line is given a fixed width (in m) to be drawn. Since the marker width 45 | is given for the 3D space, the closer edge will be thicker in the image. 46 | The color can be freely set, e.g. according to lane association. 47 | 48 | Parameters 49 | ---------- 50 | p1: iterable 51 | (x, y, z), line start in 3D 52 | p2: iterable 53 | (x, y, z), line end in 3D 54 | width: float 55 | width of marker in m, default=0.1 m 56 | projection_matrix: numpy.array, shape=(3, 3) 57 | projects a 3D location into image space 58 | color: int or tuple 59 | color of marker, e.g. 255 or (0, 255, 255) in (b, g, r) 60 | img: numpy.array (dtype=numpy.uint8) 61 | Image array to draw the marker 62 | 63 | Notes 64 | ------ 65 | You can't draw colored lines into a grayscale image. 
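Example
-------
A hypothetical call, assuming a 3x3 projection matrix P and a BGR image canvas:
project_lane_marker((1.5, 1.0, 10.0), (1.6, 1.0, 20.0), width=0.1,
projection_matrix=P, color=(0, 255, 255), img=canvas)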
66 | """ 67 | p1 = numpy.asarray(p1) 68 | p2 = numpy.asarray(p2) 69 | 70 | p1_projected = project_point(p1, projection_matrix) 71 | p2_projected = project_point(p2, projection_matrix) 72 | 73 | points = numpy.zeros((4, 2), dtype=numpy.float32) 74 | shift = 0 75 | # shift_multiplier = static_cast(1 << shift) 76 | shift_multiplier = 1 # simplified 77 | 78 | projection_matrix = numpy.asarray(projection_matrix) 79 | projected_half_width1 = projection_matrix[0, 0] * width / p1[2] / 2.0 80 | points[0, 0] = (p1_projected[0] - projected_half_width1) * shift_multiplier 81 | points[0, 1] = p1_projected[1] * shift_multiplier 82 | points[1, 0] = (p1_projected[0] + projected_half_width1) * shift_multiplier 83 | points[1, 1] = p1_projected[1] * shift_multiplier 84 | 85 | projected_half_width2 = projection_matrix[0, 0] * width / p2[2] / 2.0 86 | points[2, 0] = (p2_projected[0] + projected_half_width2) * shift_multiplier 87 | points[2, 1] = p2_projected[1] * shift_multiplier 88 | points[3, 0] = (p2_projected[0] - projected_half_width2) * shift_multiplier 89 | points[3, 1] = p2_projected[1] * shift_multiplier 90 | 91 | points = numpy.round(points).astype(numpy.int32) 92 | 93 | if not points[0, 1] == points[3, 1]: 94 | try: # difference in cv2 versions 95 | aliasing = cv2.LINE_AA 96 | except AttributeError: 97 | aliasing = cv2.CV_AA 98 | cv2.fillConvexPoly(img, points, color, aliasing, shift) 99 | cv2.fillConvexPoly(img, points, color, aliasing, shift) 100 | 101 | 102 | def __get_base_name(input_path): 103 | """ /foo/bar/test/folder/image_label.ext --> test/folder/image_label.ext """ 104 | return '/'.join(input_path.split('/')[-3:]) 105 | 106 | 107 | def read_image(json_path, image_type='gray'): 108 | """ Reads image corresponding to json file 109 | 110 | Parameters 111 | ---------- 112 | json_path: str 113 | path to json file / label 114 | image_type: str 115 | type of image to read, either 'gray' or 'color' 116 | 117 | Returns 118 | ------- 119 | numpy.array 120 | Image corresponding to image file 121 | 122 | Raises 123 | ------ 124 | ValueError 125 | If image_type is neither 'gray' nor 'color' 126 | IOError 127 | If image_path does not exist. The image folder may not exist 128 | or may not be set in dataset_constants.py 129 | """ 130 | # NOTE The function is built like this because extensions offer access to other types 131 | base_name = __get_base_name(json_path) 132 | if image_type == 'gray': 133 | image_path = os.path.join(dc.GRAYSCALE_IMAGES, base_name.replace('.json', '_gray_rect.png')) 134 | imread_code = cv2.IMREAD_GRAYSCALE 135 | elif image_type == 'color': 136 | image_path = os.path.join(dc.COLOR_IMAGES, base_name.replace('.json', '_color_rect.png')) 137 | imread_code = cv2.IMREAD_COLOR 138 | else: 139 | ValueError('Unknown image_type: {}'.format(image_type)) 140 | 141 | if not os.path.exists(image_path): 142 | raise IOError( 143 | 'Image does not exist: {}\n. Did you set dataset_constants.py?'.format(image_path)) 144 | return cv2.imread(image_path, imread_code) 145 | 146 | 147 | def _fix_json(json_string): 148 | """ The 'json' output of the label creation tool does not natively 149 | work with Python's json. This one is a quick fix to correct these issues. 
150 | """ 151 | # NOTE should be applied to label files directly 152 | json_string.replace('",\n\t\t\t\t"lane_marker": {', '",\n\t\t\t\t"markers": [') 153 | json_lines = json_string.split('\n') 154 | json_lines.pop(1) 155 | json_lines.pop(-1) 156 | json_lines.pop(-1) 157 | for i in range(len(json_lines)): 158 | if json_lines[i] == '\t\t"lanes": {': 159 | json_lines[i] = '\t\t"lanes": [' 160 | elif json_lines[i] == '\t\t\t"lane": {': 161 | json_lines[i] = '\t\t\t{' 162 | elif json_lines[i] == '\t\t}': 163 | json_lines[i] = '\t\t]' 164 | 165 | # now inner lane markers 166 | if json_lines[i] == '\t\t\t\t"lane_marker": {': 167 | json_lines[i] = '\t\t\t\t{' 168 | json_string = '\n'.join(json_lines) 169 | 170 | # global stop of lists 171 | json_string = json_string.replace('",\n\t\t\t\t{', '",\n\t\t\t\t"markers": [\n\t\t\t\t{') 172 | json_string = json_string.replace('\t\t\t\t}\n\t\t\t}', '\t\t\t\t}]\n\t\t\t}') 173 | return json_string 174 | 175 | 176 | def _filter_lanes_by_size(label, min_height=40): 177 | """ May need some tuning """ 178 | filtered_lanes = [] 179 | for lane in label['lanes']: 180 | lane_start = min([int(marker['pixel_start']['y']) for marker in lane['markers']]) 181 | lane_end = max([int(marker['pixel_start']['y']) for marker in lane['markers']]) 182 | if (lane_end - lane_start) < min_height: 183 | continue 184 | filtered_lanes.append(lane) 185 | label['lanes'] = filtered_lanes 186 | 187 | 188 | def _filter_few_markers(label, min_markers=2): 189 | """Filter lines that consist of only few markers""" 190 | filtered_lanes = [] 191 | for lane in label['lanes']: 192 | if len(lane['markers']) >= min_markers: 193 | filtered_lanes.append(lane) 194 | label['lanes'] = filtered_lanes 195 | 196 | 197 | def _fix_lane_names(label): 198 | """ Given keys ['l3', 'l2', 'l0', 'r0', 'r2'] returns ['l2', 'l1', 'l0', 'r0', 'r1']""" 199 | 200 | # Create mapping 201 | l_counter = 0 202 | r_counter = 0 203 | mapping = {} 204 | lane_ids = [lane['lane_id'] for lane in label['lanes']] 205 | for key in sorted(lane_ids): 206 | if key[0] == 'l': 207 | mapping[key] = 'l' + str(l_counter) 208 | l_counter += 1 209 | if key[0] == 'r': 210 | mapping[key] = 'r' + str(r_counter) 211 | r_counter += 1 212 | for lane in label['lanes']: 213 | lane['lane_id'] = mapping[lane['lane_id']] 214 | 215 | 216 | def read_json(json_path, min_lane_height=20): 217 | """ Reads and cleans label file information by path""" 218 | with open(json_path, 'r') as jf: 219 | label_content = json.load(jf) 220 | 221 | _filter_lanes_by_size(label_content, min_height=min_lane_height) 222 | _filter_few_markers(label_content, min_markers=2) 223 | _fix_lane_names(label_content) 224 | 225 | content = { 226 | 'projection_matrix': label_content['projection_matrix'], 227 | 'lanes': label_content['lanes'] 228 | } 229 | 230 | for lane in content['lanes']: 231 | for marker in lane['markers']: 232 | for pixel_key in marker['pixel_start'].keys(): 233 | marker['pixel_start'][pixel_key] = int(marker['pixel_start'][pixel_key]) 234 | for pixel_key in marker['pixel_end'].keys(): 235 | marker['pixel_end'][pixel_key] = int(marker['pixel_end'][pixel_key]) 236 | for pixel_key in marker['world_start'].keys(): 237 | marker['world_start'][pixel_key] = float(marker['world_start'][pixel_key]) 238 | for pixel_key in marker['world_end'].keys(): 239 | marker['world_end'][pixel_key] = float(marker['world_end'][pixel_key]) 240 | return content 241 | -------------------------------------------------------------------------------- /label_scripts/segmentation_labels.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Collection of functions to create segmentation labels 3 | """ 4 | 5 | import numpy 6 | 7 | from unsupervised_llamas.label_scripts import visualize_labels 8 | from unsupervised_llamas.label_scripts import dataset_constants as dc 9 | 10 | 11 | def create_multi_class_segmentation_label(json_path): 12 | """ Creates pixel-level label of markings color coded by lane association 13 | Only for the four closest lane dividers, i.e. l1, l0, r0, r1 14 | 15 | Parameters 16 | ---------- 17 | json_path: str 18 | path to label file 19 | 20 | Returns 21 | ------- 22 | numpy.array 23 | pixel level segmentation with lane association (717, 1276, 5) 24 | 25 | Notes 26 | ----- 27 | Only draws 4 classes, can easily be extended to a given number of lanes 28 | """ 29 | debug_image = visualize_labels.create_segmentation_image(json_path, image='blank') 30 | 31 | l1 = (debug_image == dc.DICT_COLORS['l1']).all(axis=2).astype(numpy.uint8) 32 | l0 = (debug_image == dc.DICT_COLORS['l0']).all(axis=2).astype(numpy.uint8) 33 | r0 = (debug_image == dc.DICT_COLORS['r0']).all(axis=2).astype(numpy.uint8) 34 | r1 = (debug_image == dc.DICT_COLORS['r1']).all(axis=2).astype(numpy.uint8) 35 | 36 | no_marker = (l1 + l0 + r0 + r1) == 0 37 | 38 | return numpy.stack((no_marker, l1, l0, r0, r1), axis=2) 39 | 40 | 41 | def create_binary_segmentation_label(json_path): 42 | """ Creates binary segmentation image from label 43 | 44 | Parameters 45 | ---------- 46 | json_path: str 47 | path to label file 48 | 49 | Returns 50 | ------- 51 | numpy.array 52 | binary image, 0 for background or 1 for marker, (717, 1276), numpy.uint8 53 | """ 54 | blank_image = numpy.zeros((717, 1276), dtype=numpy.uint8) 55 | blank_image = visualize_labels.create_segmentation_image( 56 | json_path, color=1, image=blank_image) 57 | 58 | return blank_image 59 | -------------------------------------------------------------------------------- /label_scripts/spline_creator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scripts to create the curves between lanes. (Splines here) 3 | """ 4 | import pdb 5 | 6 | import cv2 7 | import numpy 8 | 9 | from unsupervised_llamas.common import helper_scripts 10 | from unsupervised_llamas.label_scripts import label_file_scripts 11 | from unsupervised_llamas.label_scripts import dataset_constants as dc 12 | 13 | 14 | def _draw_points(image, points, color=(255, 0, 0)): 15 | for point in map(tuple, points): 16 | cv2.circle(image, point, 2, color, 1) 17 | 18 | 19 | def _extend_lane(lane, projection_matrix): 20 | """Extends marker closest to the camera 21 | 22 | Adds an extra marker that reaches the end of the image 23 | 24 | Parameters 25 | ---------- 26 | lane : iterable of markers 27 | projection_matrix : 3x3 projection matrix 28 | """ 29 | # Unfortunately, we did not store markers beyond the image plane. That hurts us now 30 | # z is the orthogonal distance to the car. It's good enough 31 | 32 | # The markers are automatically detected, mapped, and labeled. 
There exist faulty ones, 33 | # e.g., horizontal markers which need to be filtered 34 | filtered_markers = filter(lambda x: (x['pixel_start']['y'] != x['pixel_end']['y'] and 35 | x['pixel_start']['x'] != x['pixel_end']['x']), 36 | lane['markers']) 37 | # might be the first marker in the list but not guaranteed 38 | closest_marker = min(filtered_markers, key=lambda x: x['world_start']['z']) 39 | 40 | if closest_marker['world_start']['z'] < 0: # This one likely equals "if False" 41 | return lane 42 | 43 | # World marker extension approximation 44 | x_gradient = (closest_marker['world_end']['x'] - closest_marker['world_start']['x']) /\ 45 | (closest_marker['world_end']['z'] - closest_marker['world_start']['z']) 46 | y_gradient = (closest_marker['world_end']['y'] - closest_marker['world_start']['y']) /\ 47 | (closest_marker['world_end']['z'] - closest_marker['world_start']['z']) 48 | 49 | zero_x = closest_marker['world_start']['x'] - (closest_marker['world_start']['z'] - 1) * x_gradient 50 | zero_y = closest_marker['world_start']['y'] - (closest_marker['world_start']['z'] - 1) * y_gradient 51 | 52 | # Pixel marker extension approximation 53 | pixel_x_gradient = (closest_marker['pixel_end']['x'] - closest_marker['pixel_start']['x']) /\ 54 | (closest_marker['pixel_end']['y'] - closest_marker['pixel_start']['y']) 55 | pixel_y_gradient = (closest_marker['pixel_end']['y'] - closest_marker['pixel_start']['y']) /\ 56 | (closest_marker['pixel_end']['x'] - closest_marker['pixel_start']['x']) 57 | 58 | pixel_zero_x = closest_marker['pixel_start']['x'] + (716 - closest_marker['pixel_start']['y']) * pixel_x_gradient 59 | if pixel_zero_x < 0: 60 | left_y = closest_marker['pixel_start']['y'] - closest_marker['pixel_start']['x'] * pixel_y_gradient 61 | new_pixel_point = (0, left_y) 62 | elif pixel_zero_x > 1276: 63 | right_y = closest_marker['pixel_start']['y'] + (1276 - closest_marker['pixel_start']['x']) * pixel_y_gradient 64 | new_pixel_point = (1276, right_y) 65 | else: 66 | new_pixel_point = (pixel_zero_x, 716) 67 | 68 | new_marker = { 69 | 'lane_marker_id': 'FAKE', 70 | 'world_end': {'x': closest_marker['world_start']['x'], 71 | 'y': closest_marker['world_start']['y'], 72 | 'z': closest_marker['world_start']['z']}, 73 | 'world_start': {'x': zero_x, 'y': zero_y, 'z': 1}, 74 | 'pixel_end': {'x': closest_marker['pixel_start']['x'], 75 | 'y': closest_marker['pixel_start']['y']}, 76 | 'pixel_start': {'x': helper_scripts.ir(new_pixel_point[0]), 77 | 'y': helper_scripts.ir(new_pixel_point[1])} 78 | } 79 | lane['markers'].insert(0, new_marker) 80 | 81 | return lane 82 | 83 | 84 | class SplineCreator(): 85 | """ 86 | For each lane divider 87 | - all lines are projected 88 | - linearly interpolated to limit oscillations 89 | - interpolated by a spline 90 | - subsampled to receive individual pixel values 91 | 92 | The spline creation can be optimized! 93 | - Better spline parameters 94 | - Extending the lowest marker to reach the bottom of the image would also help 95 | - Extending the last marker may in some cases be interesting too 96 | Any help is welcome. 
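A minimal usage sketch (the label path is a placeholder):
sc = SplineCreator('some_label.json')
sc.create_all_points()
r0_x_values = sc.sampled_points['r0']  # one x value per image row, -1 where missing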
97 | 98 | Call create_all_points and get the points in self.sampled_points 99 | It has an x coordinate for each value for each lane 100 | 101 | """ 102 | def __init__(self, json_path): 103 | self.json_path = json_path 104 | self.json_content = label_file_scripts.read_json(json_path) 105 | self.lanes = self.json_content['lanes'] 106 | self.lane_marker_points = {} 107 | self.sampled_points = {} # <--- the interesting part 108 | self.debug_image = numpy.zeros((717, 1276, 3), dtype=numpy.uint8) 109 | 110 | def _sample_points(self, lane, ypp=5, between_markers=True): 111 | """ Markers are given by start and endpoint. This one adds extra points 112 | which need to be considered for the interpolation. Otherwise the spline 113 | could arbitrarily oscillate between start and end of the individual markers 114 | 115 | Parameters 116 | ---------- 117 | lane: polyline, in theory, but there are artifacts which lead to inconsistencies 118 | in ordering. There may be parallel lines. The lines may be dashed. It's messy. 119 | ypp: y-pixels per point, e.g. 10 leads to a point every ten pixels 120 | between_markers : bool, interpolates in between dashes 121 | 122 | Notes 123 | ----- 124 | In particular, this adds points in the lower parts of the image (high y-values) 125 | where start and end points are too sparse, and removes upper lane markers whose 126 | start and end points map into the same pixel. 127 | """ 128 | 129 | # Collect all x values from all markers along a given line. There may be multiple 130 | # intersecting markers, i.e., multiple entries for some y values 131 | x_values = [[] for i in range(717)] 132 | for marker in lane['markers']: 133 | try: 134 | x_values[marker['pixel_start']['y']].append(marker['pixel_start']['x']) 135 | except IndexError: 136 | pdb.set_trace() 137 | 138 | height = marker['pixel_start']['y'] - marker['pixel_end']['y'] 139 | if height > 2: 140 | slope = (marker['pixel_end']['x'] - marker['pixel_start']['x']) / height 141 | step_size = (marker['pixel_start']['y'] - marker['pixel_end']['y']) / float(height) 142 | for i in range(height + 1): 143 | x = marker['pixel_start']['x'] + slope * step_size * i 144 | y = marker['pixel_start']['y'] - step_size * i 145 | x_values[helper_scripts.ir(y)].append(helper_scripts.ir(x)) 146 | 147 | # Calculate average x values for each y value 148 | for y, xs in enumerate(x_values): 149 | if not xs: 150 | x_values[y] = -1 151 | else: 152 | x_values[y] = sum(xs) / float(len(xs)) 153 | 154 | # In the following, we will only interpolate between markers if needed 155 | if not between_markers: 156 | return x_values # TODO ypp 157 | 158 | # Interpolate between markers 159 | current_y = 0 160 | while x_values[current_y] == -1: # skip missing first entries 161 | current_y += 1 162 | 163 | # Also possible using numpy.interp when accounting for beginning and end 164 | next_set_y = 0 165 | try: 166 | while current_y < 717: 167 | if x_values[current_y] != -1: # set. 
Nothing to be done 168 | current_y += 1 169 | continue 170 | 171 | # Finds target x value for interpolation 172 | while next_set_y <= current_y or x_values[next_set_y] == -1: 173 | next_set_y += 1 174 | if next_set_y >= 717: 175 | raise StopIteration 176 | 177 | x_values[current_y] = x_values[current_y - 1] + (x_values[next_set_y] - x_values[current_y - 1]) /\ 178 | (next_set_y - current_y + 1) 179 | current_y += 1 180 | 181 | except StopIteration: 182 | pass # Done with lane 183 | 184 | return x_values 185 | 186 | def _lane_points_fit(self, lane): 187 | # TODO name and docstring 188 | """ Fits spline in image space for the markers of a single lane (side) 189 | 190 | Parameters 191 | ---------- 192 | lane: dict as specified in label 193 | 194 | Returns 195 | ------- 196 | Pixel level values for curve along the y-axis 197 | 198 | Notes 199 | ----- 200 | This one can be drastically improved. Probably fairly easy as well. 201 | """ 202 | # NOTE all variable names represent image coordinates, interpolation coordinates are swapped! 203 | lane = _extend_lane(lane, self.json_content['projection_matrix']) 204 | sampled_points = self._sample_points(lane, ypp=1) 205 | self.sampled_points[lane['lane_id']] = sampled_points 206 | 207 | return sampled_points 208 | 209 | def create_all_points(self): 210 | """ Creates splines for given label """ 211 | for lane in self.lanes: 212 | self._lane_points_fit(lane) 213 | 214 | def _show_lanes(self, return_only=False): 215 | """ For debugging spline creation only """ 216 | 217 | gray_image = label_file_scripts.read_image(self.json_path, 'gray') 218 | self.debug_image = cv2.cvtColor(gray_image, cv2.COLOR_GRAY2BGR) 219 | self.create_all_points() 220 | 221 | for _, sampled_points in self.sampled_points.items(): 222 | _draw_points(self.debug_image, sampled_points, dc.DCOLORS[1]) 223 | 224 | for lane_name, marker_points in self.lane_marker_points.items(): 225 | _draw_points(self.debug_image, marker_points, dc.DICT_COLORS[lane_name]) 226 | 227 | if not return_only: 228 | cv2.imshow('debug image', cv2.resize(self.debug_image, (2200, 1400))) 229 | cv2.waitKey(10000) 230 | 231 | return self.debug_image 232 | 233 | 234 | def get_horizontal_values_for_four_lanes(json_path): 235 | """ Gets an x value for every y coordinate for l1, l0, r0, r1 236 | 237 | This makes it easy to train a direct curve approximation. For each value along 238 | the y-axis, the respective x-values can be compared, e.g. squared distance. 239 | Missing values, i.e. pixels the spline does not cover, are filled with -1. 240 | There is no extrapolation to the image start/end (yet), but values are 241 | interpolated between markers, so the space between dashed markers is not missing. 242 | 243 | Parameters 244 | ---------- 245 | json_path: str 246 | path to label-file 247 | 248 | Returns 249 | ------- 250 | List of [l1, l0, r0, r1], each of which represents a list of ints the length of 251 | the number of vertical pixels of the image 252 | 253 | Notes 254 | ----- 255 | The points are currently based on the splines. The splines are interpolated based on the 256 | segmentation values. The spline interpolation has lots of room for improvement, e.g. 257 | the lines could be interpolated in 3D, a better approach to spline interpolation could 258 | be used, there is barely any error checking, sometimes the splines oscillate too much. 259 | This was used for a quick poly-line regression training only. 
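Example
-------
A minimal sketch (the label path is a placeholder):
l1, l0, r0, r1 = get_horizontal_values_for_four_lanes('some_label.json')
# each returned list holds one x value per image row (717 entries), -1 where missing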
260 | """ 261 | 262 | sc = SplineCreator(json_path) 263 | sc.create_all_points() 264 | 265 | l1 = sc.sampled_points.get('l1', [-1] * 717) 266 | l0 = sc.sampled_points.get('l0', [-1] * 717) 267 | r0 = sc.sampled_points.get('r0', [-1] * 717) 268 | r1 = sc.sampled_points.get('r1', [-1] * 717) 269 | 270 | lanes = [l1, l0, r0, r1] 271 | return lanes 272 | -------------------------------------------------------------------------------- /label_scripts/visualize_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | A collection of functions to draw the labels 3 | """ 4 | 5 | import cv2 6 | import numpy 7 | 8 | from unsupervised_llamas.label_scripts.spline_creator import SplineCreator 9 | from unsupervised_llamas.label_scripts import label_file_scripts 10 | from unsupervised_llamas.label_scripts import dataset_constants as dc 11 | 12 | 13 | def _draw_points(debug_image, x_coordinates, color): 14 | """ Draws a list of x values into an image 15 | 16 | Parameters 17 | ---------- 18 | debug_image : numpy.array 19 | Image to draw the x values into 20 | x_coordinates : list 21 | list of x values along the y-axis 22 | color : tuple 23 | BGR color value or gray if the input is grayscale 24 | """ 25 | for y, x in enumerate(x_coordinates): 26 | if x != -1: 27 | cv2.circle(debug_image, (int(round(x)), y), 2, color) 28 | 29 | 30 | def create_spline_image(json_path, image='blank'): 31 | """ Draws splines into given image 32 | 33 | Parameters 34 | ---------- 35 | json_path: str 36 | path to label file 37 | image: str, 'blank' for all zeros or 'gray' for gray image 38 | numpy.array, direct image input 39 | 40 | Returns 41 | ------- 42 | numpy.array 43 | image with drawn splines 44 | """ 45 | sc = SplineCreator(json_path) 46 | sc.create_all_points() 47 | 48 | # TODO replace section by label_file_scripts read_image 49 | if isinstance(image, str): 50 | if image == 'blank': 51 | image = numpy.zeros((717, 1276, 3), dtype=numpy.uint8) 52 | elif image == 'gray': 53 | image = label_file_scripts.read_image(json_path, 'gray') 54 | else: 55 | raise ValueError('Unexpected input image: {}'.format(image)) 56 | 57 | # TODO Request that as part of read_image as well or util function 58 | if (len(image.shape) == 2 or image.shape[2] == 1): 59 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 60 | 61 | for lane_name, spline in sc.sampled_points.items(): 62 | _draw_points(image, spline, dc.DICT_COLORS[lane_name]) 63 | 64 | return image 65 | 66 | 67 | def create_segmentation_image(json_path, color=None, image=None): 68 | """ Draws pixel-level markers onto image 69 | 70 | Parameters 71 | ---------- 72 | json_path: str 73 | path to label-file 74 | color: int/uint8 for grayscale color to draw markers 75 | tuple (uint8, uint8, uint8), BGR values 76 | None for default marker colors, multi-class 77 | image: str, 'blank' for all zeros or 'gray' for gray image 78 | numpy.array, direct image input 79 | 80 | Returns: 81 | -------- 82 | numpy.array 83 | image with drawn markers 84 | 85 | Notes 86 | ----- 87 | This one is for visualizing the label, may not be optimal for training label creation 88 | """ 89 | 90 | label = label_file_scripts.read_json(json_path) 91 | 92 | # TODO replace section by label_file_scripts read_image 93 | # NOTE Same in function above 94 | if isinstance(image, str): 95 | if image == 'blank': 96 | image = numpy.zeros((717, 1276), dtype=numpy.uint8) 97 | elif image == 'gray': 98 | image = label_file_scripts.read_image(json_path, 'gray') 99 | # TODO Add color 100 | else: 101 
| raise ValueError('Unknown image type {}'.format(image)) 102 | 103 | if (len(image.shape) == 2 or image.shape[2] == 1)\ 104 | and (color is None or not isinstance(color, int)): 105 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR) 106 | 107 | for lane in label['lanes']: 108 | lane_id = lane['lane_id'] 109 | for marker in lane['markers']: 110 | p1 = marker['world_start'] 111 | p1 = [p1['x'], p1['y'], p1['z']] 112 | p2 = marker['world_end'] 113 | p2 = [p2['x'], p2['y'], p2['z']] 114 | dcolor = dc.DICT_COLORS[lane_id] if color is None else color 115 | label_file_scripts.project_lane_marker( 116 | p1, p2, width=.1, projection_matrix=label['projection_matrix'], 117 | color=dcolor, img=image) 118 | return image 119 | -------------------------------------------------------------------------------- /label_scripts/visualize_labels_for_folder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to draw all labels for a given folder 3 | """ 4 | # NOTE There are a few TODOs that would make it a lot nicer 5 | 6 | import os 7 | 8 | import cv2 9 | import tqdm 10 | 11 | from unsupervised_llamas.common import helper_scripts 12 | from unsupervised_llamas.label_scripts import visualize_labels 13 | 14 | 15 | def segmentation_for_folder(input_folder, output_folder, color=None): 16 | """ Draws segmentation images for a given folder of labels 17 | 18 | Parameters 19 | ---------- 20 | input_folder: str 21 | path with json files / labels 22 | output_folder: str 23 | folder to store segmentation images, cannot exist 24 | color: int, gray color value 25 | (int, int, int), BGR values 26 | None for default colors 27 | 28 | Returns nothing 29 | """ 30 | # TODO Add color image option 31 | # TODO keep input name and folders 32 | if os.path.exists(output_folder): 33 | raise IOError('Output folder already exists, stopping to not mess things up') 34 | os.makedirs(output_folder) 35 | 36 | input_labels = helper_scripts.get_files_from_folder(input_folder, '.json') 37 | 38 | for i, label_path in tqdm.tqdm(enumerate(input_labels)): 39 | segmentation_image = visualize_labels.create_segmentation_image(label_path, image='gray', color=color) 40 | cv2.imwrite(os.path.join(output_folder, str(i) + '.png'), segmentation_image) 41 | 42 | 43 | def splines_for_folder(input_folder, output_folder): 44 | """ Draws spline images for a given folder of labels 45 | 46 | Parameters 47 | ---------- 48 | input_folder: str 49 | path with json files / labels 50 | output_folder: str 51 | folder to store spline images, cannot exist 52 | 53 | Returns nothing 54 | """ 55 | # TODO Add color image option 56 | # TODO keep input name and folders 57 | if os.path.exists(output_folder): 58 | raise IOError('Output folder already exists, stopping to not mess things up') 59 | os.makedirs(output_folder) 60 | 61 | input_labels = helper_scripts.get_files_from_folder(input_folder, '.json') 62 | 63 | for i, label_path in tqdm.tqdm(enumerate(input_labels)): 64 | spline_image = visualize_labels.create_spline_image(label_path, 'gray') 65 | cv2.imwrite(os.path.join(output_folder, str(i) + '.png'), spline_image) 66 | -------------------------------------------------------------------------------- /lane_regression/README.md: -------------------------------------------------------------------------------- 1 | # Lane Regression 2 | 3 | ## Submission format 4 | We focus on the car's left, current, and right lanes by regressing the lane borders l1, l0, r0, r1 from left to right. 
5 | l0 is our left lane border closest to us, l1 is the left lane border one further to the left and so on. 6 | The dataset contains more than those lane borders, but those should be the cleanest. 7 | They are auto generated on a frame by frame basis, so they won't be perfect. 8 | 9 | Format: 10 | * The results have to be stored jointly for all images within a single json file. 11 | * Each image is stored as a dict based on its base path (video_name/image_name.png) 12 | * Each image needs to contain the four lanes as keys: l1, l0, r0, r1 13 | * The result for each lane is stored as an x-value for each y-value. 14 | * * OpenCV's coordinate system has y-values start from the top of the image and increase as you go down 15 | * * The first (upper) 300 pixels of the image are not evaluated. There barely are any labels for those. 16 | * * You may submit x values for all y values across the image (717) or ignore the first 300 (417) 17 | 18 | Metric: 19 | Mean absolute horizontal distance for each vertical pixel for each lane 20 | 21 | ## Simple mean baseline evaluations: 22 | On the training set 23 | Overall mean absolute error 36.52 pixels 24 | Individual lanes {'l1': 34.62, 'l0': 35.30, 'r0': 37.89, 'r1': 38.42} 25 | 26 | On the validation set 27 | Overall mean absolute error 33.34 28 | Individual lanes {'l1': 33.47, 'l0': 33.48, 'r0': 32.88, 'r1': 33.68} 29 | 30 | On the test set 31 | Overall mean absolute error: 31.00 pixels 32 | Individual lanes {'l1': 33.78, 'l0': 26.34, 'r0': 30.24, 'r1': 34.75} 33 | 34 | I am currently not listing the paper baselines because the dataset splits and evaluation changed since publication. 35 | I may or may not re-train a model close to the paper baseline. Feel free to submit your results. 36 | 37 | All results will be listed as part of the [lane approximation benchmark](https://unsupervised-llamas.com/llamas/benchmark_splines). 38 | -------------------------------------------------------------------------------- /lane_regression/evaluate.py: -------------------------------------------------------------------------------- 1 | """Evaluates lane regression results 2 | 3 | Submission format (as json file): 4 | { 5 | "label_base": { 6 | 'l1': [x0, x1, x2, x3, x4, ..., x717], 7 | 'l0': [x0, x1, x2, x3, x4, ..., x717], 8 | 'r0': [x0, x1, x2, x3, x4, ..., x717], 9 | 'r1': [x0, x1, x2, x3, x4, ..., x717], 10 | 11 | }, # or since the upper part isn't evaluated 12 | "label_base": { 13 | 'l1': [x300, ..., x717], 14 | 'l0': [x300, ..., x717], 15 | 'r0': [x300, ..., x717], 16 | 'r1': [x300, ..., x717], 17 | 18 | }, 19 | ... (one entry for each label / image within a set) 20 | } 21 | 22 | Markers from left to right: 23 | l1, l0, car / camera, r0, r1 24 | 25 | The main metric for evaluation is the mean absolute distance in pixels 26 | between regressed markers and reference markers. 27 | """ 28 | 29 | import argparse 30 | import json 31 | import math 32 | 33 | import numpy 34 | 35 | from unsupervised_llamas.label_scripts import dataset_constants 36 | from unsupervised_llamas.common import helper_scripts 37 | from unsupervised_llamas.label_scripts import spline_creator 38 | 39 | 40 | def compare_lane(reference_lane, detected_lane, vertical_cutoff=300): 41 | """Mean deviation in pixels""" 42 | assert len(reference_lane) == 717, "Reference lane needs 717 entries" 43 | assert len(detected_lane) >= 717 - vertical_cutoff, "Need at least 417 pixels per lane" 44 | 45 | # Reference lanes go from 0 to 717. If a horizontal entry is not 46 | # defined, it is stored as -1. We have to filter for that. 
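# The -1 entries are mapped to NaN below, so numpy.nanmean ignores them
# when averaging the per-pixel deviations.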
47 | 48 | reference_lane = reference_lane[vertical_cutoff:] 49 | if len(detected_lane) == 717: # lane regressed across complete image 50 | detected_lane = detected_lane[vertical_cutoff:] 51 | elif len(detected_lane) == 417: # lane regressed across the relevant part of the image 52 | pass 53 | else: 54 | raise NotImplementedError(f"Evaluations not implemented for length of detected lane: {len(detected_lane)}") 55 | 56 | reference_lane = [x if x != -1 else float('nan') for x in reference_lane] 57 | # Results are only allowed to be nan where the labels also are invalid. 58 | # Just don't add nans to your submissions within the relevant sections of the image. 59 | assert all([not math.isnan(x) or math.isnan(x_ref) for x, x_ref in zip(detected_lane, reference_lane)]), "NaNs not allowed within lower part of image" 60 | 61 | lane_diff = numpy.subtract(reference_lane, detected_lane) 62 | abs_lane_diff = numpy.abs(lane_diff) 63 | mean_abs_diff = numpy.nanmean(abs_lane_diff) 64 | return mean_abs_diff 65 | 66 | 67 | def evaluate(eval_file: str, split: str): 68 | 69 | assert eval_file.endswith(".json"), "Detections need to be in json file" 70 | with open(eval_file) as efh: 71 | regressions = json.load(efh) 72 | 73 | labels = helper_scripts.get_labels(split=split) 74 | results = {"l1": [], "l0": [], "r0": [], "r1": []} 75 | for label in labels: 76 | spline_labels = spline_creator.get_horizontal_values_for_four_lanes(label) 77 | assert len(spline_labels) == 4, "Incorrect number of lanes" 78 | key = helper_scripts.get_label_base(label) 79 | regression_lanes = regressions[key] 80 | for lane, lane_key in zip(spline_labels, ["l1", "l0", "r0", "r1"]): 81 | result = compare_lane(lane, regression_lanes[lane_key]) 82 | results[lane_key].append(result) 83 | 84 | # Overall mean 85 | all_distances = [] 86 | for value in results.values(): 87 | all_distances.extend(value) 88 | mean_distance = numpy.nanmean(all_distances) 89 | print("Overall mean absolute error", mean_distance) 90 | 91 | # Individual lanes 92 | for key, value in results.items(): 93 | results[key] = numpy.nanmean(value) 94 | print("Individual lanes", results) 95 | 96 | 97 | 98 | def parse_args(): 99 | parser = argparse.ArgumentParser() 100 | parser.add_argument("--eval_file", help="file to be evaluated", required=True) 101 | parser.add_argument("--split", help="train, valid, or test", default="valid") 102 | return parser.parse_args() 103 | 104 | 105 | if __name__ == "__main__": 106 | args = parse_args() 107 | assert args.split in ["train", "valid", "test"] 108 | evaluate(eval_file=args.eval_file, split=args.split) 109 | -------------------------------------------------------------------------------- /lane_regression/simple_mean_baseline.py: -------------------------------------------------------------------------------- 1 | """Very simple baseline by just taking the mean lane locations 2 | 3 | This script is not meant to be pretty. It's just supposed to give 4 | a very quick baseline. It also helps us test our evaluation pipeline. 
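Usage sketch: run this file directly (python simple_mean_baseline.py); it writes
{split}_mean_results.json files for the train, valid, and test splits into the
working directory.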
5 | """ 6 | 7 | import argparse 8 | from collections import defaultdict 9 | import json 10 | 11 | import numpy 12 | import os 13 | import tqdm 14 | 15 | from unsupervised_llamas.label_scripts import dataset_constants 16 | from unsupervised_llamas.common import helper_scripts 17 | from unsupervised_llamas.label_scripts import spline_creator 18 | 19 | 20 | def calculate_means(): 21 | # We store everything in memory, make sure to have >4GB to spare 22 | # NOTE The individual blocks should be separate functions 23 | 24 | if not os.path.exists("mean_label_pre.json"): # Load all data from labels 25 | train_labels = helper_scripts.get_labels(split="train") 26 | valid_labels = helper_scripts.get_labels(split="valid") 27 | labels = train_labels + valid_labels 28 | lanes = { 29 | "l1": defaultdict(list), 30 | "l0": defaultdict(list), 31 | "r0": defaultdict(list), 32 | "r1": defaultdict(list) 33 | } 34 | 35 | # Iterate over all lane labels and store them 36 | for label in tqdm.tqdm(labels, desc="Going through labels"): 37 | spline_labels = spline_creator.get_horizontal_values_for_four_lanes(label) 38 | for lane, lane_key in zip(spline_labels, ["l1", "l0", "r0", "r1"]): 39 | for y_value, x_value in enumerate(lane): 40 | lanes[lane_key][y_value].append(x_value) 41 | # Writes about 2 GB. Technically, this doesn't need to be stored. It's just for debugging 42 | json.dump(lanes, open("mean_label_pre.json", "w")) # use with, was lazy 43 | else: 44 | lanes = json.load(open("mean_label_pre.json")) # use with, was lazy 45 | 46 | if not os.path.exists("mean_label.json"): # Calculate averages 47 | for key, lane in lanes.items(): 48 | for y_value, x_values in lane.items(): 49 | clean_x_values = [x if x!=-1 else float('nan') for x in x_values] 50 | lanes[key][y_value] = numpy.nanmean(clean_x_values) 51 | json.dump(lanes, open("mean_label.json", "w")) # use with, was lazy 52 | else: 53 | lanes = json.load(open("mean_label.json")) # use with, was lazy 54 | 55 | # Clean: Move dict to list, remove first 300 entries per image 56 | for key, lane in lanes.items(): 57 | lanes[key] = [x_value for x_value in lane.values()] 58 | # Remove upper part of image with really sparse information 59 | lanes[key] = lanes[key][300:] # 40% less storage use 60 | 61 | # Write results for the three splits for evaluations 62 | for split in ["train", "valid", "test"]: 63 | print(f"Writing {split} set. 
This may take a couple of minutes") 64 | split_results = {} 65 | labels = helper_scripts.get_labels(split) 66 | for label in labels: 67 | split_results[helper_scripts.get_label_base(label)] = lanes 68 | with open(f"{split}_mean_results.json", "w") as results_handle: 69 | json.dump(split_results, results_handle) 70 | 71 | 72 | if __name__ == "__main__": 73 | calculate_means() 74 | -------------------------------------------------------------------------------- /samples/sample_color.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/samples/sample_color.jpg -------------------------------------------------------------------------------- /samples/sample_gray.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/samples/sample_gray.jpg -------------------------------------------------------------------------------- /samples/sample_labeled.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/samples/sample_labeled.jpg -------------------------------------------------------------------------------- /simple_baseline/README.md: -------------------------------------------------------------------------------- 1 | # Simple Baseline Segmenting Lane Markers 2 | Code is provided for a simple training of a fully convolutional network in tensorflow. 3 | The tfrecords should be created using the functionality of the deeplab folder (or your own implementation). 4 | 5 | ## Training 6 | Once the tfrecords are created, train_binary lets you start training a binary classifier using only the paths to the tfrecords for training and validation. It is only meant to be a starting point but does train an already useful classifier. 7 | 8 | ## Results 9 | The model is trained on crops of the original images without any data augmentation or explicitly training on the validation set. Results are available on the official leaderboard for [binary segmentation](https://unsupervised-llamas.com/llamas/benchmark_binary) and [lane dependent segmentation](https://unsupervised-llamas.com/llamas/benchmark_multi). 10 | 11 | The binary segmentation was trained on grayscale inputs while the multi-class segmentation additionally included gradient images for location information since the simplistic network only has a small region of view. 12 | 13 | ## Video 14 | The outputs of the baseline approaches are visualized after the dataset samples as part of this [Youtube video](https://youtu.be/kp0qz8PuXxA). 
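## Inference
A minimal sketch for running inference with a trained checkpoint (paths are placeholders; see inference_segmentation.py for all options):

```python
from unsupervised_llamas.simple_baseline.inference_segmentation import folder_inference

# Writes per-image probability maps as .json_1.png files into '<checkpoint>_test'
folder_inference(checkpoint_file='/path/to/trained_nets/some_run/model',
                 image_folder='/path/to/llamas/grayscale_images/test',
                 gray=True, binary=True, suffix='_test')
```

Note: with suffix='_test', the script asserts that the image folder contains exactly the number of test set images.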
15 | -------------------------------------------------------------------------------- /simple_baseline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karstenBehrendt/unsupervised_llamas/9b99f464e1983195b922e2df8bb57760182206e7/simple_baseline/__init__.py -------------------------------------------------------------------------------- /simple_baseline/inference_segmentation.py: -------------------------------------------------------------------------------- 1 | """ 2 | A very simple inference script which stores segmentation images 3 | to file for a given image folder and a checkpoint of a trained model 4 | """ 5 | 6 | import argparse 7 | import os 8 | import pdb  # noqa 9 | import time 10 | 11 | import cv2 12 | import numpy 13 | from scipy.special import softmax, expit 14 | import tensorflow as tf 15 | import tqdm 16 | 17 | from unsupervised_llamas.label_scripts import dataset_constants 18 | from unsupervised_llamas.common import helper_scripts 19 | from unsupervised_llamas.simple_baseline import utils 20 | 21 | 22 | def gradient_images(): 23 | x = list(map(lambda z: z / 1276.0 * 255, range(1276))) 24 | y = list(map(lambda z: z / 717.0 * 255, range(717))) 25 | grad_x = numpy.asarray([x] * 717) 26 | grad_y = numpy.asarray([y] * 1276).transpose() 27 | return grad_x, grad_y 28 | 29 | 30 | class NetModel(): 31 | def __init__(self, checkpoint): 32 | checkpoint = utils.get_checkpoint(checkpoint) 33 | 34 | tf.Graph().as_default() 35 | tf.reset_default_graph()  # if multiple evaluations are used within one script 36 | 37 | config = tf.ConfigProto() 38 | config.gpu_options.allow_growth = True 39 | self._sess = tf.Session(config=config) 40 | 41 | new_saver = tf.train.import_meta_graph(checkpoint + '.meta') 42 | init_op = tf.global_variables_initializer() 43 | self._sess.run(init_op) 44 | new_saver.restore(self._sess, checkpoint) 45 | 46 | print('All set up for inference') 47 | 48 | def __del__(self): 49 | self._sess.close() 50 | 51 | def single_batch_inference(self, input_dict): 52 | input_dict['is_training:0'] = False 53 | pf = tf.get_default_graph().get_tensor_by_name('inference_values/prediction:0') 54 | return self._sess.run(pf, feed_dict=input_dict) 55 | 56 | 57 | def model_speed(checkpoint_file, num_samples, num_channels): 58 | """Crude method to measure network speed without optimization""" 59 | nm = NetModel(checkpoint=checkpoint_file) 60 | images = numpy.random.random_integers(0, 255, (num_samples, 1, 1216, 717, num_channels)) 61 | with tf.Session(): 62 | start = time.time() 63 | for image in tqdm.tqdm(images): 64 | nm.single_batch_inference({'image_input:0': image}) 65 | 66 | end = time.time() 67 | duration = end - start 68 | print('Inference duration per sample', duration / num_samples, 'based on', num_samples) 69 | 70 | 71 | def folder_inference(checkpoint_file, image_folder, gray=True, binary=True, location=False, suffix='_test'): 72 | """ 73 | checkpoint_file: str, path to checkpoint, can also be folder 74 | image_folder: str, folder with images to run inference on 75 | """ 76 | out_folder = checkpoint_file + suffix 77 | 78 | input_images = helper_scripts.get_files_from_folder(image_folder, '.png') 79 | if suffix == '_test': 80 | assert len(input_images) == dataset_constants.NUM_TEST_IMAGES 81 | 82 | nm = NetModel(checkpoint=checkpoint_file) 83 | 84 | if location: 85 | # grad_x, grad_y = gradient_images() 86 | # grad_x_batch = numpy.expand_dims(numpy.asarray([grad_x]), -1) 87 | # grad_y_batch = 
numpy.expand_dims(numpy.asarray([grad_y]), -1) 88 | pass 89 | 90 | config = tf.ConfigProto() 91 | config.gpu_options.allow_growth = True 92 | with tf.Session(config=config): 93 | for image_path in tqdm.tqdm(input_images): 94 | 95 | base_path = image_path.replace(image_folder, '') 96 | base_path = base_path[1:] if base_path.startswith('/') else base_path 97 | 98 | camera_image = cv2.imread(image_path, 0 if gray else 1) 99 | camera_image = numpy.expand_dims(camera_image, axis=0) 100 | 101 | if binary: 102 | camera_image = numpy.expand_dims(camera_image, axis=-1) 103 | if location: 104 | # Currently, there are images stored to file like this 105 | # So, this one needs to be implemented, should be < 5 lines 106 | raise NotImplementedError('Add gradient images to inference input') 107 | feed_dict = {'image_input:0': camera_image} 108 | prediction = nm.single_batch_inference(feed_dict)  # A bit bigger. Upsampling, padding 109 | 110 | # Multiply by 255 to get an actual image and stuff 111 | os.makedirs(os.path.dirname(os.path.join(out_folder, base_path)), exist_ok=True) 112 | if binary: 113 | prediction = (expit(prediction)[0, :717, :, 0] - 1.0) * -1  # 1 - sigmoid(x); sigmoid is the two-class softmax here 114 | output_file = os.path.splitext(os.path.join(out_folder, base_path))[0] + '.json_1.png' 115 | cv2.imwrite(output_file, (prediction * 255).astype(numpy.uint8)) 116 | else: 117 | prediction = prediction[0, :717, :, :] 118 | prediction = softmax(prediction, axis=2) 119 | for i in range(prediction.shape[-1]): 120 | output_file = os.path.splitext(os.path.join(out_folder, base_path))[0] + '.json_' + str(i) + '.png' 121 | prediction_image = prediction[:, :, i] 122 | cv2.imwrite(output_file, (prediction_image * 255).astype(numpy.uint8)) 123 | 124 | 125 | def parse_args(): 126 | parser = argparse.ArgumentParser( 127 | description='Stores segmentation images to file for a given folder and checkpoint') 128 | parser.add_argument( 129 | '--checkpoint', type=str, required=True, 130 | help='Directory or checkpoint file to use for inference / segmenting markers') 131 | parser.add_argument( 132 | '--image_folder', type=str, required=True, 133 | help='Folder with input images to be used for inference') 134 | parser.add_argument( 135 | '--gray', action='store_true', 136 | help='If the input images are used in grayscale instead of color images') 137 | parser.add_argument( 138 | '--location', action='store_true', 139 | help='Add gradient images as two additional channels onto input images') 140 | parser.add_argument( 141 | '--binary', action='store_true', 142 | help='Binary segmentation only, i.e., 0 or 1 instead of segmenting lanes also') 143 | parser.add_argument( 144 | '--suffix', type=str, default='_test', 145 | help='Name for inference run. Will be added to output folder. 
_test will verify split size.') 146 | return parser.parse_args() 147 | 148 | 149 | if __name__ == '__main__': 150 | args = vars(parse_args()) 151 | folder_inference(checkpoint_file=args['checkpoint'], 152 | image_folder=args['image_folder'], 153 | gray=args['gray'], 154 | binary=args['binary'], 155 | location=args['location'], 156 | suffix=args['suffix']) 157 | -------------------------------------------------------------------------------- /simple_baseline/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | scipy 4 | tensorflow-gpu 5 | tqdm 6 | -------------------------------------------------------------------------------- /simple_baseline/segmentation_batch_reader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | A couple of iterators for older tfrecords files. 4 | This one should be adapted to work with deeplab tfrecords 5 | so that there aren't too many different formats 6 | 7 | Includes different versions of batch_readers. Most of them 8 | can be removed. 9 | """ 10 | 11 | # TODO Not up to date and should not be used 12 | 13 | import cv2 14 | import numpy 15 | import tensorflow as tf 16 | import tqdm 17 | 18 | 19 | def dataset_iterator_python(tfrecords_file, debug_print=False): 20 | """ Quick and dirty dataset iterator """ 21 | # TODO Needs to be adapted to work with deeplab tfrecord format 22 | # NOTE incomplete! 23 | example = tf.train.Example() 24 | with tf.Session(): 25 | for record in tqdm.tqdm(tf.python_io.tf_record_iterator(tfrecords_file)): 26 | example.ParseFromString(record) 27 | 28 | camera_image = example.features.feature['camera_image'].bytes_list.value[0] 29 | segmentation = example.features.feature['segmentation'].bytes_list.value[0] 30 | camera_image = tf.image.decode_png(camera_image, channels=1).eval() 31 | segmentation = tf.image.decode_png(segmentation, channels=1).eval() 32 | multi_class_segmentation = example.features.feature['multi_class_segmentation'].\ 33 | bytes_list.value[0] 34 | multi_class_segmentation = numpy.fromstring(multi_class_segmentation, 35 | dtype=numpy.uint8) 36 | multi_class_segmentation = numpy.reshape(multi_class_segmentation, [717, 1276, 5]) 37 | 38 | if debug_print: 39 | print('#################') 40 | cv2.imshow('camera_image', camera_image) 41 | cv2.imshow('segmentation', segmentation) 42 | cv2.waitKey(0) 43 | 44 | yield {'camera_image': numpy.expand_dims(camera_image, 0), 45 | 'segmentation_image': numpy.expand_dims(segmentation, 0), 46 | 'multi_class_segmentation': numpy.expand_dims(multi_class_segmentation, 0)} 47 | 48 | 49 | def dataset_iterator(tfrecords_file): 50 | """ Yields single samples from tfrecord file for debugging """ 51 | # TODO Needs to be adapted to work with deeplab tfrecords 52 | num_samples = sum(1 for _ in tqdm.tqdm(tf.python_io.tf_record_iterator(tfrecords_file), 53 | desc='Getting number of samples. 
49 | def dataset_iterator(tfrecords_file):
50 |     """ Yields single samples from tfrecord file for debugging """
51 |     # TODO Needs to be adapted to work with deeplab tfrecords
52 |     num_samples = sum(1 for _ in tqdm.tqdm(tf.python_io.tf_record_iterator(tfrecords_file),
53 |                                            desc='Getting number of samples. May take a bit.'))
54 |     print('Number of samples', num_samples)
55 |     with tf.Session() as data_sess:
56 |         batch = batch_reader(tfrecords_file, batch_size=1)
57 | 
58 |         coord = tf.train.Coordinator()
59 |         tf.train.start_queue_runners(coord=coord)
60 | 
61 |         for _ in tqdm.tqdm(range(num_samples)):
62 |             numpy_batch = data_sess.run(batch)
63 |             yield numpy_batch
64 | 
65 | 
66 | def _parse_function(example):
67 |     # TODO Needs to be adapted to work with deeplab tfrecords
68 |     features = {
69 |         'camera_image': tf.FixedLenFeature([], tf.string),
70 |         'segmentation': tf.FixedLenFeature([], tf.string),
71 |         'multi_class_segmentation': tf.FixedLenFeature([], tf.string),
72 |     }
73 |     example = tf.parse_single_example(example, features)
74 |     camera_image = tf.image.decode_png(example['camera_image'], channels=1, dtype=tf.uint8)
75 |     camera_image.set_shape([717, 1276, 1])
76 |     segmentation = tf.image.decode_png(example['segmentation'], channels=1, dtype=tf.uint8)
77 |     segmentation.set_shape([717, 1276, 1])
78 |     multi_class_segmentation = tf.decode_raw(example['multi_class_segmentation'], tf.uint8)
79 |     multi_class_segmentation = tf.reshape(multi_class_segmentation, [717, 1276, 5])
80 | 
81 |     batch_entries = {'camera_image': camera_image, 'segmentation_image': segmentation,
82 |                      'multi_class_segmentation': multi_class_segmentation}
83 |     return batch_entries
84 | 
85 | 
86 | def batch_reader(dataset_file, batch_size=10, name=None):
87 |     """ tf.train.batch for dataset """
88 |     # TODO Needs to be adapted to work with deeplab tfrecords
89 |     paths = [dataset_file]
90 |     tfrecord_file_queue = tf.train.string_input_producer(paths, name='tfrecord_queue')
91 | 
92 |     reader = tf.TFRecordReader()
93 |     _, batch = reader.read(tfrecord_file_queue)
94 |     batch_entries = _parse_function(batch)
95 |     batch = tf.train.batch(batch_entries, batch_size=batch_size,
96 |                            num_threads=4, capacity=50, name=name)
97 | 
98 |     return batch
99 | 
100 | 
101 | def dataset_reader(batch_size=1):
102 |     """ Another batch reader, this time using tf.data.TFRecordDataset """
103 |     # TODO Needs to be adapted to work with deeplab tfrecords
104 |     filenames = tf.placeholder(tf.string, shape=[None])
105 |     dataset = tf.data.TFRecordDataset(filenames)
106 |     dataset = dataset.map(_parse_function, num_parallel_calls=4)
107 |     dataset = dataset.batch(batch_size)
108 |     dataset = dataset.prefetch(batch_size * 10)
109 |     iterator = dataset.make_initializable_iterator()
110 |     return iterator, filenames
111 | 
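`dataset_reader` returns the initializable iterator together with the `filenames` placeholder, so the same input pipeline can be pointed at different tfrecords files. A minimal TF1 sketch of driving it (the tfrecords path is hypothetical):

```python
import tensorflow as tf

iterator, filenames = dataset_reader(batch_size=4)
next_batch = iterator.get_next()

with tf.Session() as sess:
    # Re-initializing with a different file list reuses the same graph
    sess.run(iterator.initializer,
             feed_dict={filenames: ['/tmp/llamas_train.tfrecords']})
    batch = sess.run(next_batch)  # dict with 'camera_image', 'segmentation_image', ...
```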
--------------------------------------------------------------------------------
/simple_baseline/simple_net.py:
--------------------------------------------------------------------------------
1 | """
2 | This should only be used as a baseline for comparison.
3 | It could be added to some results table.
4 | """
5 | 
6 | import tensorflow as tf
7 | 
8 | DEBUG = True
9 | 
10 | 
11 | def lane_marker_net_2rt(inputs, is_training, out_classes=1):
12 |     """ Super simple, very old network """
13 |     with tf.name_scope('marker_net'):
14 |         conv1 = tf.contrib.slim.conv2d(inputs, num_outputs=32, kernel_size=(3, 3), stride=1, padding='SAME', scope='conv1')
15 |         conv2 = tf.contrib.slim.conv2d(conv1, num_outputs=32, kernel_size=(3, 3), stride=1, padding='SAME', scope='conv2')
16 |         pool1 = tf.contrib.slim.max_pool2d(conv2, kernel_size=(2, 2), stride=2, padding='SAME', scope='pool1')
17 |         conv3 = tf.contrib.slim.conv2d(pool1, num_outputs=64, kernel_size=(5, 5), stride=1, padding='SAME', scope='conv3')
18 |         pool2 = tf.contrib.slim.max_pool2d(conv3, kernel_size=(2, 2), stride=2, padding='SAME', scope='pool2')
19 |         conv4 = tf.contrib.slim.conv2d(pool2, num_outputs=96, kernel_size=(5, 5), stride=1, padding='SAME', activation_fn=None, scope='conv4')
20 |         batch_norm1 = tf.contrib.slim.batch_norm(conv4, activation_fn=tf.nn.relu, is_training=is_training, scope='batch_norm1')
21 |         deconv1 = tf.contrib.slim.conv2d_transpose(batch_norm1, num_outputs=96, kernel_size=(2, 2), stride=(2, 2), padding='SAME', activation_fn=tf.nn.relu, scope='deconv1')
22 |         conv5 = tf.contrib.slim.conv2d(deconv1, num_outputs=64, kernel_size=(3, 3), stride=1, padding='SAME', scope='conv5')
23 |         deconv2 = tf.contrib.slim.conv2d_transpose(conv5, num_outputs=64, kernel_size=(2, 2), stride=(2, 2), padding='SAME', activation_fn=tf.nn.relu, scope='deconv2')
24 |         conv6 = tf.contrib.slim.conv2d(deconv2, num_outputs=128, kernel_size=(3, 3), stride=1, padding='SAME', scope='conv6')
25 |         out = tf.contrib.slim.conv2d(conv6, num_outputs=out_classes, kernel_size=(1, 1), stride=1, padding='SAME', scope='logits', activation_fn=None)
26 | 
27 |     return out
28 | 
29 | 
30 | if __name__ == '__main__':
31 |     some_inputs = tf.placeholder(tf.float32, (10, 460, 640, 1), name='some_inputs')
32 |     prediction = lane_marker_net_2rt(some_inputs, True)
33 |     print(prediction.shape)
34 |     print('Made it through the net')
35 | 
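The two stride-2 poolings halve the resolution twice and the two stride-2 transpose convolutions double it twice, so the logits come back out at the input resolution with `out_classes` channels (sides not divisible by four are rounded up, which is where the 720-row output for 717-row images in inference comes from). A shape check for the multi-class case, mirroring the `__main__` block above (a sketch; the 5 channels match the dataset's multi_class_segmentation maps):

```python
import tensorflow as tf

# Multi-class variant: 5 output channels instead of the binary default
check_inputs = tf.placeholder(tf.float32, (1, 460, 640, 1), name='check_inputs')
logits = lane_marker_net_2rt(check_inputs, is_training=False, out_classes=5)
print(logits.shape)  # expected: (1, 460, 640, 5)
```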
6 | """ 7 | 8 | import argparse 9 | import os 10 | 11 | import cv2 12 | import numpy 13 | import numpy.random 14 | import tensorflow as tf 15 | import tqdm 16 | 17 | from unsupervised_llamas.common import constants 18 | from unsupervised_llamas.label_scripts import dataset_constants 19 | from unsupervised_llamas.simple_baseline import segmentation_batch_reader 20 | from unsupervised_llamas.simple_baseline import simple_net 21 | from unsupervised_llamas.simple_baseline import utils 22 | 23 | DEBUG = True 24 | IMAGE_WIDTH = 600 25 | IMAGE_HEIGHT = 600 26 | BATCH_SIZE = 5 27 | 28 | SUFFIX = 'marker_net' 29 | 30 | 31 | def normalize_image(tf_tensor, name=None): 32 | return tf.subtract(tf_tensor / (255.0 / 2.0), 1, name=name) # using function to name it 33 | 34 | 35 | def scale_image_values(tf_tensor, name=None): 36 | return tf.divide(tf_tensor, 255.0, name=name) 37 | 38 | 39 | def random_crop_params(crop_height=400, crop_width=400, image_height=717, image_width=1276): 40 | x = numpy.random.random_integers(0, image_width - crop_width) 41 | y = numpy.random.random_integers(0, image_height - crop_height) 42 | return {'x1': x, 'y1': y, 'x2': x + crop_width, 'y2': y + crop_height} 43 | 44 | 45 | def segmentation_functions(input_batch, is_training, segmentation): 46 | """ 47 | """ 48 | segmentation = tf.cast(tf.equal(segmentation, 0), tf.float32) 49 | with tf.name_scope('inference_values'): 50 | logits = simple_net.lane_marker_net_2rt(input_batch, is_training) 51 | prediction = tf.identity(logits, name='prediction') # named for restoring 52 | 53 | with tf.name_scope('losses'): 54 | abs_loss = tf.reduce_sum(tf.abs(prediction - segmentation), name='abs_loss') 55 | misclassifications = tf.reduce_sum(tf.round(tf.abs(prediction - segmentation)), name='misclassifications') 56 | train_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=segmentation, logits=prediction, name='train_loss') 57 | 58 | return {'prediction': prediction, 'abs_loss': abs_loss, 'train_loss': train_loss, 59 | 'misclassifications': misclassifications} 60 | 61 | 62 | def train(train_tfrecords, valid_tfrecords, checkpoint=None): 63 | 64 | train_directory = dataset_constants.TRAIN_DIRECTORY + SUFFIX 65 | os.makedirs(train_directory) 66 | 67 | print('Working with ', constants.NUM_TRAIN_IMAGES, 'for training and ', 68 | constants.NUM_VALID_IMAGES, 'for validation') 69 | 70 | def current_sample(current_epoch, current_minibatch): 71 | return current_epoch * constants.NUM_TRAIN_IMAGES + current_minibatch * BATCH_SIZE 72 | 73 | with tf.Graph().as_default() as default_graph: 74 | 75 | # All placeholders 76 | is_training = tf.placeholder(tf.bool, name='is_training') 77 | segmentation_input = tf.placeholder(tf.float32, shape=(None, None, None, 1), name='segmentation_input') 78 | image_input = tf.placeholder(tf.float32, shape=(None, None, None, 1), name='image_input') 79 | segmentation_batch = tf.identity(segmentation_input, name='segmentation_batch') 80 | 81 | image_batch = normalize_image(image_input, name='image_batch') 82 | 83 | funcs = segmentation_functions(image_batch, is_training, segmentation_batch) 84 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # for batch_norm, assigns averages 85 | 86 | train_step = tf.train.AdamOptimizer().minimize(funcs['train_loss']) 87 | 88 | train_batch = segmentation_batch_reader.batch_reader( 89 | train_tfrecords, batch_size=BATCH_SIZE, name='train_batch') 90 | valid_batch = segmentation_batch_reader.batch_reader( 91 | valid_tfrecords, batch_size=BATCH_SIZE, name='valid_batch') 92 | 93 | 
93 |         merged_tf_train_summaries = tf.summary.merge_all() if tf.get_collection(tf.GraphKeys.SUMMARIES) else tf.no_op()  # merge_all returns None when no summaries are registered
94 | 
95 |         writer = tf.summary.FileWriter(train_directory)  # NOTE deliberately not inside a 'with' block
96 |         writer.add_graph(default_graph)  # the graph is not added by default
97 | 
98 |         saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5, keep_checkpoint_every_n_hours=1)
99 |         init_op = tf.global_variables_initializer()
100 | 
101 |         config = tf.ConfigProto()
102 |         config.gpu_options.allow_growth = True
103 |         with tf.Session(config=config) as sess:
104 |             coord = tf.train.Coordinator()
105 |             threads = tf.train.start_queue_runners(coord=coord)
106 |             sess.run(init_op)
107 | 
108 |             if checkpoint is not None:
109 |                 print('Restoring last checkpoint from', checkpoint)
110 |                 actual_checkpoint = utils.get_checkpoint(checkpoint)
111 |                 print('Restoring', actual_checkpoint)
112 |                 saver.restore(sess, actual_checkpoint)
113 |                 print('Checkpoint restored')
114 | 
115 |             for epoch in range(500):  # number of training epochs
116 | 
117 |                 #######################################################################################################
118 |                 # Training
119 | 
120 |                 mean_abs_loss = 0
121 |                 mean_miss_loss = 0
122 |                 mean_train_loss = 0
123 |                 for minibatch in tqdm.tqdm(range(constants.NUM_TRAIN_IMAGES // BATCH_SIZE), desc='Training epoch ' + str(epoch)):
124 |                     train_batch_numpy = sess.run(train_batch)
125 |                     rcp = random_crop_params(crop_height=IMAGE_HEIGHT, crop_width=IMAGE_WIDTH)
126 |                     gray_crop = train_batch_numpy['camera_image'][:, rcp['y1']:rcp['y2'], rcp['x1']:rcp['x2'], :]
127 |                     seg_crop = train_batch_numpy['segmentation_image'][:, rcp['y1']:rcp['y2'], rcp['x1']:rcp['x2'], :]
128 |                     feed_dict = {
129 |                         'is_training:0': True,
130 |                         image_input: gray_crop,
131 |                         segmentation_input: seg_crop}
132 | 
133 |                     iteration = sess.run(
134 |                         {
135 |                             'update_ops': update_ops,
136 |                             'train_step': train_step,
137 |                             'prediction': funcs['prediction'],
138 |                             'abs_loss': funcs['abs_loss'],
139 |                             'miss_loss': funcs['misclassifications'],
140 |                             'train_loss': funcs['train_loss'],
141 |                             'input_batch': image_batch,
142 |                             'segmentation_batch': segmentation_batch,
143 |                             'train_summaries': merged_tf_train_summaries,
144 |                         },
145 |                         feed_dict=feed_dict)
146 |                     mean_abs_loss += numpy.mean(iteration['abs_loss'])
147 |                     mean_miss_loss += numpy.mean(iteration['miss_loss'])
148 |                     mean_train_loss += numpy.mean(iteration['train_loss'])
149 | 
150 |                     if minibatch % 25 == 0 and DEBUG:
151 |                         debug_image = (iteration['input_batch'][0, :, :, 0] + 1.0) / 2.0
152 |                         debug_image = cv2.cvtColor(debug_image, cv2.COLOR_GRAY2BGR)
153 |                         debug_label = iteration['segmentation_batch'][0, :, :, 0]
154 |                         debug_prediction = iteration['prediction'][0, :, :, 0]
155 |                         debug_prediction = 1 / (1 + numpy.exp(debug_prediction))  # == 1 - sigmoid(logits), matching the binary inference output
156 | 
157 |                         debug_image[:, :, 1] = debug_label
158 |                         debug_image[:, :, 2] = debug_prediction
159 |                         cv2.imshow('debug_image', debug_image)
160 |                         cv2.imshow('debug_prediction', debug_prediction)
161 |                         cv2.waitKey(5)
162 | 
163 |                 mean_train_train_loss = mean_train_loss / float(minibatch + 1)
164 |                 mean_train_miss_loss = mean_miss_loss / float(minibatch + 1)
165 |                 mean_train_abs_loss = mean_abs_loss / float(minibatch + 1)
166 |                 print('mean train loss', mean_train_train_loss, epoch)
167 | 
168 |                 #######################################################################################################
169 |                 # Validation
170 |                 mean_abs_loss = mean_miss_loss = mean_train_loss = 0  # reset the running sums; otherwise validation includes the training sums
171 |                 for minibatch in tqdm.tqdm(range(constants.NUM_VALID_IMAGES // BATCH_SIZE)):
172 | 
173 |                     valid_batch_numpy = sess.run(valid_batch)
174 |                     rcp = random_crop_params(crop_height=IMAGE_HEIGHT, crop_width=IMAGE_WIDTH)
175 |                     gray_crop = valid_batch_numpy['camera_image'][:, rcp['y1']:rcp['y2'], rcp['x1']:rcp['x2'], :]
176 |                     seg_crop = valid_batch_numpy['segmentation_image'][:, rcp['y1']:rcp['y2'], rcp['x1']:rcp['x2'], :]
177 | 
178 |                     feed_dict = {'is_training:0': False,
179 |                                  image_input: gray_crop,
180 |                                  segmentation_input: seg_crop}
181 | 
182 |                     iteration = sess.run(
183 |                         {
184 |                             'abs_loss': funcs['abs_loss'],
185 |                             'train_loss': funcs['train_loss'],
186 |                             'miss_loss': funcs['misclassifications'],
187 |                         },
188 |                         feed_dict=feed_dict)
189 |                     mean_abs_loss += numpy.mean(iteration['abs_loss'])
190 |                     mean_train_loss += numpy.mean(iteration['train_loss'])
191 |                     mean_miss_loss += numpy.mean(iteration['miss_loss'])
192 |                 mean_valid_train_loss = mean_train_loss / float(minibatch + 1)
193 |                 mean_valid_miss_loss = mean_miss_loss / float(minibatch + 1)
194 |                 mean_valid_abs_loss = mean_abs_loss / float(minibatch + 1)
195 |                 print('mean_valid_loss', mean_valid_train_loss, epoch)
196 | 
197 |                 #######################################################################################################
198 | 
199 |                 print('Writing checkpoint')
200 |                 saver.save(sess, os.path.join(train_directory, 'markers'), global_step=current_sample(epoch + 2, 0))
201 |                 print('Done writing checkpoint')
202 | 
203 |                 mean_abs = tf.Summary()
204 |                 mean_abs.value.add(tag='mean_train_abs_loss', simple_value=mean_train_abs_loss)
205 |                 mean_abs.value.add(tag='mean_valid_abs_loss', simple_value=mean_valid_abs_loss)
206 |                 mean_abs.value.add(tag='mean_train_miss_loss', simple_value=mean_train_miss_loss)
207 |                 mean_abs.value.add(tag='mean_valid_miss_loss', simple_value=mean_valid_miss_loss)
208 |                 mean_abs.value.add(tag='mean_train_train_loss', simple_value=mean_train_train_loss)
209 |                 mean_abs.value.add(tag='mean_valid_train_loss', simple_value=mean_valid_train_loss)
210 |                 writer.add_summary(mean_abs, current_sample(epoch + 1, 0))
211 | 
212 |             coord.request_stop()
213 |             coord.join(threads)
214 |             writer.close()
215 | 
216 | 
217 | def parse_args():
218 |     parser = argparse.ArgumentParser(description='Trains a simple binary lane marker segmentation baseline')
219 |     parser.add_argument('--train_tfrecords', type=str, required=True,
220 |                         help='Tfrecords file for training')
221 |     parser.add_argument('--valid_tfrecords', type=str, required=True,
222 |                         help='Tfrecords file for validation')
223 |     parser.add_argument('--checkpoint', type=str, default=None,
224 |                         help='If provided, continues training from the given checkpoint')
225 |     return parser.parse_args()
226 | 
227 | 
228 | if __name__ == '__main__':
229 |     args = parse_args()
230 |     train(args.train_tfrecords, args.valid_tfrecords, args.checkpoint)
231 | 
--------------------------------------------------------------------------------
/simple_baseline/utils.py:
--------------------------------------------------------------------------------
1 | """ Small collection of util functions """
2 | import os
3 | import tensorflow as tf
4 | 
5 | 
6 | def get_checkpoint(checkpoint):
7 |     """ Returns the last checkpoint from a directory, a checkpoint prefix, or a 'checkpoint' index file """
8 |     if checkpoint is None:
9 |         return None
10 |     if os.path.basename(checkpoint) == 'checkpoint':
11 |         checkpoint = os.path.dirname(checkpoint)
12 |     if os.path.isdir(checkpoint):
13 |         last_checkpoint = tf.train.latest_checkpoint(checkpoint)
14 |         checkpoint = last_checkpoint
15 |     print('Loading checkpoint:', checkpoint)
16 |     return checkpoint
17 | 
--------------------------------------------------------------------------------
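`get_checkpoint` accepts a training directory, a TF 'checkpoint' index file, or a concrete checkpoint prefix, and resolves the first two to the newest checkpoint in that directory. A usage sketch (all paths are made up):

```python
# All three forms resolve to a checkpoint prefix such as '/tmp/train_marker_net/markers-120000'
get_checkpoint('/tmp/train_marker_net')                 # directory: newest checkpoint inside
get_checkpoint('/tmp/train_marker_net/checkpoint')      # TF 'checkpoint' index file: same lookup
get_checkpoint('/tmp/train_marker_net/markers-120000')  # explicit prefix: returned as-is
```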