├── LICENSE
├── README.md
├── check_hosts.sh
├── pop_trainforests.sh
├── pop_features.sh
├── pop_samplesets.sh
├── metadata.py
├── pop_biasfieldcorrected.sh
├── scripts
│   ├── make_body_mask.py
│   ├── train_rdf.py
│   ├── extract_features.py
│   ├── apply_rdf.py
│   ├── evaluate_multilable.py
│   └── sample_trainingset.py
├── pop_lesionsegmentation.sh
├── pop_original.sh
├── featureconfig.py
├── pop_backgroundstripped.sh
├── pop_intensitrangestandardization.sh
├── pop_segmentations.sh
├── config.sh
├── pop_sequencespace.sh
├── run.sh
└── include.sh

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
non-public repository

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
multilabel-multimachine-pipeline
================================
a safeguard

--------------------------------------------------------------------------------
/check_hosts.sh:
--------------------------------------------------------------------------------
#!/bin/bash

####
# Small, convenient script to check which hosts are available and with what software.
####

# host list
HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
#HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
#HOSTS=("elmo" "bert") # bad mood users

## check for programs at each host machine
for host in "${HOSTS[@]}"; do
    echo "##### ${host} #####"
    ssh maier@${host} 'elastix;transformix;fsl5.0-bet | tail -n2;cmtk mrbias;python -c "import medpy; print \"medpy:\", medpy.__file__";python -c "import sklearn; print \"sklearn:\", sklearn.__version__"'
done

--------------------------------------------------------------------------------
/pop_trainforests.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Train the decision forests with a training sample set.
#####

## Changelog
# 2014-08-13 adapted to process multiple ground truths at once
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# main code
log 2 "Training random decision forests" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequenceforests}/${gtset}
    for i in "${allimages[@]}"; do
        if [ -e "${sequenceforests}/${gtset}/${i}.pkl" ]; then
            continue
        fi
        log 2 "Training forest no ${i} from ground truth set ${gtset}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        runcond "scripts/train_rdf.py ${sequencesamplesets}/${gtset}/${i}/trainingset.features.npy ${sequenceforests}/${gtset}/${i}.pkl ${maxdepth}"
    done
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
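
Each forest trained above is written by scripts/train_rdf.py as one pickled scikit-learn estimator per leave-one-out case. A minimal sketch (not a repository file) of how such a pickle could be inspected afterwards; the path is hypothetical, the attributes are standard on fitted scikit-learn forest classifiers:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): inspect a trained forest.
# The pickle path below is hypothetical.
import pickle

with open('forests/seven/01.pkl', 'rb') as f:
    forest = pickle.load(f)

print('number of trees: {}'.format(len(forest.estimators_)))
print('feature importances: {}'.format(forest.feature_importances_))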
--------------------------------------------------------------------------------
/pop_features.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Extracts a number of features as defined in a Python-style config file.
#####

## Changelog
# 2014-08-12 adapted to work with different feature configs per case
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# functions
function extract_features ()
{
    local i=$1
    local sc_featurecnf=$(getcustomfeatureconfig "${scid}")
    mkdircond ${sequencefeatures}/${basesequence}/${i}
    runcond "${scripts}/extract_features.py ${sequenceintensitrangestandardization}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} ${sequencefeatures}/${basesequence}/${i}/ ${sc_featurecnf}"
}

# main code
log 2 "Extracting the features" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
makecustomfeatureconfigs
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )

    mkdircond ${sequencefeatures}/${basesequence}

    parallelize extract_features ${threadcount} images[@]
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

--------------------------------------------------------------------------------
/pop_samplesets.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Samples a number of training samples randomly using a set of selected features.
#####

## Changelog
# 2014-08-12 adapted to work with different sequence combinations, separate train and application sets, and multiple ground truths
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# functions
function sample_trainingset () {
    local i=$1
    local sc_featurecnf=$(getcustomfeatureconfig "${scid}")
    local _trainimages=( ${sc_train_images[$scid]} )
    local _trainimages=( $(delEl "${i}" _trainimages[@]) )
    mkdircond ${sequencesamplesets}/${gtset}/${i}
    runcond "${scripts}/sample_trainingset.py ${sequencefeatures}/${basesequence} ${sequencesegmentations}/${gtset} ${sequencebrainmasks}/${basesequence} ${sequencesamplesets}/${gtset}/${i}/ ${sc_featurecnf} ${samplesize} $(joinarr " " ${_trainimages[@]})"
}

# main code
log 2 "Drawing a training set for each leave-one-out case using stratified random sampling" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
makecustomfeatureconfigs
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequencesamplesets}/${gtset}
    for scid in "${!sc_train_brainmasks[@]}"; do
        basesequence=${sc_train_brainmasks[$scid]}
        images=( ${sc_apply_images[$scid]} )

        parallelize sample_trainingset ${threadcount} images[@]
    done
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
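
The delEl call above removes the current case from its own training image list, which is what makes the sampling leave-one-out. delEl itself lives in include.sh, which is not part of this dump, so its behaviour is an assumption inferred from the usage. A minimal sketch (not a repository file) of the same idea, with the case ids from config.sh:

# Illustrative sketch (not part of the repository): leave-one-out set
# construction, assuming delEl simply drops one element from an array.
images = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10']

for held_out in images:
    # train on all cases except the one the forest will later be applied to
    train_images = [i for i in images if i != held_out]
    print('case {}: train on {}'.format(held_out, ' '.join(train_images)))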
--------------------------------------------------------------------------------
/metadata.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Prints out handy information about the metadata of a NIfTI image, especially regarding the transformation to world coordinates.
arg1: the image to check
"""

import sys

import numpy

from medpy.io import load, header

def main():
    i, h = load(sys.argv[1])

    print 'Image:\t{}'.format(sys.argv[1])
    print 'Shape:\t{}'.format(i.shape)
    print 'Spacing:{}'.format(header.get_pixel_spacing(h))
    print 'Offset:\t{}'.format(header.get_offset(h))

    if 0 == h.get_header()['qform_code']:
        method = 'ANALYZE 7.5 (old)'
    if h.get_header()['qform_code'] > 0:
        method = 'Normal (qform)'
    if h.get_header()['sform_code'] > 0:
        method = 'Special space (sform)'

    print
    print 'Orientation and location in space:'
    print 'Type:\t\t{}'.format(method)
    print 'qform_code:\t{}'.format(h.get_header()['qform_code'])
    print 'sform_code:\t{}'.format(h.get_header()['sform_code'])

    print
    print 'qform == sform?\t{} (max diff={})'.format(numpy.all(h.get_qform() == h.get_sform()), numpy.max(numpy.abs(h.get_qform() - h.get_sform())))
    print 'affine == qform?\t{} (max diff={})'.format(numpy.all(h.get_affine() == h.get_qform()), numpy.max(numpy.abs(h.get_affine() - h.get_qform())))
    print 'affine == sform?\t{} (max diff={})'.format(numpy.all(h.get_affine() == h.get_sform()), numpy.max(numpy.abs(h.get_affine() - h.get_sform())))

    print
    print 'qform:'
    print h.get_qform()
    print 'sform:'
    print h.get_sform()
    print 'affine:'
    print h.get_affine()

if __name__ == "__main__":
    main()
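
The three if blocks above mirror the NIfTI-1 convention: a positive sform_code takes precedence over a positive qform_code, and when both are zero the affine is interpreted ANALYZE-style. A minimal sketch (not a repository file) of the same decision written directly against nibabel; the file name is hypothetical:

# Illustrative sketch (not part of the repository): qform/sform precedence.
import nibabel

img = nibabel.load('case01.nii.gz')  # hypothetical file
hdr = img.header

if hdr['sform_code'] > 0:
    affine = img.get_sform()   # sform wins when set
elif hdr['qform_code'] > 0:
    affine = img.get_qform()
else:
    affine = img.affine        # ANALYZE-style fallback
print(affine)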
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 44 | 45 | -------------------------------------------------------------------------------- /scripts/make_body_mask.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Automatically create a full-body mask for an abdominal MRI image. 5 | .py [] 6 | """ 7 | 8 | import sys 9 | import numpy 10 | from medpy.io import load, save, header 11 | from medpy.filter import largest_connected_component, xminus1d 12 | from scipy.ndimage.morphology import binary_opening, binary_closing,\ 13 | binary_erosion, binary_dilation, binary_fill_holes 14 | 15 | DEFAULT_THRESHOLD = 50.0 16 | 17 | def main(): 18 | i, h = load(sys.argv[1]) 19 | if len(sys.argv) > 3: 20 | t = float(sys.argv[3]) 21 | else: 22 | t = DEFAULT_THRESHOLD 23 | 24 | # threshold image 25 | i = i > t 26 | 27 | # select only largest connected component 28 | i = largest_connected_component(i) 29 | 30 | # fill holes along each dimension in 2D 31 | i = xminus1d(i, binary_fill_holes, 0) 32 | i = xminus1d(i, binary_fill_holes, 1) 33 | i = xminus1d(i, binary_fill_holes, 2) 34 | 35 | # select only largest connected component 36 | i = largest_connected_component(i) 37 | 38 | # apply morphological operations 39 | i = binary_closing(i, structure=None, iterations=3) # 3D 40 | #i = morphology2d(binary_closing, i, structure=1, iterations=1) 41 | 42 | if 0 == numpy.count_nonzero(i): 43 | raise Warning("{}: empty mask resulted".format(sys.argv[1])) 44 | 45 | save(i, sys.argv[2], h, True) 46 | 47 | def morphology2d(operation, arr, structure = None, iterations=1, dimension = 2): 48 | res = numpy.zeros(arr.shape, numpy.bool) 49 | for sl in range(processed.shape[dimension]): 50 | res[:,:,sl] = operation(arr[:,:,sl], structure, iterations) 51 | return res 52 | 53 | if __name__ == "__main__": 54 | main() 55 | 56 | 57 | -------------------------------------------------------------------------------- /pop_lesionsegmentation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Applyies the forests to a (preliminary) segmentation of the brain lesion in sequence space. 5 | ##### 6 | 7 | ## Changelog 8 | # 2014-08-13 Adapted to cope with multiple sequence configurations and ground truth sets 9 | # 2014-05-08 Adapted to the new, distributed calculation scheme. 10 | # 2013-04-03 Added a morphological post-processing step (and removed again). 11 | # 2013-03-25 Updated to new, variable version. 12 | # 2013-11-25 Updated to use new script to distinguish between sequence space and std space features 13 | # 2013-11-05 adapted to new brain mask location 14 | # 2013-10-29 created 15 | 16 | # include shared information 17 | source $(dirname $0)/include.sh 18 | 19 | # functions 20 | 21 | # main code 22 | log 2 "Applying random decision forests to segment lesion" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 23 | 24 | makecustomfeatureconfigs 25 | for gtset in "${gtsets[@]}"; do 26 | mkdircond ${sequencelesionsegmentation}/${gtset} 27 | 28 | for scid in "${!sc_train_brainmasks[@]}"; do 29 | basesequence=${sc_train_brainmasks[$scid]} 30 | images=( ${sc_apply_images[$scid]} ) 31 | 32 | log 2 "Applying for ground truth set ${gtset} and seq. configuration ${scid}..." 
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 33 | 34 | for i in "${images[@]}"; do 35 | sc_featurecnf=$(getcustomfeatureconfig "${scid}") 36 | mkdircond ${sequencelesionsegmentation}/${gtset}/${i} 37 | runcond "${scripts}/apply_rdf.py ${sequenceforests}/${gtset}/${i}.pkl ${sequencefeatures}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.nii.gz ${sc_featurecnf} ${sequencelesionsegmentation}/${gtset}/${i}/probabilities.nii.gz ${sequencelesionsegmentation}/${gtset}/${i}/segmentation.nii.gz" 38 | done 39 | done 40 | 41 | done 42 | log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 43 | 44 | 45 | -------------------------------------------------------------------------------- /pop_original.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Link images from the image database in a consitent manner to 00originals. 5 | # Links all images whose case ids are mentiones in "includes.sh". 6 | ##### 7 | 8 | ## Changelog 9 | # 2015-02-27 adapted to visceral pipeline 10 | # 2014-05-05 every second case now gets flipped 11 | # 2014-03-24 changed to link sequence by availability (i.e. skip non-existing ones with only info message displayed) 12 | # 2013-11-13 changed to actually copy even existing files and to correct the qform and sform codes 13 | # 2013-10-15 changed the ADC creation script and added a conversion of non-float to float images 14 | # 2013-10-02 created 15 | 16 | # Visceral ground-truth labels 17 | # 1: liver 18 | # 2: spleen 19 | # 3: bladder 20 | # 4: left (liver) kidney 21 | # 5: right kidney 22 | # 6: left ? muscle 23 | # 7: right ? muscle 24 | 25 | # include shared information 26 | source $(dirname $0)/include.sh 27 | 28 | # Constants 29 | sequencestolink=('MRI') 30 | 31 | # Image collection 32 | srcdir="/share/data_mumpitz2/heinrich/OskarMRI/" 33 | declare -A indicesmapping=( ["01"]="1" ["02"]="2" ["03"]="3" ["04"]="4" ["05"]="5" ["06"]="6" ["07"]="7" ["08"]="8" ["09"]="9" ["10"]="10") 34 | 35 | ### 36 | # Prepare all the sequences of a case 37 | ### 38 | log 1 "Linking images and ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 39 | for i in "${images[@]}"; do 40 | mkdircond "${originals}/${i}" 41 | for s in "${sequencestolink[@]}"; do 42 | srcfile="${srcdir}/${s}${indicesmapping[${i}]}.${imgfiletype}" 43 | trgfile="${originals}/${i}/${s}.${imgfiletype}" 44 | lncond "${srcfile}" "${trgfile}" 45 | done 46 | srcfile="${srcdir}/${s}${indicesmapping[${i}]}_seg.${imgfiletype}" 47 | trgfile="${segmentations}/${i}.${imgfiletype}" 48 | #runcond "scripts/extract_label.py ${srcfile} ${trgfile} ${label}" # only required, if a single label should be extracted 49 | lncond "${srcfile}" "${trgfile}" 50 | done 51 | log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 52 | 53 | -------------------------------------------------------------------------------- /scripts/train_rdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Train a decision forest on a training set. 
--------------------------------------------------------------------------------
/pop_lesionsegmentation.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Applies the forests to create a (preliminary) segmentation of the brain lesion in sequence space.
#####

## Changelog
# 2014-08-13 Adapted to cope with multiple sequence configurations and ground truth sets
# 2014-05-08 Adapted to the new, distributed calculation scheme.
# 2014-04-03 Added a morphological post-processing step (and removed it again).
# 2014-03-25 Updated to new, variable version.
# 2013-11-25 Updated to use the new script to distinguish between sequence space and std space features
# 2013-11-05 adapted to new brain mask location
# 2013-10-29 created

# include shared information
source $(dirname $0)/include.sh

# functions

# main code
log 2 "Applying random decision forests to segment lesion" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

makecustomfeatureconfigs
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequencelesionsegmentation}/${gtset}

    for scid in "${!sc_train_brainmasks[@]}"; do
        basesequence=${sc_train_brainmasks[$scid]}
        images=( ${sc_apply_images[$scid]} )

        log 2 "Applying for ground truth set ${gtset} and seq. configuration ${scid}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

        for i in "${images[@]}"; do
            sc_featurecnf=$(getcustomfeatureconfig "${scid}")
            mkdircond ${sequencelesionsegmentation}/${gtset}/${i}
            runcond "${scripts}/apply_rdf.py ${sequenceforests}/${gtset}/${i}.pkl ${sequencefeatures}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.nii.gz ${sc_featurecnf} ${sequencelesionsegmentation}/${gtset}/${i}/probabilities.nii.gz ${sequencelesionsegmentation}/${gtset}/${i}/segmentation.nii.gz"
        done
    done

done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

--------------------------------------------------------------------------------
/pop_original.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Link images from the image database in a consistent manner to 00originals.
# Links all images whose case ids are mentioned in "include.sh".
#####

## Changelog
# 2015-02-27 adapted to visceral pipeline
# 2014-05-05 every second case now gets flipped
# 2014-03-24 changed to link sequences by availability (i.e. skip non-existing ones with only an info message displayed)
# 2013-11-13 changed to actually copy even existing files and to correct the qform and sform codes
# 2013-10-15 changed the ADC creation script and added a conversion of non-float to float images
# 2013-10-02 created

# Visceral ground-truth labels
# 1: liver
# 2: spleen
# 3: bladder
# 4: left (liver) kidney
# 5: right kidney
# 6: left ? muscle
# 7: right ? muscle

# include shared information
source $(dirname $0)/include.sh

# Constants
sequencestolink=('MRI')

# Image collection
srcdir="/share/data_mumpitz2/heinrich/OskarMRI/"
declare -A indicesmapping=( ["01"]="1" ["02"]="2" ["03"]="3" ["04"]="4" ["05"]="5" ["06"]="6" ["07"]="7" ["08"]="8" ["09"]="9" ["10"]="10" )

###
# Prepare all the sequences of a case
###
log 1 "Linking images and ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${images[@]}"; do
    mkdircond "${originals}/${i}"
    for s in "${sequencestolink[@]}"; do
        srcfile="${srcdir}/${s}${indicesmapping[${i}]}.${imgfiletype}"
        trgfile="${originals}/${i}/${s}.${imgfiletype}"
        lncond "${srcfile}" "${trgfile}"
    done
    srcfile="${srcdir}/${s}${indicesmapping[${i}]}_seg.${imgfiletype}"
    trgfile="${segmentations}/${i}.${imgfiletype}"
    #runcond "scripts/extract_label.py ${srcfile} ${trgfile} ${label}" # only required if a single label should be extracted
    lncond "${srcfile}" "${trgfile}"
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
11 | """ 12 | 13 | import os 14 | import sys 15 | import imp 16 | import numpy 17 | import itertools 18 | 19 | from medpy.io import load, header 20 | 21 | # configuration 22 | trg_dtype = numpy.float32 23 | 24 | def main(): 25 | # loading the features to extract 26 | d, m = os.path.split(os.path.splitext(sys.argv[4])[0]) 27 | f, filename, desc = imp.find_module(m, [d]) 28 | features_to_extract = imp.load_module(m, f, filename, desc).features_to_extract 29 | 30 | # loading the image mask 31 | m = load(sys.argv[2])[0].astype(numpy.bool) 32 | 33 | # extracting the required features and saving them 34 | for sequence, function_call, function_arguments, voxelspacing in features_to_extract: 35 | if not isfv(sys.argv[3], sequence, function_call, function_arguments): 36 | #print sequence, function_call.__name__, function_arguments 37 | i, h = load('{}/{}.nii.gz'.format(sys.argv[1], sequence)) 38 | call_arguments = list(function_arguments) 39 | if voxelspacing: call_arguments.append(header.get_pixel_spacing(h)) 40 | call_arguments.append(m) 41 | fv = function_call(i, *call_arguments) 42 | savefv(fv, sys.argv[3], sequence, function_call, function_arguments) 43 | 44 | def savefv(fv, trgdir, seq, fcall, fargs): 45 | """Saves the supplied feature vector under a fixed naming rule.""" 46 | name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs])) 47 | with open('{}/{}.npy'.format(trgdir, name), 'wb') as f: 48 | numpy.save(f, fv.astype(trg_dtype)) 49 | 50 | def isfv(trgdir, seq, fcall, fargs): 51 | name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs])) 52 | return os.path.exists('{}/{}.npy'.format(trgdir, name)) 53 | 54 | if __name__ == "__main__": 55 | main() 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /featureconfig.py: -------------------------------------------------------------------------------- 1 | #### 2 | # Configuration file: Denotes the features to extract 3 | #### 4 | 5 | from medpy.features.intensity import intensities, centerdistance, centerdistance_xdminus1, local_mean_gauss, local_histogram 6 | 7 | MRI = [ 8 | ('MRI', intensities, [], False), 9 | ('MRI', local_mean_gauss, [3], True), 10 | ('MRI', local_mean_gauss, [5], True), 11 | ('MRI', local_mean_gauss, [7], True), 12 | ('MRI', local_histogram, [11, 'image', (0, 100), 5, None, None, 'ignore', 0], False), #11 bins, 5*2=10mm region 13 | ('MRI', local_histogram, [11, 'image', (0, 100), 10, None, None, 'ignore', 0], False), #11 bins, 10*2=20mm region 14 | ('MRI', local_histogram, [11, 'image', (0, 100), 15, None, None, 'ignore', 0], False), #11 bins, 15*2=30mm region 15 | ('MRI', centerdistance_xdminus1, [0], True), 16 | ('MRI', centerdistance_xdminus1, [1], True), 17 | ('MRI', centerdistance_xdminus1, [2], True) 18 | ] 19 | 20 | APROBL0 = [ 21 | ('aprobl0', intensities, [], False), 22 | ('aprobl0', local_mean_gauss, [5], True), 23 | ('aprobl0', local_mean_gauss, [10], True), 24 | ('aprobl0', local_mean_gauss, [20], True) 25 | ] 26 | 27 | APROBL1 = [ 28 | ('aprobl1', intensities, [], False), 29 | ('aprobl1', local_mean_gauss, [5], True), 30 | ('aprobl1', local_mean_gauss, [10], True), 31 | ('aprobl1', local_mean_gauss, [20], True) 32 | ] 33 | 34 | APROBL2 = [ 35 | ('aprobl2', intensities, [], False), 36 | ('aprobl2', local_mean_gauss, [5], True), 37 | ('aprobl2', local_mean_gauss, [10], True), 38 | ('aprobl2', local_mean_gauss, [20], True) 39 | ] 40 | 41 | APROBL3 = [ 42 | ('aprobl3', intensities, 
--------------------------------------------------------------------------------
/scripts/extract_features.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Extract features from a supplied multi-spectral image according to a config file and save them under the supplied target directory.
arg1: folder with the image channels
arg2: mask image; features are only extracted for voxels where it is 1
arg3: the target folder to store the extracted features in
arg4: the config file, containing a struct called features_to_extract that follows a special syntax

Note: Does not overwrite existing feature files.
"""

import os
import sys
import imp
import numpy
import itertools

from medpy.io import load, header

# configuration
trg_dtype = numpy.float32

def main():
    # load the features to extract
    d, m = os.path.split(os.path.splitext(sys.argv[4])[0])
    f, filename, desc = imp.find_module(m, [d])
    features_to_extract = imp.load_module(m, f, filename, desc).features_to_extract

    # load the image mask
    m = load(sys.argv[2])[0].astype(numpy.bool)

    # extract the required features and save them
    for sequence, function_call, function_arguments, voxelspacing in features_to_extract:
        if not isfv(sys.argv[3], sequence, function_call, function_arguments):
            #print sequence, function_call.__name__, function_arguments
            i, h = load('{}/{}.nii.gz'.format(sys.argv[1], sequence))
            call_arguments = list(function_arguments)
            if voxelspacing: call_arguments.append(header.get_pixel_spacing(h))
            call_arguments.append(m)
            fv = function_call(i, *call_arguments)
            savefv(fv, sys.argv[3], sequence, function_call, function_arguments)

def savefv(fv, trgdir, seq, fcall, fargs):
    """Saves the supplied feature vector under a fixed naming rule."""
    name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
    with open('{}/{}.npy'.format(trgdir, name), 'wb') as f:
        numpy.save(f, fv.astype(trg_dtype))

def isfv(trgdir, seq, fcall, fargs):
    name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
    return os.path.exists('{}/{}.npy'.format(trgdir, name))

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/featureconfig.py:
--------------------------------------------------------------------------------
####
# Configuration file: Denotes the features to extract
####

from medpy.features.intensity import intensities, centerdistance, centerdistance_xdminus1, local_mean_gauss, local_histogram

MRI = [
    ('MRI', intensities, [], False),
    ('MRI', local_mean_gauss, [3], True),
    ('MRI', local_mean_gauss, [5], True),
    ('MRI', local_mean_gauss, [7], True),
    ('MRI', local_histogram, [11, 'image', (0, 100), 5, None, None, 'ignore', 0], False), # 11 bins, 5*2=10mm region
    ('MRI', local_histogram, [11, 'image', (0, 100), 10, None, None, 'ignore', 0], False), # 11 bins, 10*2=20mm region
    ('MRI', local_histogram, [11, 'image', (0, 100), 15, None, None, 'ignore', 0], False), # 11 bins, 15*2=30mm region
    ('MRI', centerdistance_xdminus1, [0], True),
    ('MRI', centerdistance_xdminus1, [1], True),
    ('MRI', centerdistance_xdminus1, [2], True)
]

APROBL0 = [
    ('aprobl0', intensities, [], False),
    ('aprobl0', local_mean_gauss, [5], True),
    ('aprobl0', local_mean_gauss, [10], True),
    ('aprobl0', local_mean_gauss, [20], True)
]

APROBL1 = [
    ('aprobl1', intensities, [], False),
    ('aprobl1', local_mean_gauss, [5], True),
    ('aprobl1', local_mean_gauss, [10], True),
    ('aprobl1', local_mean_gauss, [20], True)
]

APROBL2 = [
    ('aprobl2', intensities, [], False),
    ('aprobl2', local_mean_gauss, [5], True),
    ('aprobl2', local_mean_gauss, [10], True),
    ('aprobl2', local_mean_gauss, [20], True)
]

APROBL3 = [
    ('aprobl3', intensities, [], False),
    ('aprobl3', local_mean_gauss, [5], True),
    ('aprobl3', local_mean_gauss, [10], True),
    ('aprobl3', local_mean_gauss, [20], True)
]

APROBL4 = [
    ('aprobl4', intensities, [], False),
    ('aprobl4', local_mean_gauss, [3], True),
    ('aprobl4', local_mean_gauss, [10], True),
    ('aprobl4', local_mean_gauss, [20], True)
]

APROBL5 = [
    ('aprobl5', intensities, [], False),
    ('aprobl5', local_mean_gauss, [5], True),
    ('aprobl5', local_mean_gauss, [10], True),
    ('aprobl5', local_mean_gauss, [20], True)
]

APROBL6 = [
    ('aprobl6', intensities, [], False),
    ('aprobl6', local_mean_gauss, [5], True),
    ('aprobl6', local_mean_gauss, [10], True),
    ('aprobl6', local_mean_gauss, [20], True)
]

APROBL7 = [
    ('aprobl7', intensities, [], False),
    ('aprobl7', local_mean_gauss, [5], True),
    ('aprobl7', local_mean_gauss, [10], True),
    ('aprobl7', local_mean_gauss, [20], True)
]

features_to_extract = MRI + APROBL1 + APROBL2 + APROBL3 + APROBL4 + APROBL5 + APROBL6 + APROBL7
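
Each tuple in this config is (sequence, feature function, arguments, needs-voxel-spacing); savefv in extract_features.py turns it into a deterministic file name, which is how apply_rdf.py and sample_trainingset.py later find the features again. A minimal sketch (not a repository file) of that naming rule; func_name is the Python 2 function-name attribute used throughout the repository:

# Illustrative sketch (not part of the repository): the feature file naming rule.
from medpy.features.intensity import local_mean_gauss

entry = ('MRI', local_mean_gauss, [3], True)   # one line from featureconfig.py
seq, fcall, fargs, _ = entry
name = 'feature.{}.{}.{}'.format(seq, fcall.func_name,
                                 '_'.join(['arg{}'.format(i) for i in fargs]))
print(name)  # -> feature.MRI.local_mean_gauss.arg3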
--------------------------------------------------------------------------------
/scripts/apply_rdf.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Apply an RDF to a case.
arg1: the decision forest file
arg2: the case folder holding the feature files
arg3: the case's mask file
arg4: file containing a struct identifying the features to use
arg5: the target probability file
arg6: the target segmentation file
"""

import os
import sys
import imp
import pickle
import numpy

from scipy.ndimage.morphology import binary_fill_holes, binary_dilation
from scipy.ndimage.measurements import label

from medpy.io import load, save
from medpy.features.utilities import join

# constants
n_jobs = 6

def main():
    # catch parameters
    forest_file = sys.argv[1]
    case_folder = sys.argv[2]
    mask_file = sys.argv[3]
    feature_cnf_file = sys.argv[4]
    probability_file = sys.argv[5]
    segmentation_file = sys.argv[6]

    # load the features to use and create proper names from them
    features_to_use = load_feature_names(feature_cnf_file)

    # load the case features
    feature_vector = []

    for feature_name in features_to_use:
        _file = os.path.join(case_folder, '{}.npy'.format(feature_name))
        if not os.path.isfile(_file):
            raise Exception('The feature "{}" could not be found in folder "{}". Breaking.'.format(feature_name, case_folder))
        with open(_file, 'rb') as f:
            feature_vector.append(numpy.load(f))
    feature_vector = join(*feature_vector)
    if 1 == feature_vector.ndim:
        feature_vector = numpy.expand_dims(feature_vector, -1)

    # load and apply the decision forest
    with open(forest_file, 'rb') as f:
        forest = pickle.load(f)
    probability_results = []
    for _fv in numpy.array_split(feature_vector, 20):
        probability_results.append(forest.predict_proba(_fv))
    probability_results = numpy.vstack(probability_results)
    classification_results = numpy.argmax(probability_results, -1)

    # prepare the images
    m, h = load(mask_file)
    m = m.astype(numpy.bool)
    oc = numpy.zeros(m.shape, numpy.uint8)
    op = numpy.zeros(m.shape + (probability_results.shape[-1], ), numpy.float32)
    oc[m] = numpy.squeeze(classification_results).ravel()
    op[m] = numpy.squeeze(probability_results).reshape(numpy.prod(probability_results.shape[:-1]), probability_results.shape[-1])

    # save the results
    save(oc, segmentation_file, h, True)
    save(op, probability_file, h, True)

def feature_struct_entry_to_name(fstruct):
    seq, fcall, fargs, _ = fstruct
    return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))

def load_feature_struct(f):
    "Load the feature struct from a feature config file."
    d, m = os.path.split(os.path.splitext(f)[0])
    f, filename, desc = imp.find_module(m, [d])
    return imp.load_module(m, f, filename, desc).features_to_extract

def load_feature_names(f):
    "Load the feature names from a feature config file."
    fs = load_feature_struct(f)
    return [feature_struct_entry_to_name(e) for e in fs]

if __name__ == "__main__":
    main()
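
The two numpy.zeros volumes at the end of main() show how the flat per-voxel predictions are scattered back into image space: boolean-mask assignment places the i-th prediction at the i-th True voxel in row-major order. A toy sketch (not a repository file) of exactly that numpy behaviour:

# Illustrative sketch (not part of the repository): scattering flat predictions
# back into a volume via a boolean mask.
import numpy

mask = numpy.zeros((4, 4), numpy.bool_)
mask[1:3, 1:3] = True                                  # 4 voxels inside the mask

predictions = numpy.array([1, 2, 3, 4], numpy.uint8)   # one value per masked voxel

out = numpy.zeros(mask.shape, numpy.uint8)
out[mask] = predictions                                # row-major order of the True voxels
print(out)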
--------------------------------------------------------------------------------
/pop_backgroundstripped.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Strips the background (originally: the skull) from all sequence volumes.
#####

## Changelog
# 2015-02-27 Changed to foreground rather than brainmask
# 2014-08-14 Changed such that all brain mask option folders contain brain masks for all cases, using copies from other brain mask settings
# 2014-08-12 Adapted to work with different skull-stripping base sequences for different target sequences
# 2014-03-25 Adapted to take any sequence as base sequence.
# 2013-11-04 Improved the mechanism and separated the brain mask location from the skull-stripped images.
# 2013-10-16 created

# include shared information
source $(dirname $0)/include.sh

# functions
###
# Compute a foreground mask using the base sequence
###
function compute_foregroundmask ()
{
    # grab parameters
    i=$1

    # create the required directories
    mkdircond ${sequenceskullstripped}/${basesequence}/${i}
    # continue if the target file already exists
    if [ -f "${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}" ]; then
        return
    fi
    # compute the foreground mask
    log 1 "Computing foreground mask for ${sequencespace}/${i}/${basesequence}.${imgfiletype}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    runcond "${scripts}/make_foreground_mask.py ${sequencespace}/${i}/${basesequence}.${imgfiletype} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}"
}

# main code
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )
    sequences=( ${sc_sequences[$scid]} )

    mkdircond ${sequenceskullstripped}/${basesequence}
    mkdircond ${sequencebrainmasks}/${basesequence}

    log 2 "Computing foreground masks on base sequence ${basesequence}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    parallelize compute_foregroundmask ${threadcount} images[@]

    log 2 "Applying the foreground mask to the remaining spectra" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${images[@]}"; do
        for s in "${sequences[@]}"; do
            # skip if base sequence
            if [ "${s}" == "${basesequence}" ]; then
                continue
            fi

            srcfile="${sequencespace}/${i}/${s}.${imgfiletype}"
            trgfile="${sequenceskullstripped}/${basesequence}/${i}/${s}.${imgfiletype}"

            # continue if the target file already exists
            if [ -f "${trgfile}" ]; then
                log 1 "Target file ${trgfile} already exists. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi
            # continue and warn if the source file doesn't exist
            if [ ! -f "${srcfile}" ]; then
                log 3 "Source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi

            runcond "${scripts}/apply_binary_mask.py ${srcfile} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} ${trgfile}" /dev/null
        done
    done
done

# fill possible gaps in the foreground masks (careful: which foreground mask configuration is chosen for the filling is random!)
for brain_basessequence_from in "${sc_train_brainmasks[@]}"; do
    for brain_basessequence_to in "${sc_train_brainmasks[@]}"; do
        linkmissing "${sequencebrainmasks}/${brain_basessequence_from}/" "${sequencebrainmasks}/${brain_basessequence_to}/"
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
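
apply_binary_mask.py itself is not part of this dump; given how it is called above (source image, mask, target), a plausible minimal implementation would be the following sketch. Everything in it is inferred from the call signature, not taken from the actual script:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): what apply_binary_mask.py
# plausibly does, inferred from its usage: <image> <mask> <target>.
import sys
import numpy
from medpy.io import load, save

def main():
    i, h = load(sys.argv[1])
    m = load(sys.argv[2])[0].astype(numpy.bool)
    i[~m] = 0                      # zero out everything outside the mask
    save(i, sys.argv[3], h, True)

if __name__ == "__main__":
    main()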
--------------------------------------------------------------------------------
/pop_intensitrangestandardization.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Standardizes the intensity profiles of all images belonging to the same MRI sequence.
#####

## Changelog
# 2014-08-12 Updated to work with adaptive intensity model creation depending on the brain masks used.
# 2014-03-25 Updated to new structure.
# 2013-11-14 changed script to allow for intensity correction of an image, even if the model already exists
# 2013-11-05 adapted to new brain mask location
# 2013-10-22 created

# include shared information
source $(dirname $0)/include.sh

# main code
log 2 "Learning and adapting the intensity profiles" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
tmpdir=`mktemp -d`
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )
    sequences=( ${sc_sequences[$scid]} )

    mkdircond ${sequenceintensitrangestandardization}/${basesequence}

    for s in "${sequences[@]}"; do
        log 2 "Processing MRI sequence ${s}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

        # if the target model already exists, skip the model creation for the whole sequence and remark upon it
        if [ -f "${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl" ]; then
            log 3 "The intensity model for the MRI sequence ${s} already exists. Skipping the model creation for the whole sequence." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        else
            # collect all the images for training
            images_string=""
            masks_string=""
            for i in "${images[@]}"; do
                # if a target file already exists, skip the model creation and image transformation for the whole sequence and remark upon it
                if [ -f "${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}" ]; then
                    log 3 "One of the target files for the MRI sequence ${s} already exists. Skipping the model creation and image transformation for the whole sequence." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                    continue 2
                fi
                # add the image to the list of images to use for training (always use all images)
                images_string="${images_string} ${sequencebiasfieldcorrected}/${basesequence}/${i}/${s}.${imgfiletype}"
                masks_string="${masks_string} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}"
            done

            # train the model without transforming the images
            runcond "medpy_intensity_range_standardization.py --masks ${masks_string} --save-model ${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl ${images_string}"
        fi

        # transform and post-process the images, then move them to their target location, if not already existent
        for i in "${images[@]}"; do
            mkdircond ${sequenceintensitrangestandardization}/${basesequence}/${i}
            if [ ! -f "${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}" ]; then
                runcond "medpy_intensity_range_standardization.py --load-model ${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl --masks ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} --save-images ${tmpdir} ${sequencebiasfieldcorrected}/${basesequence}/${i}/${s}.${imgfiletype} -f"
                runcond "${scripts}/condenseoutliers.py ${tmpdir}/${s}.${imgfiletype} ${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}"
            fi
        done

        emptydircond ${tmpdir}
    done
done
rmdircond ${tmpdir}
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
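
The medpy_intensity_range_standardization.py CLI used above wraps medpy's IntensityRangeStandardization class in the same train-once / transform-many pattern as the script: learn a model from all training images, then map each image into the learned space. A minimal sketch (not a repository file) of that pattern; the arrays are toy data and the exact class behaviour on out-of-range intensities is not shown here:

# Illustrative sketch (not part of the repository): train-once / transform-many
# intensity standardization with medpy.
import numpy
from medpy.filter import IntensityRangeStandardization

# hypothetical training data: one flat intensity array per training image
train = [numpy.random.rand(1000) * s for s in (80, 100, 120)]

irs = IntensityRangeStandardization()
irs.train(train)                         # learn the common intensity model

new_image = numpy.random.rand(1000) * 90
standardized = irs.transform(new_image)  # map a new image into the model's space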
--------------------------------------------------------------------------------
/pop_segmentations.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Link all segmentation images from the image database in a consistent manner.
#####

## Changelog
# 2014-08-12 Changed to work with multiple segmentation sets
# 2014-05-05 Changed to also include the flipping along the mid-sagittal plane for every second case.
# 2014-03-25 Changed to copy images and correct (possibly faulty) voxel spacings
# 2014-03-25 Adapted to work with the new case to database case mapping.
# 2013-10-21 created

# include shared information
source $(dirname $0)/include.sh

# Constants
basesequenceflipdim="0"

# Image collection HEOPKS details
c01dir="/imagedata/HEOPKS/"
declare -A c01indicesmapping=( ["01"]="01" ["02"]="02" ["03"]="03" ["04"]="04" ["05"]="05" ["06"]="06" ["07"]="07" ["08"]="08" ["09"]="09" ["10"]="10" \
                               ["11"]="11" ["12"]="12" ["13"]="13" ["14"]="14" ["15"]="15" ["16"]="16" ["17"]="17" ["18"]="18" ["19"]="19" ["20"]="20" \
                               ["21"]="21" ["22"]="22" ["23"]="23" ["24"]="24" ["25"]="25" ["26"]="26" ["27"]="27" ["28"]="28" ["29"]="29" )

# Image collection JGABLENTZ details
c02dir="/imagedata/JGABLENTZ/"
declare -A c02indicesmapping=( ["30"]="02" ["31"]="08" ["32"]="11" ["33"]="13" ["34"]="14" ["35"]="17" ["36"]="19" ["37"]="20" ["38"]="25" ["39"]="29" \
                               ["40"]="30" ["41"]="31" ["42"]="34" ["43"]="47" ["44"]="55" ["45"]="57" )


##
# Checks whether at least one target image already exists
##
function check_existance () {
    for i in "${allimages[@]}"; do
        [ -f "${segmentations}/${gtset}/${i}.${imgfiletype}" ] && echo "1" && return
    done
    echo "0"
}

# main code
for gtset in "${gtsets[@]}"; do
    log 2 "Processing ground truth set ${gtset}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

    if [[ "$(check_existance)" -eq "1" ]]; then
        log 3 "Folder ${segmentations}/${gtset} already contains files. Assuming done and skipping the complete ground truth set, as otherwise a double-flip might occur." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        continue
    fi

    srcdir=${gtsources[$gtset]}
    mkdircond ${segmentations}/${gtset}

    log 2 "Copying ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        # catch the original voxel spacing of the associated flair sequence
        vs=( $(voxelspacing "${originals}/${i}/flair_tra.${imgfiletype}") )
        vs=$(joinarr " " ${vs[@]})
        # copy and correct the voxel spacing
        if test "${c01indicesmapping[${i}]+isset}"; then
            runcond "medpy_set_pixel_spacing.py ${c01dir}/${srcdir}/${c01indicesmapping[${i}]}.${imgfiletype} ${segmentations}/${gtset}/${i}.${imgfiletype} ${vs[@]}"
        elif test "${c02indicesmapping[${i}]+isset}"; then
            runcond "medpy_set_pixel_spacing.py ${c02dir}/${srcdir}/${c02indicesmapping[${i}]}.${imgfiletype} ${segmentations}/${gtset}/${i}.${imgfiletype} ${vs[@]}"
        else
            log 3 "No candidate for case id ${i} found in any of the collections. Please check your 'images' array. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        fi
    done

    log 2 "Flipping ground truth of every second case in-place along the mid-sagittal plane" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for (( i = 1 ; i < ${#allimages[@]} ; i+=2 )) do
        f="${segmentations}/${gtset}/${allimages[$i]}.${imgfiletype}"
        if [ -e ${f} ]; then
            lnrealize "${f}"
            runcond "${scripts}/flip.py ${f} ${basesequenceflipdim}"
        fi
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
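
flip.py is referenced here but is not part of the dump. Given its call signature, flip.py <image> <dimension>, and the in-place usage after lnrealize, a plausible minimal version is sketched below; everything in it is inferred, not taken from the actual script:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): what flip.py plausibly does,
# inferred from its usage: flip.py <image> <dimension>, modifying the file in-place.
import sys
import numpy
from medpy.io import load, save

def main():
    i, h = load(sys.argv[1])
    d = int(sys.argv[2])
    # mirror the volume along the requested axis (era-safe alternative to numpy.flip)
    sl = [slice(None)] * i.ndim
    sl[d] = slice(None, None, -1)
    i = i[tuple(sl)]
    save(i, sys.argv[1], h, True)  # overwrite in-place

if __name__ == "__main__":
    main()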
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 81 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ###################### 4 | # Configuration file # 5 | ###################### 6 | 7 | ## changelog 8 | # 2014-08-12 adapted to run script 9 | # 2014-05-08 created 10 | 11 | # image array 12 | # INCLUSIVE (training) 13 | images=('01' '02' '03' '04' '05' '06' '07' '08' '09' '10') 14 | 15 | # EXCLUSIVE (preparation & application) 16 | 17 | # ground truth sets and settings 18 | gtsets=("seven") # "seven" 19 | declare -A gtsources=( [""]="" ) 20 | 21 | # sequence combinations settings 22 | declare -A sc_sequences=( ["1"]="MRI aprob0 aprob1 aprob2 aprob3 aprob4 aprob5 aprob6 aprob7" ) 23 | declare -A sc_apply_images=( ["1"]="01 02 03 04 05 06 07 08 09 10" ) 24 | declare -A sc_train_images=( ["1"]="01 02 03 04 05 06 07 08 09 10" ) 25 | declare -A sc_train_brainmasks=( ["1"]="MRI" ) 26 | sequencespacebasesequence="MRI" 27 | evaluationbasesequence="MRI" 28 | 29 | # sequence space settings 30 | isotropic=0 # 0/1 to disable/enable pre-registration resampling of base sequence to isotropic spacing 31 | isotropicspacing=3 # the target isotropic spacing in mm 32 | 33 | # config file with feature (1) to extract and (2) to create the training sample from 34 | featurecnf="featureconfig.py" 35 | 36 | # training sample size 37 | samplesize=500000 38 | 39 | # rdf parameters 40 | maxdepth=100 41 | 42 | # post-processing parameters 43 | minimallesionsize=1500 44 | 45 | ## 46 | # functions 47 | ## 48 | # build a global flat sorted allimages variable 49 | function makeallimages () { 50 | local sorted 51 | readarray -t sorted < <(for a in ${sc_apply_images[@]}; do echo "$a"; done | sort) 52 | allimages=( ${sorted[@]} ) 53 | } 54 | # returns a custom feature config file 55 | # call like: featurecnf_file=$(getcustomfeatureconfig "${scid}") 56 | function getcustomfeatureconfig () { 57 | local scid=$1 58 | local sc_featurecnf="/tmp/.${featurecnf:0: -3}_${scid}.py" 59 | echo "${sc_featurecnf}" 60 | } 61 | # build a custom, hidden feature config file for each sequence combinations 62 | function makecustomfeatureconfigs () { 63 | local scid 64 | for scid in "${sc_ids[@]}"; do 65 | local sequences=( ${sc_sequences[$scid]} ) 66 | local sequences_sum=$(joinarr "+" ${sequences[@]}) 67 | local string="features_to_extract = ${sequences_sum}" 68 | #local sc_featurecnf=".${featurecnf:0: -3}_${scid}.py" 69 | local sc_featurecnf=$(getcustomfeatureconfig "${scid}") 70 | runcond "cp ${featurecnf} ${sc_featurecnf}" 71 | #!NOTE: Not very nice, as runcond is omitted. But I didn't find a solution to get the piping working otherwise. 
--------------------------------------------------------------------------------
/scripts/evaluate_multilable.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Evaluate the created segmentations.
arg1: the segmentation result for each case, with a {} in place of the case number
arg2: the ground truth segmentation, with a {} in place of the case number
arg3: the cases' mask file, with a {} in place of the case number
arg4+: the cases to evaluate
"""

import sys
import math
import time
from multiprocessing.pool import Pool

import numpy
from scipy.ndimage.measurements import label

from medpy.io import load, header, save
from medpy.metric import dc, hd, assd, precision, recall

# constants
n_jobs = 6
silent = True
labels = [1, 2, 3, 4]

def main():

    # catch parameters
    segmentation_base_string = sys.argv[1]
    ground_truth_base_string = sys.argv[2]
    mask_file_base_string = sys.argv[3]
    cases = sys.argv[4:]

    # evaluate each label of each case and collect the scores
    precisions = []
    recalls = []
    dcs = []

    # for each case
    for case in cases:

        # load the images of the current case
        i_segmentation, _ = load(segmentation_base_string.format(case))
        i_truth, _ = load(ground_truth_base_string.format(case))
        i_mask = load(mask_file_base_string.format(case))[0].astype(numpy.bool)

        # collect the images for each label in a list and apply the mask to segmentation and ground truth (to remove ground truth fg outside of the brain mask)
        s = [(i_segmentation == d) & i_mask for d in labels]
        t = [(i_truth == d) & i_mask for d in labels]

        # post-processing
        from scipy.ndimage.morphology import binary_fill_holes
        s = [binary_fill_holes(_s) for _s in s]
        #from medpy.filter import largest_connected_component
        s[0] = largest_connected_components(s[0], n = 2)
        s[1] = largest_connected_components(s[1], n = 1)
        s[2] = largest_connected_components(s[2], n = 2)
        s[3] = largest_connected_components(s[3], n = 2)
        #from scipy.ndimage.morphology import binary_dilation
        #s = [binary_dilation(_s, structure=None, iterations=4) for _s in s]
        save(numpy.asarray(s), segmentation_base_string.format(case) + '_tmp.nii.gz')

        # compute and append the metrics (pool-processed)
        pool = Pool(n_jobs)
        dcs.append(pool.map(wdc, zip(t, s)))
        precisions.append(pool.map(wprecision, zip(s, t)))
        recalls.append(pool.map(wrecall, zip(s, t)))

    # print case-wise and label-wise results
    print 'Case\t',
    for label in labels:
        print 'Label {}\t\t\t'.format(label),
    print '\n\t',
    for label in labels:
        print 'DC[0,1]\tprec.\trec.\t',
    print
    for case, _dcs, _prs, _rcs in zip(cases, dcs, precisions, recalls):
        print '{}'.format(case),
        for _dc, _pr, _rc in zip(_dcs, _prs, _rcs):
            print '\t{:>3,.3f}\t{:>3,.3f}\t{:>3,.3f}'.format(_dc, _pr, _rc),
        print
    print

    # print label-wise averages
    for lid, label in enumerate(labels):
        print 'Label {} averages:'.format(label)

        _mdcs = [_dc[lid] for _dc in dcs]
        _mpres = [_prs[lid] for _prs in precisions]
        _mrcs = [_rcs[lid] for _rcs in recalls]
        print '\tDM average\t{} +/- {} (Median: {})'.format(numpy.mean(_mdcs), numpy.std(_mdcs), numpy.median(_mdcs))
        print '\tPrec. average\t{} +/- {} (Median: {})'.format(numpy.mean(_mpres), numpy.std(_mpres), numpy.median(_mpres))
        print '\tRec. average\t{} +/- {} (Median: {})'.format(numpy.mean(_mrcs), numpy.std(_mrcs), numpy.median(_mrcs))
    print

    # print overall averages (label independent)
    print 'Overall averages:'
    print 'DM average\t{} +/- {} (Median: {})'.format(numpy.asarray(dcs).mean(), numpy.asarray(dcs).std(), numpy.median(numpy.asarray(dcs)))
    print 'Prec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(precisions).mean(), numpy.asarray(precisions).std(), numpy.median(numpy.asarray(precisions)))
    print 'Rec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(recalls).mean(), numpy.asarray(recalls).std(), numpy.median(numpy.asarray(recalls)))

def wdc(x):
    return dc(*x)
def whd(x):
    try:
        val = hd(*x)
    except RuntimeError:
        val = numpy.inf
    return val
def wprecision(x):
    return precision(*x)
def wrecall(x):
    return recall(*x)
def wassd(x):
    try:
        val = assd(*x)
    except RuntimeError:
        val = numpy.inf
    return val

def largest_connected_components(img, n = 1, structure = None):
    r"""
    Select the n largest connected binary components in an image.

    Treats all zero values in the input image as background and all others as foreground.
    The return value is a binary array of equal dimensions as the input array with TRUE
    values where the n largest connected components are situated.

    Parameters
    ----------
    img : array_like
        An array containing connected objects. Will be cast to type numpy.bool.
    n : int
        The number of largest components to keep.
    structure : array_like
        A structuring element that defines the connectivity. Structure must be symmetric.
        If no structuring element is provided, one is automatically generated with a
        squared connectivity equal to one.

    Returns
    -------
    binary_image : ndarray
        The supplied binary image with only the n largest connected components remaining.
    """
    labeled_array, num_features = label(img, structure)
    component_sizes = [numpy.count_nonzero(labeled_array == label_idx) for label_idx in range(1, num_features + 1)]
    component_indices_list_by_sizes = numpy.argsort(component_sizes)[::-1] + 1

    out = numpy.zeros(img.shape, numpy.bool)

    for i in range(n):
        out[labeled_array == component_indices_list_by_sizes[i]] = True
    return out

if __name__ == "__main__":
    main()
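
medpy.metric's dc implements the Dice coefficient, DC = 2|A∩B| / (|A| + |B|). A self-contained sketch (not a repository file) checking the set-theoretic definition against the library call on toy arrays:

# Illustrative sketch (not part of the repository): the Dice coefficient behind
# medpy.metric.dc, checked against its set-theoretic definition.
import numpy
from medpy.metric import dc

a = numpy.zeros((10, 10), numpy.bool_)
b = numpy.zeros((10, 10), numpy.bool_)
a[2:6, 2:6] = True          # 16 voxels
b[4:8, 4:8] = True          # 16 voxels, overlapping a in a 2x2 patch (4 voxels)

manual = 2.0 * (a & b).sum() / (a.sum() + b.sum())
print(manual)               # 8/32 = 0.25
print(dc(a, b))             # same value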
--------------------------------------------------------------------------------
/pop_sequencespace.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Rigidly registers all sequences to a base sequence, which can optionally be resampled to isotropic spacing.
#####

## Changelog
# 2014-08-12 Changed to use sequencespacebasesequence rather than basesequence
# 2014-03-24 Changed to a more flexible version
# 2013-11-13 Added a step to correct the qform and sform codes
# 2013-11-04 Added re-sampling of the T2 image to isotropic spacing before registration and updated the loop design.
# 2013-10-16 ADC images are now not registered directly, but rather transformed with the DW transformation matrix
# 2013-10-15 created

# include shared information
source $(dirname $0)/include.sh

# functions
###
# Resample the base sequence of the supplied id
###
function resample ()
{
    idx=$1

    srcfile="${originals}/${idx}/${sequencespacebasesequence}.${imgfiletype}"
    trgfile="${sequencespace}/${idx}/${sequencespacebasesequence}.${imgfiletype}"

    mkdircond ${sequencespace}/${idx}

    # warn and skip if the source file is not present
    if [ ! -f "${srcfile}" ]; then
        log 3 "Base sequence for case ${idx} not found under ${srcfile}. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        return
    fi

    # process if the target file does not yet exist
    if [ ! -f "${trgfile}" ]; then
        log 1 "Isotropic resampling to ${trgfile}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        runcond "medpy_resample.py ${srcfile} ${trgfile} ${isotropicspacing},${isotropicspacing},${isotropicspacing}"
    fi
}

###
# Register an image to another, also saving the transformation matrix.
###
function register ()
{
    idx=$1
    sequence=$2

    trgdir="${sequencespace}/${idx}/"
    fixed="${trgdir}/${sequencespacebasesequence}.${imgfiletype}"
    moving="${originals}/${idx}/${sequence}.${imgfiletype}"

    tmpdir=`mktemp -d`

    # perform the rigid registration
    log 1 "Registering ${moving} to ${fixed} using tmp dir ${tmpdir}"
    runcond "elastix -f ${fixed} -m ${moving} -out ${tmpdir} -p ${configs}/elastix_sequencespace_rigid_cfg.txt -threads=${threadcount}" /dev/null
    # copy the resulting files
    cpcond "${tmpdir}/result.0.nii.gz" "${trgdir}/${sequence}.${imgfiletype}"
    cpcond "${tmpdir}/TransformParameters.0.txt" "${trgdir}/${sequence}.txt"

    # clean up
    emptydircond "${tmpdir}"
    rmdircond "${tmpdir}"
}

###
# Transform a sequence using an already existing transformation matrix
###
function transform ()
{
    idx=$1
    sequence=$2
    matrix=$3

    trgdir="${sequencespace}/${idx}/"
    moving="${originals}/${idx}/${sequence}.${imgfiletype}"

    tmpdir=`mktemp -d`

    # perform the transformation
    log 1 "Transforming ${sequence} image ${moving} with ${matrix} transformation matrix using tmp dir ${tmpdir}"
    runcond "transformix -in ${moving} -out ${tmpdir} -tp ${matrix}" /dev/null
    # copy the resulting file
    cpcond "${tmpdir}/result.nii.gz" "${trgdir}/${sequence}.${imgfiletype}"
    cpcond "${matrix}" "${trgdir}/${sequence}.txt"

    # clean up
    emptydircond ${tmpdir}
    rmdircond "${tmpdir}"
}


# main code
if (( $isotropic == 1 )) ; then
    log 2 "Resampling all ${sequencespacebasesequence} sequences to an isotropic spacing of ${isotropicspacing}mm" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    parallelize resample ${threadcount} allimages[@]
else
    log 2 "Resampling disabled. Linking base sequences ${sequencespacebasesequence} to the target folder." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        mkdircond ${sequencespace}/${i}
        lncond "${PWD}/${originals}/${i}/${sequencespacebasesequence}.${imgfiletype}" "${sequencespace}/${i}/${sequencespacebasesequence}.${imgfiletype}"
    done
fi

log 2 "Registering all remaining sequences to the base sequence ${sequencespacebasesequence}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${allimages[@]}"; do
    for sequences in "${sc_sequences[@]}"; do
        sequences=( ${sequences} )
        for s in "${sequences[@]}"; do
            srcfile="${originals}/${i}/${s}.${imgfiletype}"
            trgfile="${sequencespace}/${i}/${s}.${imgfiletype}"

            # catch the base sequence and continue, since it is the fixed image and does not need registration
            if [ "${s}" == "${sequencespacebasesequence}" ]; then
                continue
            fi
            # catch ADC and continue, since these are transformed with the DW transformation matrices
            if [ "${s}" == "adc_tra" ]; then
                continue
            fi
            # continue if the target file already exists
            if [ -f "${trgfile}" ]; then
                continue
            fi
            # warn if the source file does not exist
            if [ ! -f "${srcfile}" ]; then
                log 3 "The source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi

            # perform the rigid registration
            register "${i}" "${s}"
        done
    done
done

if isIn "adc_tra" "${sequences[@]}"; then
    log 2 "Registering or transforming ADC images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        srcfile="${originals}/${i}/adc_tra.${imgfiletype}"
        trgfile="${sequencespace}/${i}/adc_tra.${imgfiletype}"
        matrix="${sequencespace}/${i}/dw_tra_b1000_dmean.txt"

        # warn if the source file does not exist
        if [ ! -f "${srcfile}" ]; then
            log 3 "The source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
            continue
        fi
        # continue if the target file already exists
        if [ -f "${trgfile}" ]; then
            continue
        fi

        # transform if a DW image has already been registered
        if [ -f "${matrix}" ]; then
            transform "${i}" "adc_tra" "${matrix}"
        else
            register "${i}" "adc_tra"
        fi
    done
fi

log 2 "Correcting metadata" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${allimages[@]}"; do
    for s in "${sequences[@]}"; do
        if [ -f "${sequencespace}/${i}/${s}.${imgfiletype}" ]; then
            runcond "${scripts}/niftimodifymetadata.py ${sequencespace}/${i}/${s}.${imgfiletype} qf=qf sf=qf qfc=1 sfc=1"
        fi
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
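
medpy_resample.py brings the base sequence onto an isotropic voxel grid before registration; the essential operation is a spline-interpolated zoom by the ratio of old to new spacing. A minimal sketch (not a repository file) of that core, with the 3mm target taken from config.sh and hypothetical file names:

# Illustrative sketch (not part of the repository): isotropic resampling, the
# core of what medpy_resample.py is used for above.
import numpy
from scipy.ndimage import zoom
from medpy.io import load, save, header

img, hdr = load('MRI.nii.gz')                 # hypothetical input
old = numpy.asarray(header.get_pixel_spacing(hdr))
new = numpy.asarray([3.0, 3.0, 3.0])          # target spacing from config.sh

img = zoom(img, old / new, order=3)           # spline interpolation
header.set_pixel_spacing(hdr, new)
save(img, 'MRI_iso.nii.gz', hdr, True)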
143 | """ 144 | labeled_array, num_features = label(img, structure) 145 | component_sizes = [numpy.count_nonzero(labeled_array == label_idx) for label_idx in range(1, num_features + 1)] 146 | component_indices_list_by_sizes = numpy.argsort(component_sizes)[::-1] + 1 147 | 148 | out = numpy.zeros(img.shape, numpy.bool) 149 | 150 | for i in range(n): 151 | out[labeled_array == component_indices_list_by_sizes[i]] = True 152 | return out 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /pop_sequencespace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Rigidly registers all sequences to a base sequence, which can optionally be resampled to isotropic spacing. 5 | ##### 6 | 7 | ## Changelog 8 | # 2014-08-12 Changed to use sequencespacebasesequence rather than basesequence 9 | # 2014-03-24 Changed to a more flexible version 10 | # 2013-11-13 Added step to correct the qform and sform codes 11 | # 2013-11-04 Added re-sampling of T2 image to isotropic spacing before registration and updated loop design. 12 | # 2013-10-16 ADC images are now not registered directly, but rather transformed with the DW transformation matrix 13 | # 2013-10-15 created 14 | 15 | # include shared information 16 | source $(dirname $0)/include.sh 17 | 18 | # functions 19 | ### 20 | # Resample the base sequence of the supplied id 21 | ### 22 | function resample () 23 | { 24 | idx=$1 25 | 26 | srcfile="${originals}/${idx}/${sequencespacebasesequence}.${imgfiletype}" 27 | trgfile="${sequencespace}/${idx}/${sequencespacebasesequence}.${imgfiletype}" 28 | 29 | mkdircond ${sequencespace}/${idx} 30 | 31 | # warn and skip if source file not present 32 | if [ ! -f "${srcfile}" ]; then 33 | log 3 "Base sequence for case ${idx} not found under ${srcfile}. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 34 | return 35 | fi 36 | 37 | # process if target file not yet existing 38 | if [ ! -f "${trgfile}" ]; then 39 | log 1 "Isotropic resampling to ${trgfile}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 40 | runcond "medpy_resample.py ${srcfile} ${trgfile} ${isotropicspacing},${isotropicspacing},${isotropicspacing}" 41 | fi 42 | } 43 | 44 | ### 45 | # Register an image to another, also saving the transformation matrix. 

    # load the features of each case, draw the samples from them, and append them to a training set
    drawn_samples = dict()

    for case in training_set_cases:
        if verboose: print 'Sampling features of case {}'.format(case)

        # load and sample the features piece-wise to avoid excessive memory requirements
        drawn_samples_case = dict()
        for feature_name in features_to_use:
            _file = os.path.join(src_dir, case, '{}.npy'.format(feature_name))
            if not os.path.isfile(_file):
                raise Exception('The feature "{}" for case {} could not be found in folder "{}". Breaking.'.format(feature_name, case, os.path.join(src_dir, case)))
            with open(_file, 'r') as f:
                feature_vector = numpy.load(f)
            tscs = training_set_classes_selections[case]
            for cla, sel in tscs.iteritems():
                if not cla in drawn_samples_case:
                    drawn_samples_case[cla] = []
                drawn_samples_case[cla].append(feature_vector[sel])

        # join and append the feature vector from this case
        for cla, samples in drawn_samples_case.iteritems():
            if not cla in drawn_samples:
                drawn_samples[cla] = []
            drawn_samples[cla].append(join(*samples)) # vertical join of different features

    # prepare the training set as a numpy array, together with the class memberships
    samples = [append(*csamples) for csamples in drawn_samples.itervalues()] # append samples belonging to the same class
    samples_length = [len(x) for x in samples]
    samples_class_memberships = numpy.zeros(sum(samples_length), dtype=numpy.int8)
    i = 0
    for c, sl in zip(drawn_samples.keys(), samples_length):
        samples_class_memberships[i:i+sl] = c
        i += sl
    samples_feature_vector = append(*samples)

    if debug: print 'n_classes', len(drawn_samples)
    if debug: print 'samples_feature_vector shape', samples_feature_vector.shape
    if debug: print 'class_memberships shape', samples_class_memberships.shape
    if debug: print 'class_memberships dtype', samples_class_memberships.dtype
    if debug: print 'class_memberships unique', numpy.unique(samples_class_memberships)

    # save the feature vector, feature names, and class membership vector as the leave-one-out training set
    if verboose: print 'Saving training data set'
    with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, samples_feature_vector)
    with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, samples_class_memberships)
    with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, features_to_use)
    with open('{}/trainingset.classesselections.pkl'.format(trg_dir), 'wb') as f:
        pickle.dump(training_set_classes_selections, f)

    if verboose: print

    if verboose: print 'Done.'

def feature_struct_entry_to_name(fstruct):
    seq, fcall, fargs, _ = fstruct
    return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))

def load_feature_struct(f):
    "Load the feature struct from a feature config file."
    d, m = os.path.split(os.path.splitext(f)[0])
    f, filename, desc = imp.find_module(m, [d])
    return imp.load_module(m, f, filename, desc).features_to_extract

def load_feature_names(f):
    "Load the feature names from a feature config file."
    fs = load_feature_struct(f)
    return [feature_struct_entry_to_name(e) for e in fs]

if __name__ == "__main__":
    main()
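
With config.sh's samplesize of 500000 and nine training cases per leave-one-out fold, each case contributes int(500000/9) = 55555 samples, split across the classes by their voxel share in that case's mask. A worked sketch (not a repository file) of that arithmetic, with a hypothetical class distribution:

# Illustrative sketch (not part of the repository): the per-case, per-class
# sample counts produced by the stratified sampling above.
total_no_of_samples = 500000
n_training_cases = 9                        # leave-one-out over ten cases

samples_to_draw = int(total_no_of_samples / n_training_cases)   # 55555

# hypothetical class distribution inside one case's mask
n_samples_class = {0: 900000, 1: 80000, 2: 20000}
n_class_vector = sum(n_samples_class.values())

for c in sorted(n_samples_class):
    ratio = n_samples_class[c] / float(n_class_vector)
    print('class {}: draw {} samples'.format(c, int(samples_to_draw * ratio)))
# -> class 0: 49999, class 1: 4444, class 2: 1111 samples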
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Adaptable pipeline script for running the whole pipeline or parts of it (distributed where deemed sensible)
#####

### CHANGELOG ###
# 2014-08-12 created

### SETTINGS ###
#START="forests"
EVALLOG="results/standalonerun.log"
LOGGING=true
USER="maier"
HOSTS=("bibo" "yipyip" "hastig" "bert" "elmo")
# Host notes
# kermit: elastix and transformix give a segmentation fault when reading images; the error persists after re-installing elastix from the repository; forums suggest that it is a known (and unsolved) problem
# kruemel: uses mastmeyer's installation of medpy rather than the one from the PYTHONPATH, which might cause problems in some configurations; furthermore, my sklearn is shadowed by a local installation
# the slowest, in order from slow to slower: tiffy < piggy < rumpel < mumpitz
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
#HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
#HOSTS=("elmo" "bert") # bad mood users
100 | 
101 |     # load the features of each case, draw the samples from them and append them to a training set
102 |     drawn_samples = dict()
103 | 
104 |     for case in training_set_cases:
105 |         if verbose: print 'Sampling features of case {}'.format(case)
106 | 
107 |         # load and sample the features piece-wise to avoid excessive memory requirements
108 |         drawn_samples_case = dict()
109 |         for feature_name in features_to_use:
110 |             _file = os.path.join(src_dir, case, '{}.npy'.format(feature_name))
111 |             if not os.path.isfile(_file):
112 |                 raise Exception('The feature "{}" for case {} could not be found in folder "{}". Breaking.'.format(feature_name, case, os.path.join(src_dir, case)))
113 |             with open(_file, 'rb') as f:
114 |                 feature_vector = numpy.load(f)
115 |             tscs = training_set_classes_selections[case]
116 |             for cla, sel in tscs.iteritems():
117 |                 if cla not in drawn_samples_case:
118 |                     drawn_samples_case[cla] = []
119 |                 drawn_samples_case[cla].append(feature_vector[sel])
120 | 
121 |         # join and append the feature vectors from this case
122 |         for cla, samples in drawn_samples_case.iteritems():
123 |             if cla not in drawn_samples:
124 |                 drawn_samples[cla] = []
125 |             drawn_samples[cla].append(join(*samples)) # vertical join of different features
126 | 
127 |     # prepare the training set as a numpy array along with the class memberships
128 |     samples = [append(*csamples) for csamples in drawn_samples.itervalues()] # append samples belonging to the same class
129 |     samples_length = [len(x) for x in samples]
130 |     samples_class_memberships = numpy.zeros(sum(samples_length), dtype=numpy.int8)
131 |     i = 0
132 |     for c, sl in zip(drawn_samples.keys(), samples_length):
133 |         samples_class_memberships[i:i+sl] = c
134 |         i += sl
135 |     samples_feature_vector = append(*samples)
136 | 
137 |     if debug: print 'n_classes', len(drawn_samples)
138 |     if debug: print 'samples_feature_vector shape', samples_feature_vector.shape
139 |     if debug: print 'class_memberships shape', samples_class_memberships.shape
140 |     if debug: print 'class_memberships dtype', samples_class_memberships.dtype
141 |     if debug: print 'class_memberships unique', numpy.unique(samples_class_memberships)
142 | 
143 |     # save the feature vector, feature names and class membership vector as leave-one-out training set
144 |     if verbose: print 'Saving training data set'
145 |     with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
146 |         numpy.save(f, samples_feature_vector)
147 |     with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
148 |         numpy.save(f, samples_class_memberships)
149 |     with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
150 |         numpy.save(f, features_to_use)
151 |     with open('{}/trainingset.classesselections.pkl'.format(trg_dir), 'wb') as f:
152 |         pickle.dump(training_set_classes_selections, f)
153 | 
154 |     if verbose: print
155 | 
156 |     if verbose: print 'Done.'
157 | 
158 | def feature_struct_entry_to_name(fstruct):
159 |     seq, fcall, fargs, _ = fstruct
160 |     return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
161 | 
162 | def load_feature_struct(f):
163 |     "Load the feature struct from a feature config file."
164 |     d, m = os.path.split(os.path.splitext(f)[0])
165 |     f, filename, desc = imp.find_module(m, [d])
166 |     return imp.load_module(m, f, filename, desc).features_to_extract
167 | 
168 | def load_feature_names(f):
169 |     "Load the feature names from a feature config file."
170 |     fs = load_feature_struct(f)
171 |     return [feature_struct_entry_to_name(e) for e in fs]
172 | 
173 | if __name__ == "__main__":
174 |     main()
175 | 
176 | 
177 | 
178 | 
179 | 
--------------------------------------------------------------------------------
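pop_samplesets.sh drives the sampler above once per leave-one-out case. For reference, a stand-alone invocation could look as follows; only the top-level folder names are taken from include.sh, while the sub-directory names, the feature config and the case indices are illustrative placeholders.

#!/bin/bash
# Draw 250000 stratified samples for leave-one-out case 07 from the
# remaining training cases 01, 02 and 03 (all values illustrative).
scripts/sample_trainingset.py \
    05features/flair_tra \
    100gtsegmentations/expert1 \
    102sequenceforegroundmasks/flair_tra \
    06samplesets/expert1/07/ \
    featureconfig.py \
    250000 \
    01 02 03

Note that the pipeline creates the target directory beforehand (via mkdircond), so it must exist when calling the script by hand.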
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #####
4 | # Adaptable pipeline script for running the whole pipeline or parts of it (distributed where deemed sensible)
5 | #####
6 | 
7 | ### CHANGELOG ###
8 | # 2014-08-12 created
9 | 
10 | ### SETTINGS ###
11 | #START="forests"
12 | EVALLOG="results/standalonerun.log"
13 | LOGGING=true
14 | USER="maier"
15 | HOSTS=("bibo" "yipyip" "hastig" "bert" "elmo")
16 | # Host notes
17 | # kermit: elastix and transformix give a segmentation fault when reading the image; the error persists after re-installing elastix from the repository; forums suggest that it is a known (and unsolved) problem
18 | # kruemel: uses mastmeyer's installation of medpy, rather than the one from the PYTHONPATH; might cause problems in some configurations; furthermore, my sklearn is shadowed by a local installation
19 | # the slowest, in order from slow to slower: tiffy < piggy < rumpel < mumpitz
20 | #HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
21 | #HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
22 | #HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
23 | #HOSTS=("elmo" "bert") # bad mood users
24 | 
25 | ### INCLUDES ###
26 | source $(dirname $0)/include.sh
27 | 
28 | ### CONSTANTS ###
29 | LOGDIR='logs/'
30 | CWD="/share$(pwd)"
31 | 
32 | ### FUNCTIONS ###
33 | # Executes runcond, with logging if enabled
34 | function runcondlog () {
35 |     local cmd=$1
36 |     local logfile=$2
37 |     if [[ -z "$LOGGING" ]] ; then
38 |         runcond "${cmd}"
39 |     else
40 |         runcond "${cmd}" "${LOGDIR}/${logfile}.log"
41 |     fi
42 | }
43 | 
44 | # Executes rundistributed, with logging if enabled
45 | function runcondlogdistributed () {
46 |     local cmd=$1
47 |     local logfile=$2
48 |     local errfile=$3
49 |     if [[ -z "$LOGGING" ]] ; then
50 |         rundistributed "${cmd}"
51 |     else
52 |         rundistributed "${cmd}" "${LOGDIR}/${logfile}" "${LOGDIR}/${errfile}"
53 |     fi
54 | }
55 | 
56 | function original () {
57 |     log 2 "### ORIGINALS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
58 |     # not sure whether this can be executed in a distributed fashion
59 |     runcondlog "./pop_original.sh" "originals"
60 |     log 2 "### ORIGINALS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
61 | }
62 | 
63 | function sequencespace () {
64 |     log 2 "### SEQUENCESPACE: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
65 |     # requires elastix & transformix!
66 |     runcondlogdistributed "./pop_sequencespace.sh" "sequencespace" "sequencespace_err"
67 |     #runcondlog "./pop_sequencespace.sh" "sequencespace"
68 |     log 2 "### SEQUENCESPACE: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
69 | }
70 | 
71 | function skullstripped () {
72 |     log 2 "### SEQUENCESKULLSTRIP: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
73 |     # requires fsl5.0-bet
74 |     #runcondlogdistributed "./pop_skullstripped.sh" "skullstrip" "skullstrip_err"
75 |     runcondlog "./pop_skullstripped.sh" "skullstrip"
76 |     log 2 "### SEQUENCESKULLSTRIP: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
77 | }
78 | 
79 | function biasfieldcorrected () {
80 |     log 2 "### SEQUENCEBIASFIELD: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
81 |     # requires cmtk
82 |     #runcondlogdistributed "./pop_biasfieldcorrected.sh" "biasfieldcorrected" "biasfieldcorrected_err"
83 |     runcondlog "./pop_biasfieldcorrected.sh" "biasfield"
84 |     log 2 "### SEQUENCEBIASFIELD: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
85 | }
86 | 
87 | function intensitrangestandardization () {
88 |     log 2 "### SEQUENCEINTENSITYRANGESTD: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
89 |     # cannot readily be executed in a distributed fashion, as the sc_train_images array is required to be unfragmented
90 |     runcondlog "./pop_intensitrangestandardization.sh" "intensityrangestd"
91 |     log 2 "### SEQUENCEINTENSITYRANGESTD: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
92 | }
93 | 
94 | function features () {
95 |     log 2 "### SEQUENCEFEATURES: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
96 |     runcondlogdistributed "./pop_features.sh" "features" "features_err"
97 |     log 2 "### SEQUENCEFEATURES: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
98 | }
99 | 
100 | function samplesets () {
101 |     log 2 "### SEQUENCESAMPLESETS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
102 |     # cannot readily be executed in a distributed fashion, as the sc_train_images array is required to be unfragmented
103 |     runcondlog "./pop_samplesets.sh" "samplesets"
104 |     log 2 "### SEQUENCESAMPLESETS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
105 | }
106 | 
107 | function forests () {
108 |     log 2 "### SEQUENCEFORESTS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
109 |     runcondlogdistributed "./pop_trainforests.sh" "trainforests" "trainforests_err"
110 |     log 2 "### SEQUENCEFORESTS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
111 | }
112 | 
113 | function lesionsegmentation () {
114 |     log 2 "### SEQUENCELESIONSEGMENTATION: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
115 |     runcondlogdistributed "./pop_lesionsegmentation.sh" "lesionsegmentation" "lesionsegmentation_err"
116 |     log 2 "### SEQUENCELESIONSEGMENTATION: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
117 | }
118 | 
119 | function evaluation () {
120 |     log 2 "### EVALUATION: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
121 |     # can only be executed all at once and locally
122 |     runcond "./evaluate.sh" ${EVALLOG}
123 |     log 2 "### EVALUATION: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
124 | }
125 | 
126 | function segmentations () {
127 |     log 2 "### SEGMENTATIONS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
128 |     # not sure whether this can be executed in a distributed fashion
129 |     runcondlog "./pop_segmentations.sh" "segmentations"
130 |     log 2 "### SEGMENTATIONS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
131 | }
132 | 
133 | function sequencesegmentations () {
134 |     log 2 "### SEQUENCESEGMENTATIONS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
135 |     # requires elastix & transformix!
136 |     runcondlogdistributed "./pop_sequencesegmentations.sh" "sequencesegmentations" "sequencesegmentations_err"
137 |     #runcondlog "./pop_sequencesegmentations.sh" "sequencesegmentations"
138 |     log 2 "### SEQUENCESEGMENTATIONS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
139 | }
140 | 
141 | function createfolderstructure () {
142 |     for folder in "${folders[@]}"; do
143 |         mkdircond ${folder}
144 |     done
145 | }
146 | 
147 | # MODULE: Distributed processing
148 | # create a dedicated config file for each host
149 | function makeconfigs () {
150 |     local -i nhosts=${#HOSTS[@]}
151 | 
152 |     # for each host
153 |     for((i=0;i<${nhosts};i++)); do
154 |         local chunks
155 | 
156 |         # generate the config file
157 |         local cnf=".config_${HOSTS[$i]}.sh"
158 |         echo "# Dedicated configuration file for ${HOSTS[$i]}" > ${cnf}
159 |         echo "# $(date +"%D %T")" >> ${cnf}
160 |         echo "# auto generated / can be safely removed after +/- 10s" >> ${cnf}
161 |         echo "" >> ${cnf}
162 | 
163 |         # split and redistribute the sc_apply_images array
164 |         echo "declare -A sc_apply_images=( \\" >> "${cnf}"
165 |         for sc_id in "${!sc_apply_images[@]}"; do
166 |             local images=( ${sc_apply_images[$sc_id]} )
167 |             splitarray chunks ${nhosts} images[@]
168 |             echo "[\"${sc_id}\"]=\"${chunks[$i]}\" \\" >> "${cnf}"
169 |         done
170 |         echo ")" >> "${cnf}"
171 | 
172 |         # split and redistribute the sc_train_images array
173 |         echo "declare -A sc_train_images=( \\" >> "${cnf}"
174 |         for sc_id in "${!sc_train_images[@]}"; do
175 |             local images=( ${sc_train_images[$sc_id]} )
176 |             splitarray chunks ${nhosts} images[@]
177 |             echo "[\"${sc_id}\"]=\"${chunks[$i]}\" \\" >> "${cnf}"
178 |         done
179 |         echo ")" >> "${cnf}"
180 | 
181 |         # add a function call to re-make the allimages variable
182 |         echo "makeallimages" >> "${cnf}"
183 |     done
184 | }
185 | 
186 | # removes the dedicated config file for each host
187 | function removeconfigs () {
188 |     local host
189 |     for host in ${HOSTS[@]};do
190 |         runcond "rm .config_${host}.sh"
191 |     done
192 | }
193 | 
194 | ###
195 | # Function to run a command distributed over a number of machines, taking care of an equal image load among them.
196 | # arg1: the command to execute
197 | # arg2: log file for the command's stdout (on the remote machine); will be suffixed with "_<host>"; optional, otherwise goes to /dev/null
198 | # arg3: err file for the command's stderr (on the remote machine); will be suffixed with "_<host>"; optional, otherwise goes to /dev/null
199 | # example: rundistributed "./myscript.sh" "/tmp/log" "/tmp/err"
200 | # Notes:
201 | # - will always try to switch the remote working directory to ${CWD} before executing any command; if this fails, the command starts in the user's home
202 | # - take care to supply the command like you would start it in the local bash, i.e. with a ./ prefix where required
203 | # - if the function does not return in due time, check whether the command is still running on the target machine(s)
204 | # - if something goes wrong, enable debugging (loglevel=1) and supply a log as well as an error file
205 | function rundistributed () {
206 |     # catch arguments
207 |     local cmd=$1
208 |     local log=$2
209 |     local err=$3
210 | 
211 |     # prepare
212 |     makeconfigs
213 | 
214 |     # start the processes and collect their (remote) pids
215 |     log 2 "Starting distributed processes..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
216 |     local -a pids
217 | 
218 |     for((i=0;i<${#HOSTS[@]};i++)); do
219 |         # build the command
220 |         if [[ -z "$log" ]]; then
221 |             local _log="/dev/null"
222 |         else
223 |             local _log=${log}_${HOSTS[$i]}
224 |         fi
225 |         if [[ -z "$err" ]]; then
226 |             local _err="/dev/null"
227 |         else
228 |             local _err=${err}_${HOSTS[$i]}
229 |         fi
230 |         local rcmd="cd ${CWD}; nohup ${cmd} CUSTOMCONFIG=.config_${HOSTS[$i]}.sh > ${_log} 2> ${_err} < /dev/null & echo \$!"
231 | 
232 |         # execute the command remotely and catch the return value as an array
233 |         log 1 "Command: \"${rcmd}\" / Host: \"ssh ${USER}@${HOSTS[$i]}\"" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
234 |         local ret=( $(ssh ${USER}@${HOSTS[$i]} "${rcmd}") ) # real command
235 |         local retstring="${ret[@]}"
236 |         log 1 "SSH returned: ${retstring}"
237 | 
238 |         # the last element in the return array is the desired pid
239 |         pids[$i]=${ret[-1]}
240 |         log 2 "Started a process on ${HOSTS[$i]} with pid ${pids[$i]}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
241 |     done
242 | 
243 |     # wait for all remote processes to terminate
244 |     log 2 "Waiting for all distributed processes to terminate..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
245 |     while [ "${#pids[@]}" -ne "0" ]; do
246 |         echo -n '.'
247 |         for i in "${!pids[@]}"; do
248 |             local ret=( $(ssh ${USER}@${HOSTS[$i]} "ps -p${pids[$i]} -opid=") )
249 |             if ! [[ "${ret[-1]}" =~ ^-?[0-9]+$ ]]; then
250 |                 echo -n "(${HOSTS[$i]})"
251 |                 unset pids[$i]
252 |             fi
253 |         done
254 |         sleep 5
255 |     done
256 |     echo ""
257 | 
258 |     # clean up
259 |     removeconfigs
260 | }
261 | 
262 | ### MAIN ###
263 | loglevel=2
264 | #createfolderstructure
265 | #original
266 | #segmentations
267 | #sequencespace
268 | #sequencesegmentations
269 | #skullstripped
270 | #biasfieldcorrected
271 | #intensitrangestandardization
272 | features
273 | #samplesets
274 | #forests
275 | #lesionsegmentation
276 | #evaluation
277 | 
278 | 
--------------------------------------------------------------------------------
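For orientation, this is roughly what a config file generated by makeconfigs above would look like for the first of two hosts, given a single sequence combination with four images; the host name, combination id "sc01" and image indices are illustrative:

# Dedicated configuration file for bibo
# 08/13/14 14:02:11
# auto generated / can be safely removed after +/- 10s

declare -A sc_apply_images=( \
["sc01"]="01 02" \
)
declare -A sc_train_images=( \
["sc01"]="01 02" \
)
makeallimages

Each remote invocation is started with CUSTOMCONFIG=.config_<host>.sh on its command line (see rcmd in rundistributed), so every host only processes its own slice of the images.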
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 216 | local -a pids 217 | 218 | for((i=0;i<${#HOSTS[@]};i++)); do 219 | # build command 220 | if [[ -z "$log" ]]; then 221 | local _log="/dev/null" 222 | else 223 | local _log=${log}_${HOSTS[$i]} 224 | fi 225 | if [[ -z "$err" ]]; then 226 | local _err="/dev/null" 227 | else 228 | local _err=${err}_${HOSTS[$i]} 229 | fi 230 | local rcmd="cd ${CWD}; nohup ${cmd} CUSTOMCONFIG=.config_${HOSTS[$i]}.sh > ${_log} 2> ${_err} < /dev/null & echo \$!" 231 | 232 | # execute command remotely and catch return value as array 233 | log 1 "Command: \"${rcmd}\" / Host: \"ssh ${USER}@${HOSTS[$i]}\"" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 234 | local ret=( $(ssh ${USER}@${HOSTS[$i]} "${rcmd}") ) # real command 235 | local retstring="${ret[@]}" 236 | log 1 "Shh returned: ${retstring}" 237 | 238 | # the last element in the return array is the desired pid 239 | pids[$i]=${ret[-1]} 240 | log 2 "Started a process on ${HOSTS[$i]} with pid ${pids[$i]}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 241 | done 242 | 243 | # wait for all remote processed to terminate 244 | log 2 "Waiting for all distributed processes to terminate..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 245 | while [ "${#pids[@]}" -ne "0" ]; do 246 | echo -n '.' 247 | for i in "${!pids[@]}"; do 248 | local ret=( $(ssh ${USER}@${HOSTS[$i]} "ps -p${pids[$i]} -opid=") ) 249 | if ! [[ "${ret[-1]}" =~ ^-?[0-9]+$ ]]; then 250 | echo -n "(${HOSTS[$i]})" 251 | unset pids[$i] 252 | fi 253 | done 254 | sleep 5 255 | done 256 | echo "" 257 | 258 | # clean up 259 | removeconfigs 260 | } 261 | 262 | ### MAIN ### 263 | logelevel=2 264 | #createfolderstructure 265 | #original 266 | #segmentations 267 | #sequencespace 268 | #sequencesegmentations 269 | #skullstripped 270 | #biasfieldcorrected 271 | #intensitrangestandardization 272 | features 273 | #samplesets 274 | #forests 275 | #lesionsegmentation 276 | #evaluation 277 | 278 | -------------------------------------------------------------------------------- /include.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ######################################## 4 | # Include file with shared information # 5 | ######################################## 6 | 7 | ## changelog 8 | # 2014-05-08 Adapted to the new, distributed calculation scheme. 9 | # 2014-05-08 Transfered some settings to a config file and included it here. 10 | # 2014-05-05 Removed normalized space directories. 11 | # 2014-05-05 Added the lnrealize() function. 12 | # 2014-03-25 Adapted directory structure. 13 | # 2014-03-24 Added the runcond function. 14 | # 2013-11-14 Added new directories. 15 | # 2013-11-11 Added new directories. 16 | # 2013-10-31 Added new directories. 17 | # 2013-10-22 Added new directories. 18 | # 2013-10-21 Added new directories. 
19 | # 2013-10-15 Added new directories and made emptydircond a tick safer
20 | # 2013-10-02 created
21 | 
22 | # include the shared config file
23 | source $(dirname $0)/config.sh
24 | 
25 | # folders
26 | originals="00original/"
27 | sequencespace="00original/"
28 | sequenceskullstripped="00original/"
29 | sequencebiasfieldcorrected="03biasfieldcorrected/"
30 | sequenceintensitrangestandardization="04intensitystandarized/"
31 | sequencefeatures="05features/"
32 | sequencesamplesets="06samplesets/"
33 | sequenceforests="07forests/"
34 | sequencelesionsegmentation="08lesionsegmentation/"
35 | 
36 | segmentations="100gtsegmentations/"
37 | sequencesegmentations="100gtsegmentations/"
38 | sequencebrainmasks="102sequenceforegroundmasks/"
39 | 
40 | folders=("${originals}" "${sequencespace}" "${sequenceskullstripped}" "${sequencebiasfieldcorrected}" "${sequenceintensitrangestandardization}" \
41 | "${sequencefeatures}" "${sequencesamplesets}" "${sequenceforests}" "${sequencelesionsegmentation}" "${segmentations}" \
42 | "${sequencesegmentations}" "${sequencebrainmasks}" )
43 | 
44 | scripts="scripts/"
45 | configs="configs/"
46 | 
47 | # other constants
48 | imgfiletype="nii.gz"
49 | threadcount=2
50 | 
51 | # logging
52 | loglevel=1 # 1=debug, 2=info, 3=warning, 4=err, 5+=silent
53 | logprefixes=('DEBUG' 'INFO' 'WARNING' 'ERROR')
54 | logprintlocation=false # true | false to print the location from where the log was triggered
55 | 
56 | 
57 | # shared functions
58 | 
59 | ######
60 | ## Signal a log message of a determined level
61 | ######
62 | function log {
63 |     local level=${1}
64 |     local msg=${2}
65 |     local location=${3} # optional, should be [$SOURCE:$FUNCNAME:$LINENO], [$SOURCE::$LINENO] or similar
66 | 
67 |     local loglevels=${#logprefixes[@]}
68 | 
69 |     local prefix
70 | 
71 |     # check whether the current logging level is lower than the message's logging level
72 |     if [ "$loglevel" -le "$level" ]
73 |     then
74 |         # determine the log type
75 |         if [ "$level" -le "0" ]
76 |         then
77 |             prefix="UNKNOWN"
78 |         elif [ "$level" -gt "$loglevels" ]
79 |         then
80 |             prefix="UNKNOWN"
81 |         else
82 |             prefix=${logprefixes[$level-1]}
83 |         fi
84 | 
85 |         # print, according to logprintlocation, with or without location information
86 |         if $logprintlocation
87 |         then
88 |             echo -e "${prefix}: ${msg} ${location}"
89 |         else
90 |             echo -e "${prefix}: ${msg}"
91 |         fi
92 |     fi
93 | }
94 | 
95 | ######
96 | # Parallelizes a function call by spawning multiple subprocesses
97 | ######
98 | # Note that the different calls are processed in chunks, each of which this function waits to terminate before starting the next one.
99 | # Takes as first parameter the function, as second the number of processes to spawn and as third the array of parameters to pass to the function.
100 | # !The third argument is supposed to be an array and therefore has to be passed in the form "parameter[@]"
101 | # Call like "parallelize fun 4 indices[@]"
102 | function parallelize ()
103 | {
104 |     # Grab parameters
105 |     local fun=$1
106 |     local processes=$2
107 |     local -a parameters=("${!3}")
108 | 
109 |     # split $parameters into $processes sized chunks
110 |     local i
111 |     local parameter
112 |     for i in $(seq 0 ${processes} ${#parameters[@]}); do # seq: <from> <stepsize> <to>
113 |         declare -a parameterchunk="(${parameters[@]:$i:$processes})"
114 |         # execute the function in the background for each parameter in the current chunk, then wait for their termination
115 |         for parameter in "${parameterchunk[@]}"; do
116 |             ${fun} $parameter &
117 |         done
118 |         wait
119 |     done
120 | }
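# Illustrative use of parallelize (not part of the pipeline): run a demo
# function for the elements 1..6, two background processes at a time; each
# chunk of two is waited for before the next chunk is started.
#
#   function demo () { echo "processing $1"; sleep 1; }
#   indices=(1 2 3 4 5 6)
#   parallelize demo 2 indices[@]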
121 | 
122 | ######
123 | ## Create the supplied directory if it does not yet exist
124 | ######
125 | function mkdircond {
126 |     local directory=${1}
127 | 
128 |     if [ ! -d "$directory" ]
129 |     then
130 |         log 1 "Creating directory ${directory}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
131 |         mkdir ${directory}
132 |     fi
133 | }
134 | 
135 | ######
136 | ## Remove all files (but not directories or write-protected files) from the supplied directory if it is not empty
137 | ######
138 | function emptydircond {
139 |     local directory=${1}
140 | 
141 |     if [ -z "$directory" ]; then
142 |         log 3 "Supplied an empty string to the emptydircond function. This might be dangerous and is therefore ignored." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
143 |     else
144 |         local filecount=`ls -al ${directory} | wc -l`
145 |         if [ "$filecount" -gt "3" ]
146 |         then
147 |             rm ${directory}/*
148 |         fi
149 |     fi
150 | }
151 | 
152 | #####
153 | ## Remove a directory if it exists
154 | #####
155 | function rmdircond {
156 |     local directory=${1}
157 | 
158 |     if [ -d "$directory" ]
159 |     then
160 |         rmdir ${directory}
161 |     fi
162 | }
163 | 
164 | #####
165 | ## Empties and removes a directory if it exists
166 | #####
167 | function removedircond {
168 |     local directory=${1}
169 |     emptydircond ${directory}
170 |     rmdircond ${directory}
171 | }
172 | 
173 | #####
174 | ## Runs the passed command if no variable "dryrun" has been initialized with a non-empty value.
175 | ## As a second parameter, a redirect target for the command's stdout can optionally be passed.
176 | #####
177 | function runcond {
178 |     local cmd=$1
179 |     if [[ -z "$dryrun" ]]; then
180 |         if [ $# -gt 1 ]; then
181 |             $cmd > $2
182 |         else
183 |             $cmd
184 |         fi
185 |     else
186 |         echo "DRYRUN: ${cmd}"
187 |     fi
188 | }
189 | 
190 | ######
191 | ## Copy a file if the target file does not already exist
192 | ######
193 | function cpcond {
194 |     local source=$1
195 |     local target=$2
196 | 
197 |     if [ ! -f ${source} ]; then
198 |         log 3 "Source file ${source} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
199 |     elif [ -f ${target} ]; then
200 |         log 1 "Target file ${target} already exists, skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
201 |     else
202 |         log 1 "Copying ${source} to ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
203 |         runcond "cp ${source} ${target}"
204 |     fi
205 | }
206 | 
207 | ######
208 | ## Create a symlink if it is non-existent or dead
209 | ######
210 | function lncond {
211 |     local source=$1
212 |     local target=$2
213 | 
214 |     # Check whether the link does not exist or is a dead symlink
215 |     if [ ! -e ${target} ]
216 |     then
217 |         # remove if a dead symlink
218 |         if [ -L ${target} ]
219 |         then
220 |             log 1 "Removing dead symlink ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
221 |             rm ${target}
222 |         fi
223 | 
224 |         # create the symlink if the source file exists
225 |         if [ -e ${source} ]
226 |         then
227 |             log 1 "Linking ${source} to ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
228 |             ln -s ${source} ${target}
229 |         else
230 |             log 3 "${source} does not exist." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
231 |         fi
232 |     else
233 |         log 1 "Target file ${target} already exists, skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
234 |     fi
235 | }
236 | 
237 | ###
238 | # Takes a symbolic link and makes it "real", i.e. replaces the link with a copy of the
239 | # actual target file.
240 | ###
241 | lnrealize() {
242 |     if [ -L ${1} ]
243 |     then
244 |         runcond "cp --remove-destination `readlink ${1}` ${1}"
245 |     fi
246 | }
247 | 
248 | ###
249 | # Links all files that are present in folder <from> but missing from folder <to> into folder <to>
250 | ###
251 | function linkmissing () {
252 |     local from=$1
253 |     local to=$2
254 | 
255 |     # make the paths absolute if they are not yet
256 |     [ "${from:0:1}" = "/" ] || from="$(pwd)/${from}"
257 |     [ "${to:0:1}" = "/" ] || to="$(pwd)/${to}"
258 | 
259 |     # link missing files
260 |     local f
261 |     for f in "${from}"/*; do
262 |         [[ -f "${f}" ]] || continue
263 |         lncond "${from}/$(basename "$f")" "${to}/$(basename "$f")"
264 |     done
265 | }
266 | 
267 | #####
268 | ## Checks whether an element exists in an array
269 | ## Call like: isIn "element" "${array[@]}"
270 | #####
271 | isIn () {
272 |     local e
273 |     for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
274 |     return 1
275 | }
276 | 
277 | ###
278 | # Join the elements of an array using a one-character delimiter
279 | # Call like: joinarr $delimiter ${arr[@]}
280 | ###
281 | function joinarr () {
282 |     local IFS="${1}"
283 |     shift
284 |     echo "$*"
285 | }
286 | 
287 | #####
288 | # Returns a new version of an array with the passed element removed from it.
289 | # Call like: newarray=( $(delEl element array[@]) )
290 | # If the element could not be found, the original array is returned
291 | # Exit codes (available from $?): 0 on success, 1 if the element could not be found
292 | #####
293 | function delEl {
294 |     local -a arr=("${!2}") # Note: declare has scope limited to the function
295 |     local pos=$(isAt $1 arr[@])
296 |     if [[ $pos -lt 0 ]]; then echo "${arr[@]}" && return 1; fi
297 |     local newarr=(${arr[@]:0:$pos} ${arr[@]:$(($pos + 1))})
298 |     echo "${newarr[@]}"
299 |     return 0
300 | }
301 | 
302 | #####
303 | # Returns the position of the first occurrence of an element in an array.
304 | # Call like: pos=$(isAt element array[@])
305 | # If the element could not be found, the return value (not the exit code!) will be a negative integer
306 | # Exit codes (available from $?): 0 on success, 1 if the element could not be found
307 | #####
308 | isAt () {
309 |     local -a arr=("${!2}")
310 |     local e
311 |     for e in "${!arr[@]}"; do [[ "${arr[$e]}" == "$1" ]] && echo ${e} && return 0; done
312 |     echo -1
313 |     return 1
314 | }
315 | 
316 | ###
317 | # Returns the voxel spacing of the supplied image as a space-separated string
318 | # To catch it as an array, use var=( $(voxelspacing "imagelocation") )
319 | ###
320 | function voxelspacing () {
321 |     local image=$1
322 |     local vss=`medpy_info.py "${image}" | grep "spacing"`
323 |     local vse=${vss:15:-1} # strip the "spacing" prefix and the closing bracket; brittle w.r.t. the medpy_info.py output format
324 |     local vs=(${vse//, / })
325 |     echo "${vs[@]}"
326 | }
327 | ###
328 | # Return a sorted version of an array
329 | # Call like: sarr=( $(sorted array[@]) )
330 | ###
331 | function sorted () {
332 |     local -a arr=("${!1}")
333 |     local -a sorted
334 |     readarray -t sorted < <(for a in "${arr[@]}"; do echo "$a"; done | sort)
335 |     echo "${sorted[@]}"
336 | }
337 | #####
338 | # Splits an array into chunks as equal as possible and returns these.
339 | # The last chunk(s) will always contain the fewest elements.
340 | # Call like:
341 | # splitarray returnvarname nchunks array[@]
342 | # And then iterate over the chunks and unpack them like:
343 | # for packedchunk in "${returnvarname[@]}"; do
344 | #     unpackedchunk=( ${packedchunk[@]} )
345 | # done
346 | # Note that you'll have to pass the name of the desired return variable.
347 | # Take care not to unpack into the same variable (e.g. packedchunk=( ${packedchunk[@]} )), as this will cause unexpected behaviour.
348 | #####
349 | function splitarray () {
350 |     # catch parameters
351 |     local retvar=$1
352 |     local -i nchunks=$2
353 |     local -a arr=("${!3}")
354 | 
355 |     # compute the step size, always rounding up
356 |     local -i len=${#arr[@]}
357 |     local -i step=$(($len / $nchunks))
358 |     if [ "$(($len % $nchunks))" -ne "0" ]; then
359 |         step=$(($step+1))
360 |     fi
361 | 
362 |     # split the array
363 |     local collection
364 |     local chunk
365 |     local -i i
366 |     for (( i=0; i<${len}; i+=${step} )); do
367 |         #chunk=( "${arr[@]:${i}:${step}}" ) # as array
368 |         chunk="${arr[@]:${i}:${step}}" # as string
369 |         collection=( "${collection[@]}" "${chunk}" ) # array of fake arrays (strings containing space-separated elements)
370 |     done
371 | 
372 |     # return the chunks by referenced variable assignment
373 |     eval "$retvar=(\"\${collection[@]}\")"
374 | }
375 | 
--------------------------------------------------------------------------------
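The array helpers at the end of include.sh, in particular splitarray and joinarr, carry the host distribution in run.sh. A minimal, self-contained usage sketch follows; it assumes include.sh (and the config.sh it sources) can be sourced from the working directory.

#!/bin/bash
source ./include.sh

# Split seven image indices into three chunks; with len=7 and nchunks=3 the
# step size rounds up to 3, giving "01 02 03", "04 05 06" and "07".
images=(01 02 03 04 05 06 07)
splitarray chunks 3 images[@]
for packedchunk in "${chunks[@]}"; do
    unpackedchunk=( ${packedchunk[@]} )   # unpack the space-separated fake array
    echo "chunk: $(joinarr , ${unpackedchunk[@]})"
done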