├── LICENSE
├── README.md
├── check_hosts.sh
├── pop_trainforests.sh
├── pop_features.sh
├── pop_samplesets.sh
├── metadata.py
├── pop_biasfieldcorrected.sh
├── scripts
│   ├── make_body_mask.py
│   ├── train_rdf.py
│   ├── extract_features.py
│   ├── apply_rdf.py
│   ├── evaluate_multilable.py
│   └── sample_trainingset.py
├── pop_lesionsegmentation.sh
├── pop_original.sh
├── featureconfig.py
├── pop_backgroundstripped.sh
├── pop_intensitrangestandardization.sh
├── pop_segmentations.sh
├── config.sh
├── pop_sequencespace.sh
├── run.sh
└── include.sh

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
non-public repository

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
multilabel-multimachine-pipeline
================================
a safeguard

--------------------------------------------------------------------------------
/check_hosts.sh:
--------------------------------------------------------------------------------
#!/bin/bash

####
# Small, convenient script to check which hosts are available and with what software.
####

# host list
HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
#HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
#HOSTS=("elmo" "bert") # bad mood users

## check for programs at each host machine
for host in "${HOSTS[@]}"; do
    echo "##### ${host} #####"
    ssh maier@${host} 'elastix;transformix;fsl5.0-bet | tail -n2;cmtk mrbias;python -c "import medpy; print \"medpy:\", medpy.__file__";python -c "import sklearn; print \"sklearn:\", sklearn.__version__"'
done

--------------------------------------------------------------------------------
/pop_trainforests.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Train the decision forests with a training sample set.
#####

## Changelog
# 2014-08-13 adapted to process multiple ground truths at once
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# main code
log 2 "Training random decision forests" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequenceforests}/${gtset}
    for i in "${allimages[@]}"; do
        if [ -e "${sequenceforests}/${gtset}/${i}.pkl" ]; then
            continue
        fi
        log 2 "Training forest no ${i} from ground truth set ${gtset}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        runcond "scripts/train_rdf.py ${sequencesamplesets}/${gtset}/${i}/trainingset.features.npy ${sequenceforests}/${gtset}/${i}.pkl ${maxdepth}"
    done
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
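
Each forest trained above is written by scripts/train_rdf.py as one pickled scikit-learn estimator per leave-one-out case. A minimal sketch (not a repository file) of how such a pickle could be inspected afterwards; the path is hypothetical, the attributes are standard on fitted scikit-learn forest classifiers:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): inspect a trained forest.
# The pickle path below is hypothetical.
import pickle

with open('forests/seven/01.pkl', 'rb') as f:
    forest = pickle.load(f)

print('number of trees: {}'.format(len(forest.estimators_)))
print('feature importances: {}'.format(forest.feature_importances_))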
--------------------------------------------------------------------------------
/pop_features.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Extracts a number of features as defined in a Python-style config file.
#####

## Changelog
# 2014-08-12 adapted to work with different feature configs per case
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# functions
function extract_features ()
{
    local i=$1
    local sc_featurecnf=$(getcustomfeatureconfig "${scid}")
    mkdircond ${sequencefeatures}/${basesequence}/${i}
    runcond "${scripts}/extract_features.py ${sequenceintensitrangestandardization}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} ${sequencefeatures}/${basesequence}/${i}/ ${sc_featurecnf}"
}

# main code
log 2 "Extracting the features" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
makecustomfeatureconfigs
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )

    mkdircond ${sequencefeatures}/${basesequence}

    parallelize extract_features ${threadcount} images[@]
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

--------------------------------------------------------------------------------
/pop_samplesets.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Samples a number of training samples randomly using a set of selected features.
#####

## Changelog
# 2014-08-12 adapted to work with different sequence combinations, separate train and application sets, and multiple ground truths
# 2013-05-08 created

# include shared information
source $(dirname $0)/include.sh

# functions
function sample_trainingset () {
    local i=$1
    local sc_featurecnf=$(getcustomfeatureconfig "${scid}")
    local _trainimages=( ${sc_train_images[$scid]} )
    local _trainimages=( $(delEl "${i}" _trainimages[@]) )
    mkdircond ${sequencesamplesets}/${gtset}/${i}
    runcond "${scripts}/sample_trainingset.py ${sequencefeatures}/${basesequence} ${sequencesegmentations}/${gtset} ${sequencebrainmasks}/${basesequence} ${sequencesamplesets}/${gtset}/${i}/ ${sc_featurecnf} ${samplesize} $(joinarr " " ${_trainimages[@]})"
}

# main code
log 2 "Drawing a training set for each leave-one-out case using stratified random sampling" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
makecustomfeatureconfigs
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequencesamplesets}/${gtset}
    for scid in "${!sc_train_brainmasks[@]}"; do
        basesequence=${sc_train_brainmasks[$scid]}
        images=( ${sc_apply_images[$scid]} )

        parallelize sample_trainingset ${threadcount} images[@]
    done
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
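
The delEl call above removes the current case from its own training image list, which is what makes the sampling leave-one-out. delEl itself lives in include.sh, which is not part of this dump, so its behaviour is an assumption inferred from the usage. A minimal sketch (not a repository file) of the same idea, with the case ids from config.sh:

# Illustrative sketch (not part of the repository): leave-one-out set
# construction, assuming delEl simply drops one element from an array.
images = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10']

for held_out in images:
    # train on all cases except the one the forest will later be applied to
    train_images = [i for i in images if i != held_out]
    print('case {}: train on {}'.format(held_out, ' '.join(train_images)))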
--------------------------------------------------------------------------------
/metadata.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Prints out handy information about the metadata of a NIfTI image, especially regarding the transformation to world coordinates.
arg1: the image to check
"""

import sys

import numpy

from medpy.io import load, header

def main():
    i, h = load(sys.argv[1])

    print 'Image:\t{}'.format(sys.argv[1])
    print 'Shape:\t{}'.format(i.shape)
    print 'Spacing:{}'.format(header.get_pixel_spacing(h))
    print 'Offset:\t{}'.format(header.get_offset(h))

    if 0 == h.get_header()['qform_code']:
        method = 'ANALYZE 7.5 (old)'
    if h.get_header()['qform_code'] > 0:
        method = 'Normal (qform)'
    if h.get_header()['sform_code'] > 0:
        method = 'Special space (sform)'

    print
    print 'Orientation and location in space:'
    print 'Type:\t\t{}'.format(method)
    print 'qform_code:\t{}'.format(h.get_header()['qform_code'])
    print 'sform_code:\t{}'.format(h.get_header()['sform_code'])

    print
    print 'qform == sform?\t{} (max diff={})'.format(numpy.all(h.get_qform() == h.get_sform()), numpy.max(numpy.abs(h.get_qform() - h.get_sform())))
    print 'affine == qform?\t{} (max diff={})'.format(numpy.all(h.get_affine() == h.get_qform()), numpy.max(numpy.abs(h.get_affine() - h.get_qform())))
    print 'affine == sform?\t{} (max diff={})'.format(numpy.all(h.get_affine() == h.get_sform()), numpy.max(numpy.abs(h.get_affine() - h.get_sform())))

    print
    print 'qform:'
    print h.get_qform()
    print 'sform:'
    print h.get_sform()
    print 'affine:'
    print h.get_affine()

if __name__ == "__main__":
    main()
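
The three if blocks above mirror the NIfTI-1 convention: a positive sform_code takes precedence over a positive qform_code, and when both are zero the affine is interpreted ANALYZE-style. A minimal sketch (not a repository file) of the same decision written directly against nibabel; the file name is hypothetical:

# Illustrative sketch (not part of the repository): qform/sform precedence.
import nibabel

img = nibabel.load('case01.nii.gz')  # hypothetical file
hdr = img.header

if hdr['sform_code'] > 0:
    affine = img.get_sform()   # sform wins when set
elif hdr['qform_code'] > 0:
    affine = img.get_qform()
else:
    affine = img.affine        # ANALYZE-style fallback
print(affine)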
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 44 | 45 | -------------------------------------------------------------------------------- /scripts/make_body_mask.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Automatically create a full-body mask for an abdominal MRI image. 5 | .py [] 6 | """ 7 | 8 | import sys 9 | import numpy 10 | from medpy.io import load, save, header 11 | from medpy.filter import largest_connected_component, xminus1d 12 | from scipy.ndimage.morphology import binary_opening, binary_closing,\ 13 | binary_erosion, binary_dilation, binary_fill_holes 14 | 15 | DEFAULT_THRESHOLD = 50.0 16 | 17 | def main(): 18 | i, h = load(sys.argv[1]) 19 | if len(sys.argv) > 3: 20 | t = float(sys.argv[3]) 21 | else: 22 | t = DEFAULT_THRESHOLD 23 | 24 | # threshold image 25 | i = i > t 26 | 27 | # select only largest connected component 28 | i = largest_connected_component(i) 29 | 30 | # fill holes along each dimension in 2D 31 | i = xminus1d(i, binary_fill_holes, 0) 32 | i = xminus1d(i, binary_fill_holes, 1) 33 | i = xminus1d(i, binary_fill_holes, 2) 34 | 35 | # select only largest connected component 36 | i = largest_connected_component(i) 37 | 38 | # apply morphological operations 39 | i = binary_closing(i, structure=None, iterations=3) # 3D 40 | #i = morphology2d(binary_closing, i, structure=1, iterations=1) 41 | 42 | if 0 == numpy.count_nonzero(i): 43 | raise Warning("{}: empty mask resulted".format(sys.argv[1])) 44 | 45 | save(i, sys.argv[2], h, True) 46 | 47 | def morphology2d(operation, arr, structure = None, iterations=1, dimension = 2): 48 | res = numpy.zeros(arr.shape, numpy.bool) 49 | for sl in range(processed.shape[dimension]): 50 | res[:,:,sl] = operation(arr[:,:,sl], structure, iterations) 51 | return res 52 | 53 | if __name__ == "__main__": 54 | main() 55 | 56 | 57 | -------------------------------------------------------------------------------- /pop_lesionsegmentation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Applyies the forests to a (preliminary) segmentation of the brain lesion in sequence space. 5 | ##### 6 | 7 | ## Changelog 8 | # 2014-08-13 Adapted to cope with multiple sequence configurations and ground truth sets 9 | # 2014-05-08 Adapted to the new, distributed calculation scheme. 10 | # 2013-04-03 Added a morphological post-processing step (and removed again). 11 | # 2013-03-25 Updated to new, variable version. 12 | # 2013-11-25 Updated to use new script to distinguish between sequence space and std space features 13 | # 2013-11-05 adapted to new brain mask location 14 | # 2013-10-29 created 15 | 16 | # include shared information 17 | source $(dirname $0)/include.sh 18 | 19 | # functions 20 | 21 | # main code 22 | log 2 "Applying random decision forests to segment lesion" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 23 | 24 | makecustomfeatureconfigs 25 | for gtset in "${gtsets[@]}"; do 26 | mkdircond ${sequencelesionsegmentation}/${gtset} 27 | 28 | for scid in "${!sc_train_brainmasks[@]}"; do 29 | basesequence=${sc_train_brainmasks[$scid]} 30 | images=( ${sc_apply_images[$scid]} ) 31 | 32 | log 2 "Applying for ground truth set ${gtset} and seq. configuration ${scid}..." 
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 33 | 34 | for i in "${images[@]}"; do 35 | sc_featurecnf=$(getcustomfeatureconfig "${scid}") 36 | mkdircond ${sequencelesionsegmentation}/${gtset}/${i} 37 | runcond "${scripts}/apply_rdf.py ${sequenceforests}/${gtset}/${i}.pkl ${sequencefeatures}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.nii.gz ${sc_featurecnf} ${sequencelesionsegmentation}/${gtset}/${i}/probabilities.nii.gz ${sequencelesionsegmentation}/${gtset}/${i}/segmentation.nii.gz" 38 | done 39 | done 40 | 41 | done 42 | log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 43 | 44 | 45 | -------------------------------------------------------------------------------- /pop_original.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Link images from the image database in a consitent manner to 00originals. 5 | # Links all images whose case ids are mentiones in "includes.sh". 6 | ##### 7 | 8 | ## Changelog 9 | # 2015-02-27 adapted to visceral pipeline 10 | # 2014-05-05 every second case now gets flipped 11 | # 2014-03-24 changed to link sequence by availability (i.e. skip non-existing ones with only info message displayed) 12 | # 2013-11-13 changed to actually copy even existing files and to correct the qform and sform codes 13 | # 2013-10-15 changed the ADC creation script and added a conversion of non-float to float images 14 | # 2013-10-02 created 15 | 16 | # Visceral ground-truth labels 17 | # 1: liver 18 | # 2: spleen 19 | # 3: bladder 20 | # 4: left (liver) kidney 21 | # 5: right kidney 22 | # 6: left ? muscle 23 | # 7: right ? muscle 24 | 25 | # include shared information 26 | source $(dirname $0)/include.sh 27 | 28 | # Constants 29 | sequencestolink=('MRI') 30 | 31 | # Image collection 32 | srcdir="/share/data_mumpitz2/heinrich/OskarMRI/" 33 | declare -A indicesmapping=( ["01"]="1" ["02"]="2" ["03"]="3" ["04"]="4" ["05"]="5" ["06"]="6" ["07"]="7" ["08"]="8" ["09"]="9" ["10"]="10") 34 | 35 | ### 36 | # Prepare all the sequences of a case 37 | ### 38 | log 1 "Linking images and ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 39 | for i in "${images[@]}"; do 40 | mkdircond "${originals}/${i}" 41 | for s in "${sequencestolink[@]}"; do 42 | srcfile="${srcdir}/${s}${indicesmapping[${i}]}.${imgfiletype}" 43 | trgfile="${originals}/${i}/${s}.${imgfiletype}" 44 | lncond "${srcfile}" "${trgfile}" 45 | done 46 | srcfile="${srcdir}/${s}${indicesmapping[${i}]}_seg.${imgfiletype}" 47 | trgfile="${segmentations}/${i}.${imgfiletype}" 48 | #runcond "scripts/extract_label.py ${srcfile} ${trgfile} ${label}" # only required, if a single label should be extracted 49 | lncond "${srcfile}" "${trgfile}" 50 | done 51 | log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 52 | 53 | -------------------------------------------------------------------------------- /scripts/train_rdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | """ 4 | Train a decision forest on a training set. 
--------------------------------------------------------------------------------
/pop_lesionsegmentation.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Applies the forests to create a (preliminary) segmentation of the brain lesion in sequence space.
#####

## Changelog
# 2014-08-13 Adapted to cope with multiple sequence configurations and ground truth sets
# 2014-05-08 Adapted to the new, distributed calculation scheme.
# 2014-04-03 Added a morphological post-processing step (and removed it again).
# 2014-03-25 Updated to new, variable version.
# 2013-11-25 Updated to use the new script to distinguish between sequence space and std space features
# 2013-11-05 adapted to new brain mask location
# 2013-10-29 created

# include shared information
source $(dirname $0)/include.sh

# functions

# main code
log 2 "Applying random decision forests to segment lesion" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

makecustomfeatureconfigs
for gtset in "${gtsets[@]}"; do
    mkdircond ${sequencelesionsegmentation}/${gtset}

    for scid in "${!sc_train_brainmasks[@]}"; do
        basesequence=${sc_train_brainmasks[$scid]}
        images=( ${sc_apply_images[$scid]} )

        log 2 "Applying for ground truth set ${gtset} and seq. configuration ${scid}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

        for i in "${images[@]}"; do
            sc_featurecnf=$(getcustomfeatureconfig "${scid}")
            mkdircond ${sequencelesionsegmentation}/${gtset}/${i}
            runcond "${scripts}/apply_rdf.py ${sequenceforests}/${gtset}/${i}.pkl ${sequencefeatures}/${basesequence}/${i}/ ${sequencebrainmasks}/${basesequence}/${i}.nii.gz ${sc_featurecnf} ${sequencelesionsegmentation}/${gtset}/${i}/probabilities.nii.gz ${sequencelesionsegmentation}/${gtset}/${i}/segmentation.nii.gz"
        done
    done

done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

--------------------------------------------------------------------------------
/pop_original.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Link images from the image database in a consistent manner to 00originals.
# Links all images whose case ids are mentioned in "include.sh".
#####

## Changelog
# 2015-02-27 adapted to visceral pipeline
# 2014-05-05 every second case now gets flipped
# 2014-03-24 changed to link sequences by availability (i.e. skip non-existing ones with only an info message displayed)
# 2013-11-13 changed to actually copy even existing files and to correct the qform and sform codes
# 2013-10-15 changed the ADC creation script and added a conversion of non-float to float images
# 2013-10-02 created

# Visceral ground-truth labels
# 1: liver
# 2: spleen
# 3: bladder
# 4: left (liver) kidney
# 5: right kidney
# 6: left ? muscle
# 7: right ? muscle

# include shared information
source $(dirname $0)/include.sh

# Constants
sequencestolink=('MRI')

# Image collection
srcdir="/share/data_mumpitz2/heinrich/OskarMRI/"
declare -A indicesmapping=( ["01"]="1" ["02"]="2" ["03"]="3" ["04"]="4" ["05"]="5" ["06"]="6" ["07"]="7" ["08"]="8" ["09"]="9" ["10"]="10" )

###
# Prepare all the sequences of a case
###
log 1 "Linking images and ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${images[@]}"; do
    mkdircond "${originals}/${i}"
    for s in "${sequencestolink[@]}"; do
        srcfile="${srcdir}/${s}${indicesmapping[${i}]}.${imgfiletype}"
        trgfile="${originals}/${i}/${s}.${imgfiletype}"
        lncond "${srcfile}" "${trgfile}"
    done
    srcfile="${srcdir}/${s}${indicesmapping[${i}]}_seg.${imgfiletype}"
    trgfile="${segmentations}/${i}.${imgfiletype}"
    #runcond "scripts/extract_label.py ${srcfile} ${trgfile} ${label}" # only required if a single label should be extracted
    lncond "${srcfile}" "${trgfile}"
done
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
11 | """ 12 | 13 | import os 14 | import sys 15 | import imp 16 | import numpy 17 | import itertools 18 | 19 | from medpy.io import load, header 20 | 21 | # configuration 22 | trg_dtype = numpy.float32 23 | 24 | def main(): 25 | # loading the features to extract 26 | d, m = os.path.split(os.path.splitext(sys.argv[4])[0]) 27 | f, filename, desc = imp.find_module(m, [d]) 28 | features_to_extract = imp.load_module(m, f, filename, desc).features_to_extract 29 | 30 | # loading the image mask 31 | m = load(sys.argv[2])[0].astype(numpy.bool) 32 | 33 | # extracting the required features and saving them 34 | for sequence, function_call, function_arguments, voxelspacing in features_to_extract: 35 | if not isfv(sys.argv[3], sequence, function_call, function_arguments): 36 | #print sequence, function_call.__name__, function_arguments 37 | i, h = load('{}/{}.nii.gz'.format(sys.argv[1], sequence)) 38 | call_arguments = list(function_arguments) 39 | if voxelspacing: call_arguments.append(header.get_pixel_spacing(h)) 40 | call_arguments.append(m) 41 | fv = function_call(i, *call_arguments) 42 | savefv(fv, sys.argv[3], sequence, function_call, function_arguments) 43 | 44 | def savefv(fv, trgdir, seq, fcall, fargs): 45 | """Saves the supplied feature vector under a fixed naming rule.""" 46 | name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs])) 47 | with open('{}/{}.npy'.format(trgdir, name), 'wb') as f: 48 | numpy.save(f, fv.astype(trg_dtype)) 49 | 50 | def isfv(trgdir, seq, fcall, fargs): 51 | name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs])) 52 | return os.path.exists('{}/{}.npy'.format(trgdir, name)) 53 | 54 | if __name__ == "__main__": 55 | main() 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /featureconfig.py: -------------------------------------------------------------------------------- 1 | #### 2 | # Configuration file: Denotes the features to extract 3 | #### 4 | 5 | from medpy.features.intensity import intensities, centerdistance, centerdistance_xdminus1, local_mean_gauss, local_histogram 6 | 7 | MRI = [ 8 | ('MRI', intensities, [], False), 9 | ('MRI', local_mean_gauss, [3], True), 10 | ('MRI', local_mean_gauss, [5], True), 11 | ('MRI', local_mean_gauss, [7], True), 12 | ('MRI', local_histogram, [11, 'image', (0, 100), 5, None, None, 'ignore', 0], False), #11 bins, 5*2=10mm region 13 | ('MRI', local_histogram, [11, 'image', (0, 100), 10, None, None, 'ignore', 0], False), #11 bins, 10*2=20mm region 14 | ('MRI', local_histogram, [11, 'image', (0, 100), 15, None, None, 'ignore', 0], False), #11 bins, 15*2=30mm region 15 | ('MRI', centerdistance_xdminus1, [0], True), 16 | ('MRI', centerdistance_xdminus1, [1], True), 17 | ('MRI', centerdistance_xdminus1, [2], True) 18 | ] 19 | 20 | APROBL0 = [ 21 | ('aprobl0', intensities, [], False), 22 | ('aprobl0', local_mean_gauss, [5], True), 23 | ('aprobl0', local_mean_gauss, [10], True), 24 | ('aprobl0', local_mean_gauss, [20], True) 25 | ] 26 | 27 | APROBL1 = [ 28 | ('aprobl1', intensities, [], False), 29 | ('aprobl1', local_mean_gauss, [5], True), 30 | ('aprobl1', local_mean_gauss, [10], True), 31 | ('aprobl1', local_mean_gauss, [20], True) 32 | ] 33 | 34 | APROBL2 = [ 35 | ('aprobl2', intensities, [], False), 36 | ('aprobl2', local_mean_gauss, [5], True), 37 | ('aprobl2', local_mean_gauss, [10], True), 38 | ('aprobl2', local_mean_gauss, [20], True) 39 | ] 40 | 41 | APROBL3 = [ 42 | ('aprobl3', intensities, 
--------------------------------------------------------------------------------
/scripts/extract_features.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Extract features from a supplied multi-spectral image according to a config file and save them under the supplied target directory.
arg1: folder with the image channels
arg2: mask image; features are only extracted for voxels where it is 1
arg3: the target folder to store the extracted features in
arg4: the config file, containing a struct called features_to_extract that follows a special syntax

Note: Does not overwrite existing feature files.
"""

import os
import sys
import imp
import numpy
import itertools

from medpy.io import load, header

# configuration
trg_dtype = numpy.float32

def main():
    # load the features to extract
    d, m = os.path.split(os.path.splitext(sys.argv[4])[0])
    f, filename, desc = imp.find_module(m, [d])
    features_to_extract = imp.load_module(m, f, filename, desc).features_to_extract

    # load the image mask
    m = load(sys.argv[2])[0].astype(numpy.bool)

    # extract the required features and save them
    for sequence, function_call, function_arguments, voxelspacing in features_to_extract:
        if not isfv(sys.argv[3], sequence, function_call, function_arguments):
            #print sequence, function_call.__name__, function_arguments
            i, h = load('{}/{}.nii.gz'.format(sys.argv[1], sequence))
            call_arguments = list(function_arguments)
            if voxelspacing: call_arguments.append(header.get_pixel_spacing(h))
            call_arguments.append(m)
            fv = function_call(i, *call_arguments)
            savefv(fv, sys.argv[3], sequence, function_call, function_arguments)

def savefv(fv, trgdir, seq, fcall, fargs):
    """Saves the supplied feature vector under a fixed naming rule."""
    name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
    with open('{}/{}.npy'.format(trgdir, name), 'wb') as f:
        numpy.save(f, fv.astype(trg_dtype))

def isfv(trgdir, seq, fcall, fargs):
    name = 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
    return os.path.exists('{}/{}.npy'.format(trgdir, name))

if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/featureconfig.py:
--------------------------------------------------------------------------------
####
# Configuration file: Denotes the features to extract
####

from medpy.features.intensity import intensities, centerdistance, centerdistance_xdminus1, local_mean_gauss, local_histogram

MRI = [
    ('MRI', intensities, [], False),
    ('MRI', local_mean_gauss, [3], True),
    ('MRI', local_mean_gauss, [5], True),
    ('MRI', local_mean_gauss, [7], True),
    ('MRI', local_histogram, [11, 'image', (0, 100), 5, None, None, 'ignore', 0], False), # 11 bins, 5*2=10mm region
    ('MRI', local_histogram, [11, 'image', (0, 100), 10, None, None, 'ignore', 0], False), # 11 bins, 10*2=20mm region
    ('MRI', local_histogram, [11, 'image', (0, 100), 15, None, None, 'ignore', 0], False), # 11 bins, 15*2=30mm region
    ('MRI', centerdistance_xdminus1, [0], True),
    ('MRI', centerdistance_xdminus1, [1], True),
    ('MRI', centerdistance_xdminus1, [2], True)
]

APROBL0 = [
    ('aprobl0', intensities, [], False),
    ('aprobl0', local_mean_gauss, [5], True),
    ('aprobl0', local_mean_gauss, [10], True),
    ('aprobl0', local_mean_gauss, [20], True)
]

APROBL1 = [
    ('aprobl1', intensities, [], False),
    ('aprobl1', local_mean_gauss, [5], True),
    ('aprobl1', local_mean_gauss, [10], True),
    ('aprobl1', local_mean_gauss, [20], True)
]

APROBL2 = [
    ('aprobl2', intensities, [], False),
    ('aprobl2', local_mean_gauss, [5], True),
    ('aprobl2', local_mean_gauss, [10], True),
    ('aprobl2', local_mean_gauss, [20], True)
]

APROBL3 = [
    ('aprobl3', intensities, [], False),
    ('aprobl3', local_mean_gauss, [5], True),
    ('aprobl3', local_mean_gauss, [10], True),
    ('aprobl3', local_mean_gauss, [20], True)
]

APROBL4 = [
    ('aprobl4', intensities, [], False),
    ('aprobl4', local_mean_gauss, [3], True),
    ('aprobl4', local_mean_gauss, [10], True),
    ('aprobl4', local_mean_gauss, [20], True)
]

APROBL5 = [
    ('aprobl5', intensities, [], False),
    ('aprobl5', local_mean_gauss, [5], True),
    ('aprobl5', local_mean_gauss, [10], True),
    ('aprobl5', local_mean_gauss, [20], True)
]

APROBL6 = [
    ('aprobl6', intensities, [], False),
    ('aprobl6', local_mean_gauss, [5], True),
    ('aprobl6', local_mean_gauss, [10], True),
    ('aprobl6', local_mean_gauss, [20], True)
]

APROBL7 = [
    ('aprobl7', intensities, [], False),
    ('aprobl7', local_mean_gauss, [5], True),
    ('aprobl7', local_mean_gauss, [10], True),
    ('aprobl7', local_mean_gauss, [20], True)
]

features_to_extract = MRI + APROBL1 + APROBL2 + APROBL3 + APROBL4 + APROBL5 + APROBL6 + APROBL7
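
Each tuple in this config is (sequence, feature function, arguments, needs-voxel-spacing); savefv in extract_features.py turns it into a deterministic file name, which is how apply_rdf.py and sample_trainingset.py later find the features again. A minimal sketch (not a repository file) of that naming rule; func_name is the Python 2 function-name attribute used throughout the repository:

# Illustrative sketch (not part of the repository): the feature file naming rule.
from medpy.features.intensity import local_mean_gauss

entry = ('MRI', local_mean_gauss, [3], True)   # one line from featureconfig.py
seq, fcall, fargs, _ = entry
name = 'feature.{}.{}.{}'.format(seq, fcall.func_name,
                                 '_'.join(['arg{}'.format(i) for i in fargs]))
print(name)  # -> feature.MRI.local_mean_gauss.arg3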
--------------------------------------------------------------------------------
/scripts/apply_rdf.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Apply an RDF to a case.
arg1: the decision forest file
arg2: the case folder holding the feature files
arg3: the case's mask file
arg4: file containing a struct identifying the features to use
arg5: the target probability file
arg6: the target segmentation file
"""

import os
import sys
import imp
import pickle
import numpy

from scipy.ndimage.morphology import binary_fill_holes, binary_dilation
from scipy.ndimage.measurements import label

from medpy.io import load, save
from medpy.features.utilities import join

# constants
n_jobs = 6

def main():
    # catch parameters
    forest_file = sys.argv[1]
    case_folder = sys.argv[2]
    mask_file = sys.argv[3]
    feature_cnf_file = sys.argv[4]
    probability_file = sys.argv[5]
    segmentation_file = sys.argv[6]

    # load the features to use and create proper names from them
    features_to_use = load_feature_names(feature_cnf_file)

    # load the case features
    feature_vector = []

    for feature_name in features_to_use:
        _file = os.path.join(case_folder, '{}.npy'.format(feature_name))
        if not os.path.isfile(_file):
            raise Exception('The feature "{}" could not be found in folder "{}". Breaking.'.format(feature_name, case_folder))
        with open(_file, 'rb') as f:
            feature_vector.append(numpy.load(f))
    feature_vector = join(*feature_vector)
    if 1 == feature_vector.ndim:
        feature_vector = numpy.expand_dims(feature_vector, -1)

    # load and apply the decision forest
    with open(forest_file, 'rb') as f:
        forest = pickle.load(f)
    probability_results = []
    for _fv in numpy.array_split(feature_vector, 20):
        probability_results.append(forest.predict_proba(_fv))
    probability_results = numpy.vstack(probability_results)
    classification_results = numpy.argmax(probability_results, -1)

    # prepare the images
    m, h = load(mask_file)
    m = m.astype(numpy.bool)
    oc = numpy.zeros(m.shape, numpy.uint8)
    op = numpy.zeros(m.shape + (probability_results.shape[-1], ), numpy.float32)
    oc[m] = numpy.squeeze(classification_results).ravel()
    op[m] = numpy.squeeze(probability_results).reshape(numpy.prod(probability_results.shape[:-1]), probability_results.shape[-1])

    # save the results
    save(oc, segmentation_file, h, True)
    save(op, probability_file, h, True)

def feature_struct_entry_to_name(fstruct):
    seq, fcall, fargs, _ = fstruct
    return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))

def load_feature_struct(f):
    "Load the feature struct from a feature config file."
    d, m = os.path.split(os.path.splitext(f)[0])
    f, filename, desc = imp.find_module(m, [d])
    return imp.load_module(m, f, filename, desc).features_to_extract

def load_feature_names(f):
    "Load the feature names from a feature config file."
    fs = load_feature_struct(f)
    return [feature_struct_entry_to_name(e) for e in fs]

if __name__ == "__main__":
    main()
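
The two numpy.zeros volumes at the end of main() show how the flat per-voxel predictions are scattered back into image space: boolean-mask assignment places the i-th prediction at the i-th True voxel in row-major order. A toy sketch (not a repository file) of exactly that numpy behaviour:

# Illustrative sketch (not part of the repository): scattering flat predictions
# back into a volume via a boolean mask.
import numpy

mask = numpy.zeros((4, 4), numpy.bool_)
mask[1:3, 1:3] = True                                  # 4 voxels inside the mask

predictions = numpy.array([1, 2, 3, 4], numpy.uint8)   # one value per masked voxel

out = numpy.zeros(mask.shape, numpy.uint8)
out[mask] = predictions                                # row-major order of the True voxels
print(out)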
--------------------------------------------------------------------------------
/pop_backgroundstripped.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Strips the background (originally: the skull) from all sequence volumes.
#####

## Changelog
# 2015-02-27 Changed to foreground rather than brainmask
# 2014-08-14 Changed such that all brain mask option folders contain brain masks for all cases, using copies from other brain mask settings
# 2014-08-12 Adapted to work with different skull-stripping base sequences for different target sequences
# 2014-03-25 Adapted to take any sequence as base sequence.
# 2013-11-04 Improved the mechanism and separated the brain mask location from the skull-stripped images.
# 2013-10-16 created

# include shared information
source $(dirname $0)/include.sh

# functions
###
# Compute a foreground mask using the base sequence
###
function compute_foregroundmask ()
{
    # grab parameters
    i=$1

    # create the required directories
    mkdircond ${sequenceskullstripped}/${basesequence}/${i}
    # continue if the target file already exists
    if [ -f "${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}" ]; then
        return
    fi
    # compute the foreground mask
    log 1 "Computing foreground mask for ${sequencespace}/${i}/${basesequence}.${imgfiletype}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    runcond "${scripts}/make_foreground_mask.py ${sequencespace}/${i}/${basesequence}.${imgfiletype} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}"
}

# main code
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )
    sequences=( ${sc_sequences[$scid]} )

    mkdircond ${sequenceskullstripped}/${basesequence}
    mkdircond ${sequencebrainmasks}/${basesequence}

    log 2 "Computing foreground masks on base sequence ${basesequence}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    parallelize compute_foregroundmask ${threadcount} images[@]

    log 2 "Applying the foreground mask to the remaining spectra" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${images[@]}"; do
        for s in "${sequences[@]}"; do
            # skip if base sequence
            if [ "${s}" == "${basesequence}" ]; then
                continue
            fi

            srcfile="${sequencespace}/${i}/${s}.${imgfiletype}"
            trgfile="${sequenceskullstripped}/${basesequence}/${i}/${s}.${imgfiletype}"

            # continue if the target file already exists
            if [ -f "${trgfile}" ]; then
                log 1 "Target file ${trgfile} already exists. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi
            # continue and warn if the source file doesn't exist
            if [ ! -f "${srcfile}" ]; then
                log 3 "Source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi

            runcond "${scripts}/apply_binary_mask.py ${srcfile} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} ${trgfile}" /dev/null
        done
    done
done

# fill possible gaps in the foreground masks (careful: which foreground mask configuration is chosen for the filling is random!)
for brain_basessequence_from in "${sc_train_brainmasks[@]}"; do
    for brain_basessequence_to in "${sc_train_brainmasks[@]}"; do
        linkmissing "${sequencebrainmasks}/${brain_basessequence_from}/" "${sequencebrainmasks}/${brain_basessequence_to}/"
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
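
apply_binary_mask.py itself is not part of this dump; given how it is called above (source image, mask, target), a plausible minimal implementation would be the following sketch. Everything in it is inferred from the call signature, not taken from the actual script:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): what apply_binary_mask.py
# plausibly does, inferred from its usage: <image> <mask> <target>.
import sys
import numpy
from medpy.io import load, save

def main():
    i, h = load(sys.argv[1])
    m = load(sys.argv[2])[0].astype(numpy.bool)
    i[~m] = 0                      # zero out everything outside the mask
    save(i, sys.argv[3], h, True)

if __name__ == "__main__":
    main()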
--------------------------------------------------------------------------------
/pop_intensitrangestandardization.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Standardizes the intensity profiles of all images belonging to the same MRI sequence.
#####

## Changelog
# 2014-08-12 Updated to work with adaptive intensity model creation depending on the brain masks used.
# 2014-03-25 Updated to new structure.
# 2013-11-14 changed script to allow for intensity correction of an image, even if the model already exists
# 2013-11-05 adapted to new brain mask location
# 2013-10-22 created

# include shared information
source $(dirname $0)/include.sh

# main code
log 2 "Learning and adapting the intensity profiles" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
tmpdir=`mktemp -d`
for scid in "${!sc_train_brainmasks[@]}"; do
    basesequence=${sc_train_brainmasks[$scid]}
    images=( ${sc_train_images[$scid]} )
    sequences=( ${sc_sequences[$scid]} )

    mkdircond ${sequenceintensitrangestandardization}/${basesequence}

    for s in "${sequences[@]}"; do
        log 2 "Processing MRI sequence ${s}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

        # if the target model already exists, skip the model creation for the whole sequence and remark upon it
        if [ -f "${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl" ]; then
            log 3 "The intensity model for the MRI sequence ${s} already exists. Skipping the model creation for the whole sequence." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        else
            # collect all the images for training
            images_string=""
            masks_string=""
            for i in "${images[@]}"; do
                # if a target file already exists, skip the model creation and image transformation for the whole sequence and remark upon it
                if [ -f "${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}" ]; then
                    log 3 "One of the target files for the MRI sequence ${s} already exists. Skipping the model creation and image transformation for the whole sequence." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                    continue 2
                fi
                # add the image to the list of images to use for training (always use all images)
                images_string="${images_string} ${sequencebiasfieldcorrected}/${basesequence}/${i}/${s}.${imgfiletype}"
                masks_string="${masks_string} ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype}"
            done

            # train the model without transforming the images
            runcond "medpy_intensity_range_standardization.py --masks ${masks_string} --save-model ${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl ${images_string}"
        fi

        # transform and post-process the images, then move them to their target location, if not already existent
        for i in "${images[@]}"; do
            mkdircond ${sequenceintensitrangestandardization}/${basesequence}/${i}
            if [ ! -f "${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}" ]; then
                runcond "medpy_intensity_range_standardization.py --load-model ${sequenceintensitrangestandardization}/${basesequence}/intensity_model_${s}.pkl --masks ${sequencebrainmasks}/${basesequence}/${i}.${imgfiletype} --save-images ${tmpdir} ${sequencebiasfieldcorrected}/${basesequence}/${i}/${s}.${imgfiletype} -f"
                runcond "${scripts}/condenseoutliers.py ${tmpdir}/${s}.${imgfiletype} ${sequenceintensitrangestandardization}/${basesequence}/${i}/${s}.${imgfiletype}"
            fi
        done

        emptydircond ${tmpdir}
    done
done
rmdircond ${tmpdir}
log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
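
The medpy_intensity_range_standardization.py CLI used above wraps medpy's IntensityRangeStandardization class in the same train-once / transform-many pattern as the script: learn a model from all training images, then map each image into the learned space. A minimal sketch (not a repository file) of that pattern; the arrays are toy data and the exact class behaviour on out-of-range intensities is not shown here:

# Illustrative sketch (not part of the repository): train-once / transform-many
# intensity standardization with medpy.
import numpy
from medpy.filter import IntensityRangeStandardization

# hypothetical training data: one flat intensity array per training image
train = [numpy.random.rand(1000) * s for s in (80, 100, 120)]

irs = IntensityRangeStandardization()
irs.train(train)                         # learn the common intensity model

new_image = numpy.random.rand(1000) * 90
standardized = irs.transform(new_image)  # map a new image into the model's space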
--------------------------------------------------------------------------------
/pop_segmentations.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Link all segmentation images from the image database in a consistent manner.
#####

## Changelog
# 2014-08-12 Changed to work with multiple segmentation sets
# 2014-05-05 Changed to also include the flipping along the mid-sagittal plane for every second case.
# 2014-03-25 Changed to copy images and correct (possibly faulty) voxel spacings
# 2014-03-25 Adapted to work with the new case to database case mapping.
# 2013-10-21 created

# include shared information
source $(dirname $0)/include.sh

# Constants
basesequenceflipdim="0"

# Image collection HEOPKS details
c01dir="/imagedata/HEOPKS/"
declare -A c01indicesmapping=( ["01"]="01" ["02"]="02" ["03"]="03" ["04"]="04" ["05"]="05" ["06"]="06" ["07"]="07" ["08"]="08" ["09"]="09" ["10"]="10" \
                               ["11"]="11" ["12"]="12" ["13"]="13" ["14"]="14" ["15"]="15" ["16"]="16" ["17"]="17" ["18"]="18" ["19"]="19" ["20"]="20" \
                               ["21"]="21" ["22"]="22" ["23"]="23" ["24"]="24" ["25"]="25" ["26"]="26" ["27"]="27" ["28"]="28" ["29"]="29" )

# Image collection JGABLENTZ details
c02dir="/imagedata/JGABLENTZ/"
declare -A c02indicesmapping=( ["30"]="02" ["31"]="08" ["32"]="11" ["33"]="13" ["34"]="14" ["35"]="17" ["36"]="19" ["37"]="20" ["38"]="25" ["39"]="29" \
                               ["40"]="30" ["41"]="31" ["42"]="34" ["43"]="47" ["44"]="55" ["45"]="57" )


##
# Checks whether at least one target image already exists
##
function check_existance () {
    for i in "${allimages[@]}"; do
        [ -f "${segmentations}/${gtset}/${i}.${imgfiletype}" ] && echo "1" && return
    done
    echo "0"
}

# main code
for gtset in "${gtsets[@]}"; do
    log 2 "Processing ground truth set ${gtset}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"

    if [[ "$(check_existance)" -eq "1" ]]; then
        log 3 "Folder ${segmentations}/${gtset} already contains files. Assuming done and skipping the complete ground truth set, as otherwise a double-flip might occur." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        continue
    fi

    srcdir=${gtsources[$gtset]}
    mkdircond ${segmentations}/${gtset}

    log 2 "Copying ground truth images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        # catch the original voxel spacing of the associated flair sequence
        vs=( $(voxelspacing "${originals}/${i}/flair_tra.${imgfiletype}") )
        vs=$(joinarr " " ${vs[@]})
        # copy and correct the voxel spacing
        if test "${c01indicesmapping[${i}]+isset}"; then
            runcond "medpy_set_pixel_spacing.py ${c01dir}/${srcdir}/${c01indicesmapping[${i}]}.${imgfiletype} ${segmentations}/${gtset}/${i}.${imgfiletype} ${vs[@]}"
        elif test "${c02indicesmapping[${i}]+isset}"; then
            runcond "medpy_set_pixel_spacing.py ${c02dir}/${srcdir}/${c02indicesmapping[${i}]}.${imgfiletype} ${segmentations}/${gtset}/${i}.${imgfiletype} ${vs[@]}"
        else
            log 3 "No candidate for case id ${i} found in any of the collections. Please check your 'images' array. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        fi
    done

    log 2 "Flipping ground truth of every second case in-place along the mid-sagittal plane" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for (( i = 1 ; i < ${#allimages[@]} ; i+=2 )) do
        f="${segmentations}/${gtset}/${allimages[$i]}.${imgfiletype}"
        if [ -e ${f} ]; then
            lnrealize "${f}"
            runcond "${scripts}/flip.py ${f} ${basesequenceflipdim}"
        fi
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
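
flip.py is referenced here but is not part of the dump. Given its call signature, flip.py <image> <dimension>, and the in-place usage after lnrealize, a plausible minimal version is sketched below; everything in it is inferred, not taken from the actual script:

#!/usr/bin/python
# Illustrative sketch (not part of the repository): what flip.py plausibly does,
# inferred from its usage: flip.py <image> <dimension>, modifying the file in-place.
import sys
import numpy
from medpy.io import load, save

def main():
    i, h = load(sys.argv[1])
    d = int(sys.argv[2])
    # mirror the volume along the requested axis (era-safe alternative to numpy.flip)
    sl = [slice(None)] * i.ndim
    sl[d] = slice(None, None, -1)
    i = i[tuple(sl)]
    save(i, sys.argv[1], h, True)  # overwrite in-place

if __name__ == "__main__":
    main()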
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 81 | -------------------------------------------------------------------------------- /config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ###################### 4 | # Configuration file # 5 | ###################### 6 | 7 | ## changelog 8 | # 2014-08-12 adapted to run script 9 | # 2014-05-08 created 10 | 11 | # image array 12 | # INCLUSIVE (training) 13 | images=('01' '02' '03' '04' '05' '06' '07' '08' '09' '10') 14 | 15 | # EXCLUSIVE (preparation & application) 16 | 17 | # ground truth sets and settings 18 | gtsets=("seven") # "seven" 19 | declare -A gtsources=( [""]="" ) 20 | 21 | # sequence combinations settings 22 | declare -A sc_sequences=( ["1"]="MRI aprob0 aprob1 aprob2 aprob3 aprob4 aprob5 aprob6 aprob7" ) 23 | declare -A sc_apply_images=( ["1"]="01 02 03 04 05 06 07 08 09 10" ) 24 | declare -A sc_train_images=( ["1"]="01 02 03 04 05 06 07 08 09 10" ) 25 | declare -A sc_train_brainmasks=( ["1"]="MRI" ) 26 | sequencespacebasesequence="MRI" 27 | evaluationbasesequence="MRI" 28 | 29 | # sequence space settings 30 | isotropic=0 # 0/1 to disable/enable pre-registration resampling of base sequence to isotropic spacing 31 | isotropicspacing=3 # the target isotropic spacing in mm 32 | 33 | # config file with feature (1) to extract and (2) to create the training sample from 34 | featurecnf="featureconfig.py" 35 | 36 | # training sample size 37 | samplesize=500000 38 | 39 | # rdf parameters 40 | maxdepth=100 41 | 42 | # post-processing parameters 43 | minimallesionsize=1500 44 | 45 | ## 46 | # functions 47 | ## 48 | # build a global flat sorted allimages variable 49 | function makeallimages () { 50 | local sorted 51 | readarray -t sorted < <(for a in ${sc_apply_images[@]}; do echo "$a"; done | sort) 52 | allimages=( ${sorted[@]} ) 53 | } 54 | # returns a custom feature config file 55 | # call like: featurecnf_file=$(getcustomfeatureconfig "${scid}") 56 | function getcustomfeatureconfig () { 57 | local scid=$1 58 | local sc_featurecnf="/tmp/.${featurecnf:0: -3}_${scid}.py" 59 | echo "${sc_featurecnf}" 60 | } 61 | # build a custom, hidden feature config file for each sequence combinations 62 | function makecustomfeatureconfigs () { 63 | local scid 64 | for scid in "${sc_ids[@]}"; do 65 | local sequences=( ${sc_sequences[$scid]} ) 66 | local sequences_sum=$(joinarr "+" ${sequences[@]}) 67 | local string="features_to_extract = ${sequences_sum}" 68 | #local sc_featurecnf=".${featurecnf:0: -3}_${scid}.py" 69 | local sc_featurecnf=$(getcustomfeatureconfig "${scid}") 70 | runcond "cp ${featurecnf} ${sc_featurecnf}" 71 | #!NOTE: Not very nice, as runcond is omitted. But I didn't find a solution to get the piping working otherwise. 
--------------------------------------------------------------------------------
/scripts/evaluate_multilable.py:
--------------------------------------------------------------------------------
#!/usr/bin/python

"""
Evaluate the created segmentations.
arg1: the segmentation result for each case, with a {} in place of the case number
arg2: the ground truth segmentation, with a {} in place of the case number
arg3: the cases' mask file, with a {} in place of the case number
arg4+: the cases to evaluate
"""

import sys
import math
import time
from multiprocessing.pool import Pool

import numpy
from scipy.ndimage.measurements import label

from medpy.io import load, header, save
from medpy.metric import dc, hd, assd, precision, recall

# constants
n_jobs = 6
silent = True
labels = [1, 2, 3, 4]

def main():

    # catch parameters
    segmentation_base_string = sys.argv[1]
    ground_truth_base_string = sys.argv[2]
    mask_file_base_string = sys.argv[3]
    cases = sys.argv[4:]

    # evaluate each label of each case and collect the scores
    precisions = []
    recalls = []
    dcs = []

    # for each case
    for case in cases:

        # load the images of the current case
        i_segmentation, _ = load(segmentation_base_string.format(case))
        i_truth, _ = load(ground_truth_base_string.format(case))
        i_mask = load(mask_file_base_string.format(case))[0].astype(numpy.bool)

        # collect the images for each label in a list and apply the mask to segmentation and ground truth (to remove ground truth fg outside of the brain mask)
        s = [(i_segmentation == d) & i_mask for d in labels]
        t = [(i_truth == d) & i_mask for d in labels]

        # post-processing
        from scipy.ndimage.morphology import binary_fill_holes
        s = [binary_fill_holes(_s) for _s in s]
        #from medpy.filter import largest_connected_component
        s[0] = largest_connected_components(s[0], n = 2)
        s[1] = largest_connected_components(s[1], n = 1)
        s[2] = largest_connected_components(s[2], n = 2)
        s[3] = largest_connected_components(s[3], n = 2)
        #from scipy.ndimage.morphology import binary_dilation
        #s = [binary_dilation(_s, structure=None, iterations=4) for _s in s]
        save(numpy.asarray(s), segmentation_base_string.format(case) + '_tmp.nii.gz')

        # compute and append the metrics (pool-processed)
        pool = Pool(n_jobs)
        dcs.append(pool.map(wdc, zip(t, s)))
        precisions.append(pool.map(wprecision, zip(s, t)))
        recalls.append(pool.map(wrecall, zip(s, t)))

    # print case-wise and label-wise results
    print 'Case\t',
    for label in labels:
        print 'Label {}\t\t\t'.format(label),
    print '\n\t',
    for label in labels:
        print 'DC[0,1]\tprec.\trec.\t',
    print
    for case, _dcs, _prs, _rcs in zip(cases, dcs, precisions, recalls):
        print '{}'.format(case),
        for _dc, _pr, _rc in zip(_dcs, _prs, _rcs):
            print '\t{:>3,.3f}\t{:>3,.3f}\t{:>3,.3f}'.format(_dc, _pr, _rc),
        print
    print

    # print label-wise averages
    for lid, label in enumerate(labels):
        print 'Label {} averages:'.format(label)

        _mdcs = [_dc[lid] for _dc in dcs]
        _mpres = [_prs[lid] for _prs in precisions]
        _mrcs = [_rcs[lid] for _rcs in recalls]
        print '\tDM average\t{} +/- {} (Median: {})'.format(numpy.mean(_mdcs), numpy.std(_mdcs), numpy.median(_mdcs))
        print '\tPrec. average\t{} +/- {} (Median: {})'.format(numpy.mean(_mpres), numpy.std(_mpres), numpy.median(_mpres))
        print '\tRec. average\t{} +/- {} (Median: {})'.format(numpy.mean(_mrcs), numpy.std(_mrcs), numpy.median(_mrcs))
    print

    # print overall averages (label independent)
    print 'Overall averages:'
    print 'DM average\t{} +/- {} (Median: {})'.format(numpy.asarray(dcs).mean(), numpy.asarray(dcs).std(), numpy.median(numpy.asarray(dcs)))
    print 'Prec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(precisions).mean(), numpy.asarray(precisions).std(), numpy.median(numpy.asarray(precisions)))
    print 'Rec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(recalls).mean(), numpy.asarray(recalls).std(), numpy.median(numpy.asarray(recalls)))

def wdc(x):
    return dc(*x)
def whd(x):
    try:
        val = hd(*x)
    except RuntimeError:
        val = numpy.inf
    return val
def wprecision(x):
    return precision(*x)
def wrecall(x):
    return recall(*x)
def wassd(x):
    try:
        val = assd(*x)
    except RuntimeError:
        val = numpy.inf
    return val

def largest_connected_components(img, n = 1, structure = None):
    r"""
    Select the n largest connected binary components in an image.

    Treats all zero values in the input image as background and all others as foreground.
    The return value is a binary array of equal dimensions as the input array with TRUE
    values where the n largest connected components are situated.

    Parameters
    ----------
    img : array_like
        An array containing connected objects. Will be cast to type numpy.bool.
    n : int
        The number of largest components to keep.
    structure : array_like
        A structuring element that defines the connectivity. Structure must be symmetric.
        If no structuring element is provided, one is automatically generated with a
        squared connectivity equal to one.

    Returns
    -------
    binary_image : ndarray
        The supplied binary image with only the n largest connected components remaining.
    """
    labeled_array, num_features = label(img, structure)
    component_sizes = [numpy.count_nonzero(labeled_array == label_idx) for label_idx in range(1, num_features + 1)]
    component_indices_list_by_sizes = numpy.argsort(component_sizes)[::-1] + 1

    out = numpy.zeros(img.shape, numpy.bool)

    for i in range(n):
        out[labeled_array == component_indices_list_by_sizes[i]] = True
    return out

if __name__ == "__main__":
    main()
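
medpy.metric's dc implements the Dice coefficient, DC = 2|A∩B| / (|A| + |B|). A self-contained sketch (not a repository file) checking the set-theoretic definition against the library call on toy arrays:

# Illustrative sketch (not part of the repository): the Dice coefficient behind
# medpy.metric.dc, checked against its set-theoretic definition.
import numpy
from medpy.metric import dc

a = numpy.zeros((10, 10), numpy.bool_)
b = numpy.zeros((10, 10), numpy.bool_)
a[2:6, 2:6] = True          # 16 voxels
b[4:8, 4:8] = True          # 16 voxels, overlapping a in a 2x2 patch (4 voxels)

manual = 2.0 * (a & b).sum() / (a.sum() + b.sum())
print(manual)               # 8/32 = 0.25
print(dc(a, b))             # same value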
--------------------------------------------------------------------------------
/pop_sequencespace.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Rigidly registers all sequences to a base sequence, which can optionally be resampled to isotropic spacing.
#####

## Changelog
# 2014-08-12 Changed to use sequencespacebasesequence rather than basesequence
# 2014-03-24 Changed to a more flexible version
# 2013-11-13 Added a step to correct the qform and sform codes
# 2013-11-04 Added re-sampling of the T2 image to isotropic spacing before registration and updated the loop design.
# 2013-10-16 ADC images are now not registered directly, but rather transformed with the DW transformation matrix
# 2013-10-15 created

# include shared information
source $(dirname $0)/include.sh

# functions
###
# Resample the base sequence of the supplied id
###
function resample ()
{
    idx=$1

    srcfile="${originals}/${idx}/${sequencespacebasesequence}.${imgfiletype}"
    trgfile="${sequencespace}/${idx}/${sequencespacebasesequence}.${imgfiletype}"

    mkdircond ${sequencespace}/${idx}

    # warn and skip if the source file is not present
    if [ ! -f "${srcfile}" ]; then
        log 3 "Base sequence for case ${idx} not found under ${srcfile}. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        return
    fi

    # process if the target file does not yet exist
    if [ ! -f "${trgfile}" ]; then
        log 1 "Isotropic resampling to ${trgfile}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
        runcond "medpy_resample.py ${srcfile} ${trgfile} ${isotropicspacing},${isotropicspacing},${isotropicspacing}"
    fi
}

###
# Register an image to another, also saving the transformation matrix.
###
function register ()
{
    idx=$1
    sequence=$2

    trgdir="${sequencespace}/${idx}/"
    fixed="${trgdir}/${sequencespacebasesequence}.${imgfiletype}"
    moving="${originals}/${idx}/${sequence}.${imgfiletype}"

    tmpdir=`mktemp -d`

    # perform the rigid registration
    log 1 "Registering ${moving} to ${fixed} using tmp dir ${tmpdir}"
    runcond "elastix -f ${fixed} -m ${moving} -out ${tmpdir} -p ${configs}/elastix_sequencespace_rigid_cfg.txt -threads=${threadcount}" /dev/null
    # copy the resulting files
    cpcond "${tmpdir}/result.0.nii.gz" "${trgdir}/${sequence}.${imgfiletype}"
    cpcond "${tmpdir}/TransformParameters.0.txt" "${trgdir}/${sequence}.txt"

    # clean up
    emptydircond "${tmpdir}"
    rmdircond "${tmpdir}"
}

###
# Transform a sequence using an already existing transformation matrix
###
function transform ()
{
    idx=$1
    sequence=$2
    matrix=$3

    trgdir="${sequencespace}/${idx}/"
    moving="${originals}/${idx}/${sequence}.${imgfiletype}"

    tmpdir=`mktemp -d`

    # perform the transformation
    log 1 "Transforming ${sequence} image ${moving} with ${matrix} transformation matrix using tmp dir ${tmpdir}"
    runcond "transformix -in ${moving} -out ${tmpdir} -tp ${matrix}" /dev/null
    # copy the resulting file
    cpcond "${tmpdir}/result.nii.gz" "${trgdir}/${sequence}.${imgfiletype}"
    cpcond "${matrix}" "${trgdir}/${sequence}.txt"

    # clean up
    emptydircond ${tmpdir}
    rmdircond "${tmpdir}"
}


# main code
if (( $isotropic == 1 )) ; then
    log 2 "Resampling all ${sequencespacebasesequence} sequences to an isotropic spacing of ${isotropicspacing}mm" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    parallelize resample ${threadcount} allimages[@]
else
    log 2 "Resampling disabled. Linking base sequences ${sequencespacebasesequence} to the target folder." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        mkdircond ${sequencespace}/${i}
        lncond "${PWD}/${originals}/${i}/${sequencespacebasesequence}.${imgfiletype}" "${sequencespace}/${i}/${sequencespacebasesequence}.${imgfiletype}"
    done
fi

log 2 "Registering all remaining sequences to the base sequence ${sequencespacebasesequence}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${allimages[@]}"; do
    for sequences in "${sc_sequences[@]}"; do
        sequences=( ${sequences} )
        for s in "${sequences[@]}"; do
            srcfile="${originals}/${i}/${s}.${imgfiletype}"
            trgfile="${sequencespace}/${i}/${s}.${imgfiletype}"

            # catch the base sequence and continue, since it is the fixed image and does not need registration
            if [ "${s}" == "${sequencespacebasesequence}" ]; then
                continue
            fi
            # catch ADC and continue, since these are transformed with the DW transformation matrices
            if [ "${s}" == "adc_tra" ]; then
                continue
            fi
            # continue if the target file already exists
            if [ -f "${trgfile}" ]; then
                continue
            fi
            # warn if the source file does not exist
            if [ ! -f "${srcfile}" ]; then
                log 3 "The source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
                continue
            fi

            # perform the rigid registration
            register "${i}" "${s}"
        done
    done
done

if isIn "adc_tra" "${sequences[@]}"; then
    log 2 "Registering or transforming ADC images" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
    for i in "${allimages[@]}"; do
        srcfile="${originals}/${i}/adc_tra.${imgfiletype}"
        trgfile="${sequencespace}/${i}/adc_tra.${imgfiletype}"
        matrix="${sequencespace}/${i}/dw_tra_b1000_dmean.txt"

        # warn if the source file does not exist
        if [ ! -f "${srcfile}" ]; then
            log 3 "The source file ${srcfile} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
            continue
        fi
        # continue if the target file already exists
        if [ -f "${trgfile}" ]; then
            continue
        fi

        # transform if a DW image has already been registered
        if [ -f "${matrix}" ]; then
            transform "${i}" "adc_tra" "${matrix}"
        else
            register "${i}" "adc_tra"
        fi
    done
fi

log 2 "Correcting metadata" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
for i in "${allimages[@]}"; do
    for s in "${sequences[@]}"; do
        if [ -f "${sequencespace}/${i}/${s}.${imgfiletype}" ]; then
            runcond "${scripts}/niftimodifymetadata.py ${sequencespace}/${i}/${s}.${imgfiletype} qf=qf sf=qf qfc=1 sfc=1"
        fi
    done
done

log 2 "Done." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
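
medpy_resample.py brings the base sequence onto an isotropic voxel grid before registration; the essential operation is a spline-interpolated zoom by the ratio of old to new spacing. A minimal sketch (not a repository file) of that core, with the 3mm target taken from config.sh and hypothetical file names:

# Illustrative sketch (not part of the repository): isotropic resampling, the
# core of what medpy_resample.py is used for above.
import numpy
from scipy.ndimage import zoom
from medpy.io import load, save, header

img, hdr = load('MRI.nii.gz')                 # hypothetical input
old = numpy.asarray(header.get_pixel_spacing(hdr))
new = numpy.asarray([3.0, 3.0, 3.0])          # target spacing from config.sh

img = zoom(img, old / new, order=3)           # spline interpolation
header.set_pixel_spacing(hdr, new)
save(img, 'MRI_iso.nii.gz', hdr, True)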
143 | """ 144 | labeled_array, num_features = label(img, structure) 145 | component_sizes = [numpy.count_nonzero(labeled_array == label_idx) for label_idx in range(1, num_features + 1)] 146 | component_indices_list_by_sizes = numpy.argsort(component_sizes)[::-1] + 1 147 | 148 | out = numpy.zeros(img.shape, numpy.bool) 149 | 150 | for i in range(n): 151 | out[labeled_array == component_indices_list_by_sizes[i]] = True 152 | return out 153 | 154 | if __name__ == "__main__": 155 | main() 156 | -------------------------------------------------------------------------------- /pop_sequencespace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### 4 | # Rigidly registers all sequences to a base sequence, which can optionally be resampled to isotropic spacing. 5 | ##### 6 | 7 | ## Changelog 8 | # 2014-08-12 Changed to use sequencespacebasesequence rather than basesequence 9 | # 2014-03-24 Changed to a more flexible version 10 | # 2013-11-13 Added step to correct the qform and sform codes 11 | # 2013-11-04 Added re-sampling of T2 image to isotropic spacing before registration and updated loop design. 12 | # 2013-10-16 ADC images are now not registered directly, but rather transformed with the DW transformation matrix 13 | # 2013-10-15 created 14 | 15 | # include shared information 16 | source $(dirname $0)/include.sh 17 | 18 | # functions 19 | ### 20 | # Resample the base sequence of the supplied id 21 | ### 22 | function resample () 23 | { 24 | idx=$1 25 | 26 | srcfile="${originals}/${idx}/${sequencespacebasesequence}.${imgfiletype}" 27 | trgfile="${sequencespace}/${idx}/${sequencespacebasesequence}.${imgfiletype}" 28 | 29 | mkdircond ${sequencespace}/${idx} 30 | 31 | # warn and skip if source file not present 32 | if [ ! -f "${srcfile}" ]; then 33 | log 3 "Base sequence for case ${idx} not found under ${srcfile}. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 34 | return 35 | fi 36 | 37 | # process if target file not yet existing 38 | if [ ! -f "${trgfile}" ]; then 39 | log 1 "Isotropic resampling to ${trgfile}" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 40 | runcond "medpy_resample.py ${srcfile} ${trgfile} ${isotropicspacing},${isotropicspacing},${isotropicspacing}" 41 | fi 42 | } 43 | 44 | ### 45 | # Register an image to another, also saving the transformation matrix. 

    # load the features of each case, draw the samples from them, and append them to a training set
    drawn_samples = dict()

    for case in training_set_cases:
        if verboose: print 'Sampling features of case {}'.format(case)

        # load and sample the features piece-wise to avoid excessive memory requirements
        drawn_samples_case = dict()
        for feature_name in features_to_use:
            _file = os.path.join(src_dir, case, '{}.npy'.format(feature_name))
            if not os.path.isfile(_file):
                raise Exception('The feature "{}" for case {} could not be found in folder "{}". Breaking.'.format(feature_name, case, os.path.join(src_dir, case)))
            with open(_file, 'r') as f:
                feature_vector = numpy.load(f)
            tscs = training_set_classes_selections[case]
            for cla, sel in tscs.iteritems():
                if not cla in drawn_samples_case:
                    drawn_samples_case[cla] = []
                drawn_samples_case[cla].append(feature_vector[sel])

        # join and append the feature vector from this case
        for cla, samples in drawn_samples_case.iteritems():
            if not cla in drawn_samples:
                drawn_samples[cla] = []
            drawn_samples[cla].append(join(*samples)) # vertical join of different features

    # prepare the training set as a numpy array, together with the class memberships
    samples = [append(*csamples) for csamples in drawn_samples.itervalues()] # append samples belonging to the same class
    samples_length = [len(x) for x in samples]
    samples_class_memberships = numpy.zeros(sum(samples_length), dtype=numpy.int8)
    i = 0
    for c, sl in zip(drawn_samples.keys(), samples_length):
        samples_class_memberships[i:i+sl] = c
        i += sl
    samples_feature_vector = append(*samples)

    if debug: print 'n_classes', len(drawn_samples)
    if debug: print 'samples_feature_vector shape', samples_feature_vector.shape
    if debug: print 'class_memberships shape', samples_class_memberships.shape
    if debug: print 'class_memberships dtype', samples_class_memberships.dtype
    if debug: print 'class_memberships unique', numpy.unique(samples_class_memberships)

    # save the feature vector, feature names, and class membership vector as the leave-one-out training set
    if verboose: print 'Saving training data set'
    with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, samples_feature_vector)
    with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, samples_class_memberships)
    with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
        numpy.save(f, features_to_use)
    with open('{}/trainingset.classesselections.pkl'.format(trg_dir), 'wb') as f:
        pickle.dump(training_set_classes_selections, f)

    if verboose: print

    if verboose: print 'Done.'

def feature_struct_entry_to_name(fstruct):
    seq, fcall, fargs, _ = fstruct
    return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))

def load_feature_struct(f):
    "Load the feature struct from a feature config file."
    d, m = os.path.split(os.path.splitext(f)[0])
    f, filename, desc = imp.find_module(m, [d])
    return imp.load_module(m, f, filename, desc).features_to_extract

def load_feature_names(f):
    "Load the feature names from a feature config file."
    fs = load_feature_struct(f)
    return [feature_struct_entry_to_name(e) for e in fs]

if __name__ == "__main__":
    main()
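
With config.sh's samplesize of 500000 and nine training cases per leave-one-out fold, each case contributes int(500000/9) = 55555 samples, split across the classes by their voxel share in that case's mask. A worked sketch (not a repository file) of that arithmetic, with a hypothetical class distribution:

# Illustrative sketch (not part of the repository): the per-case, per-class
# sample counts produced by the stratified sampling above.
total_no_of_samples = 500000
n_training_cases = 9                        # leave-one-out over ten cases

samples_to_draw = int(total_no_of_samples / n_training_cases)   # 55555

# hypothetical class distribution inside one case's mask
n_samples_class = {0: 900000, 1: 80000, 2: 20000}
n_class_vector = sum(n_samples_class.values())

for c in sorted(n_samples_class):
    ratio = n_samples_class[c] / float(n_class_vector)
    print('class {}: draw {} samples'.format(c, int(samples_to_draw * ratio)))
# -> class 0: 49999, class 1: 4444, class 2: 1111 samples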
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#####
# Adaptable pipeline script for running the whole pipeline or parts of it (distributed where deemed sensible)
#####

### CHANGELOG ###
# 2014-08-12 created

### SETTINGS ###
#START="forests"
EVALLOG="results/standalonerun.log"
LOGGING=true
USER="maier"
HOSTS=("bibo" "yipyip" "hastig" "bert" "elmo")
# Host notes
# kermit: elastix and transformix give a segmentation fault when reading images; the error persists after re-installing elastix from the repository; forums suggest that it is a known (and unsolved) problem
# kruemel: uses mastmeyer's installation of medpy rather than the one from the PYTHONPATH, which might cause problems in some configurations; furthermore, my sklearn is shadowed by a local installation
# the slowest, in order from slow to slower: tiffy < piggy < rumpel < mumpitz
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
#HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
#HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
#HOSTS=("elmo" "bert") # bad mood users
100 | 
101 |     # load the features of each case, draw the samples from them and append them to a training set
102 |     drawn_samples = dict()
103 | 
104 |     for case in training_set_cases:
105 |         if verbose: print 'Sampling features of case {}'.format(case)
106 | 
107 |         # load and sample the features piece-wise to avoid excessive memory requirements
108 |         drawn_samples_case = dict()
109 |         for feature_name in features_to_use:
110 |             _file = os.path.join(src_dir, case, '{}.npy'.format(feature_name))
111 |             if not os.path.isfile(_file):
112 |                 raise Exception('The feature "{}" for case {} could not be found in folder "{}". Breaking.'.format(feature_name, case, os.path.join(src_dir, case)))
113 |             with open(_file, 'rb') as f:
114 |                 feature_vector = numpy.load(f)
115 |             tscs = training_set_classes_selections[case]
116 |             for cla, sel in tscs.iteritems():
117 |                 if cla not in drawn_samples_case:
118 |                     drawn_samples_case[cla] = []
119 |                 drawn_samples_case[cla].append(feature_vector[sel])
120 | 
121 |         # join and append the feature vectors from this case
122 |         for cla, samples in drawn_samples_case.iteritems():
123 |             if cla not in drawn_samples:
124 |                 drawn_samples[cla] = []
125 |             drawn_samples[cla].append(join(*samples)) # vertical join of different features
126 | 
127 |     # prepare the training set as a numpy array along with the class memberships
128 |     samples = [append(*csamples) for csamples in drawn_samples.itervalues()] # append samples belonging to the same class
129 |     samples_length = [len(x) for x in samples]
130 |     samples_class_memberships = numpy.zeros(sum(samples_length), dtype=numpy.int8)
131 |     i = 0
132 |     for c, sl in zip(drawn_samples.keys(), samples_length):
133 |         samples_class_memberships[i:i+sl] = c
134 |         i += sl
135 |     samples_feature_vector = append(*samples)
136 | 
137 |     if debug: print 'n_classes', len(drawn_samples)
138 |     if debug: print 'samples_feature_vector shape', samples_feature_vector.shape
139 |     if debug: print 'class_memberships shape', samples_class_memberships.shape
140 |     if debug: print 'class_memberships dtype', samples_class_memberships.dtype
141 |     if debug: print 'class_memberships unique', numpy.unique(samples_class_memberships)
142 | 
143 |     # save the feature vector, feature names and class membership vector as leave-one-out training set
144 |     if verbose: print 'Saving training data set'
145 |     with open('{}/trainingset.features.npy'.format(trg_dir), 'wb') as f:
146 |         numpy.save(f, samples_feature_vector)
147 |     with open('{}/trainingset.classes.npy'.format(trg_dir), 'wb') as f:
148 |         numpy.save(f, samples_class_memberships)
149 |     with open('{}/trainingset.fnames.npy'.format(trg_dir), 'wb') as f:
150 |         numpy.save(f, features_to_use)
151 |     with open('{}/trainingset.classesselections.pkl'.format(trg_dir), 'wb') as f:
152 |         pickle.dump(training_set_classes_selections, f)
153 | 
154 |     if verbose: print
155 | 
156 |     if verbose: print 'Done.'
157 | 
158 | def feature_struct_entry_to_name(fstruct):
159 |     seq, fcall, fargs, _ = fstruct
160 |     return 'feature.{}.{}.{}'.format(seq, fcall.func_name, '_'.join(['arg{}'.format(i) for i in fargs]))
161 | 
162 | def load_feature_struct(f):
163 |     "Load the feature struct from a feature config file."
164 |     d, m = os.path.split(os.path.splitext(f)[0])
165 |     f, filename, desc = imp.find_module(m, [d])
166 |     return imp.load_module(m, f, filename, desc).features_to_extract
167 | 
168 | def load_feature_names(f):
169 |     "Load the feature names from a feature config file."
170 |     fs = load_feature_struct(f)
171 |     return [feature_struct_entry_to_name(e) for e in fs]
172 | 
173 | if __name__ == "__main__":
174 |     main()
175 | 
176 | 
177 | 
178 | 
179 | 
--------------------------------------------------------------------------------
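pop_samplesets.sh drives the sampler above once per leave-one-out case. For reference, a stand-alone invocation could look as follows; only the top-level folder names are taken from include.sh, while the sub-directory names, the feature config and the case indices are illustrative placeholders.

#!/bin/bash
# Draw 250000 stratified samples for leave-one-out case 07 from the
# remaining training cases 01, 02 and 03 (all values illustrative).
scripts/sample_trainingset.py \
    05features/flair_tra \
    100gtsegmentations/expert1 \
    102sequenceforegroundmasks/flair_tra \
    06samplesets/expert1/07/ \
    featureconfig.py \
    250000 \
    01 02 03

Note that the pipeline creates the target directory beforehand (via mkdircond), so it must exist when calling the script by hand.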
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #####
4 | # Adaptable pipeline script for running the whole pipeline or parts of it (distributed where deemed sensible)
5 | #####
6 | 
7 | ### CHANGELOG ###
8 | # 2014-08-12 created
9 | 
10 | ### SETTINGS ###
11 | #START="forests"
12 | EVALLOG="results/standalonerun.log"
13 | LOGGING=true
14 | USER="maier"
15 | HOSTS=("bibo" "yipyip" "hastig" "bert" "elmo")
16 | # Host notes
17 | # kermit: elastix and transformix give a segmentation fault when reading the image; the error persists after re-installing elastix from the repository; forums suggest that it is a known (and unsolved) problem
18 | # kruemel: uses mastmeyer's installation of medpy, rather than the one from the PYTHONPATH; might cause problems in some configurations; furthermore, my sklearn is shadowed by a local installation
19 | # the slowest, in order from slow to slower: tiffy < piggy < rumpel < mumpitz
20 | #HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "piggy" "wolle" "mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch" "elmo" "bert") # all
21 | #HOSTS=("tiffy" "bibo" "oskar" "lulatsch" "kermit" "piggy" "wolle") # bvlab
22 | #HOSTS=("mumpitz" "rumpel" "finchen" "kruemel" "yipyip" "hastig" "schorsch") # happy users
23 | #HOSTS=("elmo" "bert") # bad mood users
24 | 
25 | ### INCLUDES ###
26 | source $(dirname $0)/include.sh
27 | 
28 | ### CONSTANTS ###
29 | LOGDIR='logs/'
30 | CWD="/share$(pwd)"
31 | 
32 | ### FUNCTIONS ###
33 | # Executes runcond, with logging if enabled
34 | function runcondlog () {
35 |     local cmd=$1
36 |     local logfile=$2
37 |     if [[ -z "$LOGGING" ]] ; then
38 |         runcond "${cmd}"
39 |     else
40 |         runcond "${cmd}" "${LOGDIR}/${logfile}.log"
41 |     fi
42 | }
43 | 
44 | # Executes rundistributed, with logging if enabled
45 | function runcondlogdistributed () {
46 |     local cmd=$1
47 |     local logfile=$2
48 |     local errfile=$3
49 |     if [[ -z "$LOGGING" ]] ; then
50 |         rundistributed "${cmd}"
51 |     else
52 |         rundistributed "${cmd}" "${LOGDIR}/${logfile}" "${LOGDIR}/${errfile}"
53 |     fi
54 | }
55 | 
56 | function original () {
57 |     log 2 "### ORIGINALS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
58 |     # not sure whether this can be executed in a distributed fashion
59 |     runcondlog "./pop_original.sh" "originals"
60 |     log 2 "### ORIGINALS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
61 | }
62 | 
63 | function sequencespace () {
64 |     log 2 "### SEQUENCESPACE: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
65 |     # requires elastix & transformix!
66 |     runcondlogdistributed "./pop_sequencespace.sh" "sequencespace" "sequencespace_err"
67 |     #runcondlog "./pop_sequencespace.sh" "sequencespace"
68 |     log 2 "### SEQUENCESPACE: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
69 | }
70 | 
71 | function skullstripped () {
72 |     log 2 "### SEQUENCESKULLSTRIP: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
73 |     # requires fsl5.0-bet
74 |     #runcondlogdistributed "./pop_skullstripped.sh" "skullstrip" "skullstrip_err"
75 |     runcondlog "./pop_skullstripped.sh" "skullstrip"
76 |     log 2 "### SEQUENCESKULLSTRIP: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
77 | }
78 | 
79 | function biasfieldcorrected () {
80 |     log 2 "### SEQUENCEBIASFIELD: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
81 |     # requires cmtk
82 |     #runcondlogdistributed "./pop_biasfieldcorrected.sh" "biasfieldcorrected" "biasfieldcorrected_err"
83 |     runcondlog "./pop_biasfieldcorrected.sh" "biasfield"
84 |     log 2 "### SEQUENCEBIASFIELD: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
85 | }
86 | 
87 | function intensitrangestandardization () {
88 |     log 2 "### SEQUENCEINTENSITYRANGESTD: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
89 |     # cannot readily be executed in a distributed fashion, as the sc_train_images array is required to be unfragmented
90 |     runcondlog "./pop_intensitrangestandardization.sh" "intensityrangestd"
91 |     log 2 "### SEQUENCEINTENSITYRANGESTD: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
92 | }
93 | 
94 | function features () {
95 |     log 2 "### SEQUENCEFEATURES: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
96 |     runcondlogdistributed "./pop_features.sh" "features" "features_err"
97 |     log 2 "### SEQUENCEFEATURES: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
98 | }
99 | 
100 | function samplesets () {
101 |     log 2 "### SEQUENCESAMPLESETS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
102 |     # cannot readily be executed in a distributed fashion, as the sc_train_images array is required to be unfragmented
103 |     runcondlog "./pop_samplesets.sh" "samplesets"
104 |     log 2 "### SEQUENCESAMPLESETS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
105 | }
106 | 
107 | function forests () {
108 |     log 2 "### SEQUENCEFORESTS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
109 |     runcondlogdistributed "./pop_trainforests.sh" "trainforests" "trainforests_err"
110 |     log 2 "### SEQUENCEFORESTS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
111 | }
112 | 
113 | function lesionsegmentation () {
114 |     log 2 "### SEQUENCELESIONSEGMENTATION: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
115 |     runcondlogdistributed "./pop_lesionsegmentation.sh" "lesionsegmentation" "lesionsegmentation_err"
116 |     log 2 "### SEQUENCELESIONSEGMENTATION: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
117 | }
118 | 
119 | function evaluation () {
120 |     log 2 "### EVALUATION: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
121 |     # can only be executed all at once and locally
122 |     runcond "./evaluate.sh" ${EVALLOG}
123 |     log 2 "### EVALUATION: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
124 | }
125 | 
126 | function segmentations () {
127 |     log 2 "### SEGMENTATIONS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
128 |     # not sure whether this can be executed in a distributed fashion
129 |     runcondlog "./pop_segmentations.sh" "segmentations"
130 |     log 2 "### SEGMENTATIONS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
131 | }
132 | 
133 | function sequencesegmentations () {
134 |     log 2 "### SEQUENCESEGMENTATIONS: start ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
135 |     # requires elastix & transformix!
136 |     runcondlogdistributed "./pop_sequencesegmentations.sh" "sequencesegmentations" "sequencesegmentations_err"
137 |     #runcondlog "./pop_sequencesegmentations.sh" "sequencesegmentations"
138 |     log 2 "### SEQUENCESEGMENTATIONS: done ###" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
139 | }
140 | 
141 | function createfolderstructure () {
142 |     for folder in "${folders[@]}"; do
143 |         mkdircond ${folder}
144 |     done
145 | }
146 | 
147 | # MODULE: Distributed processing
148 | # create a dedicated config file for each host
149 | function makeconfigs () {
150 |     local -i nhosts=${#HOSTS[@]}
151 | 
152 |     # for each host
153 |     for((i=0;i<${nhosts};i++)); do
154 |         local chunks
155 | 
156 |         # generate the config file
157 |         local cnf=".config_${HOSTS[$i]}.sh"
158 |         echo "# Dedicated configuration file for ${HOSTS[$i]}" > ${cnf}
159 |         echo "# $(date +"%D %T")" >> ${cnf}
160 |         echo "# auto generated / can be safely removed after +/- 10s" >> ${cnf}
161 |         echo "" >> ${cnf}
162 | 
163 |         # split and redistribute the sc_apply_images array
164 |         echo "declare -A sc_apply_images=( \\" >> "${cnf}"
165 |         for sc_id in "${!sc_apply_images[@]}"; do
166 |             local images=( ${sc_apply_images[$sc_id]} )
167 |             splitarray chunks ${nhosts} images[@]
168 |             echo "[\"${sc_id}\"]=\"${chunks[$i]}\" \\" >> "${cnf}"
169 |         done
170 |         echo ")" >> "${cnf}"
171 | 
172 |         # split and redistribute the sc_train_images array
173 |         echo "declare -A sc_train_images=( \\" >> "${cnf}"
174 |         for sc_id in "${!sc_train_images[@]}"; do
175 |             local images=( ${sc_train_images[$sc_id]} )
176 |             splitarray chunks ${nhosts} images[@]
177 |             echo "[\"${sc_id}\"]=\"${chunks[$i]}\" \\" >> "${cnf}"
178 |         done
179 |         echo ")" >> "${cnf}"
180 | 
181 |         # add a function call to re-make the allimages variable
182 |         echo "makeallimages" >> "${cnf}"
183 |     done
184 | }
185 | 
186 | # removes the dedicated config file for each host
187 | function removeconfigs () {
188 |     local host
189 |     for host in ${HOSTS[@]};do
190 |         runcond "rm .config_${host}.sh"
191 |     done
192 | }
193 | 
194 | ###
195 | # Function to run a command distributed over a number of machines, taking care of an equal image load among them.
196 | # arg1: the command to execute
197 | # arg2: log file for the command's stdout (on the remote machine); will be suffixed with "_<host>"; optional, otherwise goes to /dev/null
198 | # arg3: err file for the command's stderr (on the remote machine); will be suffixed with "_<host>"; optional, otherwise goes to /dev/null
199 | # example: rundistributed "./myscript.sh" "/tmp/log" "/tmp/err"
200 | # Notes:
201 | # - will always try to switch the remote working directory to ${CWD} before executing any command; if this fails, the command starts in the user's home
202 | # - take care to supply the command like you would start it in the local bash, i.e. with a ./ prefix where required
203 | # - if the function does not return in due time, check whether the command is still running on the target machine(s)
204 | # - if something goes wrong, enable debugging (loglevel=1) and supply a log as well as an error file
205 | function rundistributed () {
206 |     # catch arguments
207 |     local cmd=$1
208 |     local log=$2
209 |     local err=$3
210 | 
211 |     # prepare
212 |     makeconfigs
213 | 
214 |     # start the processes and collect their (remote) pids
215 |     log 2 "Starting distributed processes..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
216 |     local -a pids
217 | 
218 |     for((i=0;i<${#HOSTS[@]};i++)); do
219 |         # build the command
220 |         if [[ -z "$log" ]]; then
221 |             local _log="/dev/null"
222 |         else
223 |             local _log=${log}_${HOSTS[$i]}
224 |         fi
225 |         if [[ -z "$err" ]]; then
226 |             local _err="/dev/null"
227 |         else
228 |             local _err=${err}_${HOSTS[$i]}
229 |         fi
230 |         local rcmd="cd ${CWD}; nohup ${cmd} CUSTOMCONFIG=.config_${HOSTS[$i]}.sh > ${_log} 2> ${_err} < /dev/null & echo \$!"
231 | 
232 |         # execute the command remotely and catch the return value as an array
233 |         log 1 "Command: \"${rcmd}\" / Host: \"ssh ${USER}@${HOSTS[$i]}\"" "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
234 |         local ret=( $(ssh ${USER}@${HOSTS[$i]} "${rcmd}") ) # real command
235 |         local retstring="${ret[@]}"
236 |         log 1 "SSH returned: ${retstring}"
237 | 
238 |         # the last element in the return array is the desired pid
239 |         pids[$i]=${ret[-1]}
240 |         log 2 "Started a process on ${HOSTS[$i]} with pid ${pids[$i]}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
241 |     done
242 | 
243 |     # wait for all remote processes to terminate
244 |     log 2 "Waiting for all distributed processes to terminate..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
245 |     while [ "${#pids[@]}" -ne "0" ]; do
246 |         echo -n '.'
247 |         for i in "${!pids[@]}"; do
248 |             local ret=( $(ssh ${USER}@${HOSTS[$i]} "ps -p${pids[$i]} -opid=") )
249 |             if ! [[ "${ret[-1]}" =~ ^-?[0-9]+$ ]]; then
250 |                 echo -n "(${HOSTS[$i]})"
251 |                 unset pids[$i]
252 |             fi
253 |         done
254 |         sleep 5
255 |     done
256 |     echo ""
257 | 
258 |     # clean up
259 |     removeconfigs
260 | }
261 | 
262 | ### MAIN ###
263 | loglevel=2
264 | #createfolderstructure
265 | #original
266 | #segmentations
267 | #sequencespace
268 | #sequencesegmentations
269 | #skullstripped
270 | #biasfieldcorrected
271 | #intensitrangestandardization
272 | features
273 | #samplesets
274 | #forests
275 | #lesionsegmentation
276 | #evaluation
277 | 
278 | 
--------------------------------------------------------------------------------
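For orientation, this is roughly what a config file generated by makeconfigs above would look like for the first of two hosts, given a single sequence combination with four images; the host name, combination id "sc01" and image indices are illustrative:

# Dedicated configuration file for bibo
# 08/13/14 14:02:11
# auto generated / can be safely removed after +/- 10s

declare -A sc_apply_images=( \
["sc01"]="01 02" \
)
declare -A sc_train_images=( \
["sc01"]="01 02" \
)
makeallimages

Each remote invocation is started with CUSTOMCONFIG=.config_<host>.sh on its command line (see rcmd in rundistributed), so every host only processes its own slice of the images.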
"[$BASH_SOURCE:$FUNCNAME:$LINENO]" 216 | local -a pids 217 | 218 | for((i=0;i<${#HOSTS[@]};i++)); do 219 | # build command 220 | if [[ -z "$log" ]]; then 221 | local _log="/dev/null" 222 | else 223 | local _log=${log}_${HOSTS[$i]} 224 | fi 225 | if [[ -z "$err" ]]; then 226 | local _err="/dev/null" 227 | else 228 | local _err=${err}_${HOSTS[$i]} 229 | fi 230 | local rcmd="cd ${CWD}; nohup ${cmd} CUSTOMCONFIG=.config_${HOSTS[$i]}.sh > ${_log} 2> ${_err} < /dev/null & echo \$!" 231 | 232 | # execute command remotely and catch return value as array 233 | log 1 "Command: \"${rcmd}\" / Host: \"ssh ${USER}@${HOSTS[$i]}\"" "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 234 | local ret=( $(ssh ${USER}@${HOSTS[$i]} "${rcmd}") ) # real command 235 | local retstring="${ret[@]}" 236 | log 1 "Shh returned: ${retstring}" 237 | 238 | # the last element in the return array is the desired pid 239 | pids[$i]=${ret[-1]} 240 | log 2 "Started a process on ${HOSTS[$i]} with pid ${pids[$i]}..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 241 | done 242 | 243 | # wait for all remote processed to terminate 244 | log 2 "Waiting for all distributed processes to terminate..." "[$BASH_SOURCE:$FUNCNAME:$LINENO]" 245 | while [ "${#pids[@]}" -ne "0" ]; do 246 | echo -n '.' 247 | for i in "${!pids[@]}"; do 248 | local ret=( $(ssh ${USER}@${HOSTS[$i]} "ps -p${pids[$i]} -opid=") ) 249 | if ! [[ "${ret[-1]}" =~ ^-?[0-9]+$ ]]; then 250 | echo -n "(${HOSTS[$i]})" 251 | unset pids[$i] 252 | fi 253 | done 254 | sleep 5 255 | done 256 | echo "" 257 | 258 | # clean up 259 | removeconfigs 260 | } 261 | 262 | ### MAIN ### 263 | logelevel=2 264 | #createfolderstructure 265 | #original 266 | #segmentations 267 | #sequencespace 268 | #sequencesegmentations 269 | #skullstripped 270 | #biasfieldcorrected 271 | #intensitrangestandardization 272 | features 273 | #samplesets 274 | #forests 275 | #lesionsegmentation 276 | #evaluation 277 | 278 | -------------------------------------------------------------------------------- /include.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ######################################## 4 | # Include file with shared information # 5 | ######################################## 6 | 7 | ## changelog 8 | # 2014-05-08 Adapted to the new, distributed calculation scheme. 9 | # 2014-05-08 Transfered some settings to a config file and included it here. 10 | # 2014-05-05 Removed normalized space directories. 11 | # 2014-05-05 Added the lnrealize() function. 12 | # 2014-03-25 Adapted directory structure. 13 | # 2014-03-24 Added the runcond function. 14 | # 2013-11-14 Added new directories. 15 | # 2013-11-11 Added new directories. 16 | # 2013-10-31 Added new directories. 17 | # 2013-10-22 Added new directories. 18 | # 2013-10-21 Added new directories. 
19 | # 2013-10-15 Added new directories and made emptydircond a tick safer
20 | # 2013-10-02 created
21 | 
22 | # include the shared config file
23 | source $(dirname $0)/config.sh
24 | 
25 | # folders
26 | originals="00original/"
27 | sequencespace="00original/"
28 | sequenceskullstripped="00original/"
29 | sequencebiasfieldcorrected="03biasfieldcorrected/"
30 | sequenceintensitrangestandardization="04intensitystandarized/"
31 | sequencefeatures="05features/"
32 | sequencesamplesets="06samplesets/"
33 | sequenceforests="07forests/"
34 | sequencelesionsegmentation="08lesionsegmentation/"
35 | 
36 | segmentations="100gtsegmentations/"
37 | sequencesegmentations="100gtsegmentations/"
38 | sequencebrainmasks="102sequenceforegroundmasks/"
39 | 
40 | folders=("${originals}" "${sequencespace}" "${sequenceskullstripped}" "${sequencebiasfieldcorrected}" "${sequenceintensitrangestandardization}" \
41 | "${sequencefeatures}" "${sequencesamplesets}" "${sequenceforests}" "${sequencelesionsegmentation}" "${segmentations}" \
42 | "${sequencesegmentations}" "${sequencebrainmasks}" )
43 | 
44 | scripts="scripts/"
45 | configs="configs/"
46 | 
47 | # other constants
48 | imgfiletype="nii.gz"
49 | threadcount=2
50 | 
51 | # logging
52 | loglevel=1 # 1=debug, 2=info, 3=warning, 4=err, 5+=silent
53 | logprefixes=('DEBUG' 'INFO' 'WARNING' 'ERROR')
54 | logprintlocation=false # true | false to print the location from where the log was triggered
55 | 
56 | 
57 | # shared functions
58 | 
59 | ######
60 | ## Signal a log message of a determined level
61 | ######
62 | function log {
63 |     local level=${1}
64 |     local msg=${2}
65 |     local location=${3} # optional, should be [$SOURCE:$FUNCNAME:$LINENO], [$SOURCE::$LINENO] or similar
66 | 
67 |     local loglevels=${#logprefixes[@]}
68 | 
69 |     local prefix
70 | 
71 |     # check whether the current logging level is lower than the message's logging level
72 |     if [ "$loglevel" -le "$level" ]
73 |     then
74 |         # determine the log type
75 |         if [ "$level" -le "0" ]
76 |         then
77 |             prefix="UNKNOWN"
78 |         elif [ "$level" -gt "$loglevels" ]
79 |         then
80 |             prefix="UNKNOWN"
81 |         else
82 |             prefix=${logprefixes[$level-1]}
83 |         fi
84 | 
85 |         # print, according to logprintlocation, with or without location information
86 |         if $logprintlocation
87 |         then
88 |             echo -e "${prefix}: ${msg} ${location}"
89 |         else
90 |             echo -e "${prefix}: ${msg}"
91 |         fi
92 |     fi
93 | }
94 | 
95 | ######
96 | # Parallelizes a function call by spawning multiple subprocesses
97 | ######
98 | # Note that the different calls are processed in chunks, each of which this function waits to terminate before starting the next one.
99 | # Takes as first parameter the function, as second the number of processes to spawn and as third the array of parameters to pass to the function.
100 | # !The third argument is supposed to be an array and therefore has to be passed in the form "parameter[@]"
101 | # Call like "parallelize fun 4 indices[@]"
102 | function parallelize ()
103 | {
104 |     # Grab parameters
105 |     local fun=$1
106 |     local processes=$2
107 |     local -a parameters=("${!3}")
108 | 
109 |     # split $parameters into $processes sized chunks
110 |     local i
111 |     local parameter
112 |     for i in $(seq 0 ${processes} ${#parameters[@]}); do # seq: <from> <stepsize> <to>
113 |         declare -a parameterchunk="(${parameters[@]:$i:$processes})"
114 |         # execute the function in the background for each parameter in the current chunk, then wait for their termination
115 |         for parameter in "${parameterchunk[@]}"; do
116 |             ${fun} $parameter &
117 |         done
118 |         wait
119 |     done
120 | }
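# Illustrative use of parallelize (not part of the pipeline): run a demo
# function for the elements 1..6, two background processes at a time; each
# chunk of two is waited for before the next chunk is started.
#
#   function demo () { echo "processing $1"; sleep 1; }
#   indices=(1 2 3 4 5 6)
#   parallelize demo 2 indices[@]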
121 | 
122 | ######
123 | ## Create the supplied directory if it does not yet exist
124 | ######
125 | function mkdircond {
126 |     local directory=${1}
127 | 
128 |     if [ ! -d "$directory" ]
129 |     then
130 |         log 1 "Creating directory ${directory}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
131 |         mkdir ${directory}
132 |     fi
133 | }
134 | 
135 | ######
136 | ## Remove all files (but not directories or write-protected files) from the supplied directory if it is not empty
137 | ######
138 | function emptydircond {
139 |     local directory=${1}
140 | 
141 |     if [ -z "$directory" ]; then
142 |         log 3 "Supplied an empty string to the emptydircond function. This might be dangerous and is therefore ignored." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
143 |     else
144 |         local filecount=`ls -al ${directory} | wc -l`
145 |         if [ "$filecount" -gt "3" ]
146 |         then
147 |             rm ${directory}/*
148 |         fi
149 |     fi
150 | }
151 | 
152 | #####
153 | ## Remove a directory if it exists
154 | #####
155 | function rmdircond {
156 |     local directory=${1}
157 | 
158 |     if [ -d "$directory" ]
159 |     then
160 |         rmdir ${directory}
161 |     fi
162 | }
163 | 
164 | #####
165 | ## Empties and removes a directory if it exists
166 | #####
167 | function removedircond {
168 |     local directory=${1}
169 |     emptydircond ${directory}
170 |     rmdircond ${directory}
171 | }
172 | 
173 | #####
174 | ## Runs the passed command if no variable "dryrun" has been initialized with a non-empty value.
175 | ## As a second parameter, a redirect target for the command's stdout can optionally be passed.
176 | #####
177 | function runcond {
178 |     local cmd=$1
179 |     if [[ -z "$dryrun" ]]; then
180 |         if [ $# -gt 1 ]; then
181 |             $cmd > $2
182 |         else
183 |             $cmd
184 |         fi
185 |     else
186 |         echo "DRYRUN: ${cmd}"
187 |     fi
188 | }
189 | 
190 | ######
191 | ## Copy a file if the target file does not already exist
192 | ######
193 | function cpcond {
194 |     local source=$1
195 |     local target=$2
196 | 
197 |     if [ ! -f ${source} ]; then
198 |         log 3 "Source file ${source} does not exist. Skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
199 |     elif [ -f ${target} ]; then
200 |         log 1 "Target file ${target} already exists, skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
201 |     else
202 |         log 1 "Copying ${source} to ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
203 |         runcond "cp ${source} ${target}"
204 |     fi
205 | }
206 | 
207 | ######
208 | ## Create a symlink if it is non-existent or dead
209 | ######
210 | function lncond {
211 |     local source=$1
212 |     local target=$2
213 | 
214 |     # Check whether the link does not exist or is a dead symlink
215 |     if [ ! -e ${target} ]
216 |     then
217 |         # remove if a dead symlink
218 |         if [ -L ${target} ]
219 |         then
220 |             log 1 "Removing dead symlink ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
221 |             rm ${target}
222 |         fi
223 | 
224 |         # create the symlink if the source file exists
225 |         if [ -e ${source} ]
226 |         then
227 |             log 1 "Linking ${source} to ${target}." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
228 |             ln -s ${source} ${target}
229 |         else
230 |             log 3 "${source} does not exist." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
231 |         fi
232 |     else
233 |         log 1 "Target file ${target} already exists, skipping." "[$BASH_SOURCE:$FUNCNAME:$LINENO]"
234 |     fi
235 | }
236 | 
237 | ###
238 | # Takes a symbolic link and makes it "real", i.e. replaces the link with a copy of the
239 | # actual target file.
240 | ###
241 | lnrealize() {
242 |     if [ -L ${1} ]
243 |     then
244 |         runcond "cp --remove-destination `readlink ${1}` ${1}"
245 |     fi
246 | }
247 | 
248 | ###
249 | # Links all files that are present in folder <from> but missing from folder <to> into folder <to>
250 | ###
251 | function linkmissing () {
252 |     local from=$1
253 |     local to=$2
254 | 
255 |     # make the paths absolute if they are not yet
256 |     [ "${from:0:1}" = "/" ] || from="$(pwd)/${from}"
257 |     [ "${to:0:1}" = "/" ] || to="$(pwd)/${to}"
258 | 
259 |     # link missing files
260 |     local f
261 |     for f in "${from}"/*; do
262 |         [[ -f "${f}" ]] || continue
263 |         lncond "${from}/$(basename "$f")" "${to}/$(basename "$f")"
264 |     done
265 | }
266 | 
267 | #####
268 | ## Checks whether an element exists in an array
269 | ## Call like: isIn "element" "${array[@]}"
270 | #####
271 | isIn () {
272 |     local e
273 |     for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
274 |     return 1
275 | }
276 | 
277 | ###
278 | # Join the elements of an array using a one-character delimiter
279 | # Call like: joinarr $delimiter ${arr[@]}
280 | ###
281 | function joinarr () {
282 |     local IFS="${1}"
283 |     shift
284 |     echo "$*"
285 | }
286 | 
287 | #####
288 | # Returns a new version of an array with the passed element removed from it.
289 | # Call like: newarray=( $(delEl element array[@]) )
290 | # If the element could not be found, the original array is returned
291 | # Exit codes (available from $?): 0 on success, 1 if the element could not be found
292 | #####
293 | function delEl {
294 |     local -a arr=("${!2}") # Note: declare has scope limited to the function
295 |     local pos=$(isAt $1 arr[@])
296 |     if [[ $pos -lt 0 ]]; then echo "${arr[@]}" && return 1; fi
297 |     local newarr=(${arr[@]:0:$pos} ${arr[@]:$(($pos + 1))})
298 |     echo "${newarr[@]}"
299 |     return 0
300 | }
301 | 
302 | #####
303 | # Returns the position of the first occurrence of an element in an array.
304 | # Call like: pos=$(isAt element array[@])
305 | # If the element could not be found, the return value (not the exit code!) will be a negative integer
306 | # Exit codes (available from $?): 0 on success, 1 if the element could not be found
307 | #####
308 | isAt () {
309 |     local -a arr=("${!2}")
310 |     local e
311 |     for e in "${!arr[@]}"; do [[ "${arr[$e]}" == "$1" ]] && echo ${e} && return 0; done
312 |     echo -1
313 |     return 1
314 | }
315 | 
316 | ###
317 | # Returns the voxel spacing of the supplied image as a space-separated string
318 | # To catch it as an array, use var=( $(voxelspacing "imagelocation") )
319 | ###
320 | function voxelspacing () {
321 |     local image=$1
322 |     local vss=`medpy_info.py "${image}" | grep "spacing"`
323 |     local vse=${vss:15:-1} # strip the "spacing" prefix and the closing bracket; brittle w.r.t. the medpy_info.py output format
324 |     local vs=(${vse//, / })
325 |     echo "${vs[@]}"
326 | }
327 | ###
328 | # Return a sorted version of an array
329 | # Call like: sarr=( $(sorted array[@]) )
330 | ###
331 | function sorted () {
332 |     local -a arr=("${!1}")
333 |     local -a sorted
334 |     readarray -t sorted < <(for a in "${arr[@]}"; do echo "$a"; done | sort)
335 |     echo "${sorted[@]}"
336 | }
337 | #####
338 | # Splits an array into chunks as equal as possible and returns these.
339 | # The last chunk(s) will always contain the fewest elements.
340 | # Call like:
341 | # splitarray returnvarname nchunks array[@]
342 | # And then iterate over the chunks and unpack them like:
343 | # for packedchunk in "${returnvarname[@]}"; do
344 | #     unpackedchunk=( ${packedchunk[@]} )
345 | # done
346 | # Note that you'll have to pass the name of the desired return variable.
347 | # Take care not to unpack into the same variable (e.g. packedchunk=( ${packedchunk[@]} )), as this will cause unexpected behaviour.
348 | #####
349 | function splitarray () {
350 |     # catch parameters
351 |     local retvar=$1
352 |     local -i nchunks=$2
353 |     local -a arr=("${!3}")
354 | 
355 |     # compute the step size, always rounding up
356 |     local -i len=${#arr[@]}
357 |     local -i step=$(($len / $nchunks))
358 |     if [ "$(($len % $nchunks))" -ne "0" ]; then
359 |         step=$(($step+1))
360 |     fi
361 | 
362 |     # split the array
363 |     local collection
364 |     local chunk
365 |     local -i i
366 |     for (( i=0; i<${len}; i+=${step} )); do
367 |         #chunk=( "${arr[@]:${i}:${step}}" ) # as array
368 |         chunk="${arr[@]:${i}:${step}}" # as string
369 |         collection=( "${collection[@]}" "${chunk}" ) # array of fake arrays (strings containing space-separated elements)
370 |     done
371 | 
372 |     # return the chunks by referenced variable assignment
373 |     eval "$retvar=(\"\${collection[@]}\")"
374 | }
375 | 
--------------------------------------------------------------------------------
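The array helpers at the end of include.sh, in particular splitarray and joinarr, carry the host distribution in run.sh. A minimal, self-contained usage sketch follows; it assumes include.sh (and the config.sh it sources) can be sourced from the working directory.

#!/bin/bash
source ./include.sh

# Split seven image indices into three chunks; with len=7 and nchunks=3 the
# step size rounds up to 3, giving "01 02 03", "04 05 06" and "07".
images=(01 02 03 04 05 06 07)
splitarray chunks 3 images[@]
for packedchunk in "${chunks[@]}"; do
    unpackedchunk=( ${packedchunk[@]} )   # unpack the space-separated fake array
    echo "chunk: $(joinarr , ${unpackedchunk[@]})"
done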