├── .gitignore ├── README.md ├── data ├── evaluate_metapro_galigner.py ├── evaluate_metapro_mzmine2.java ├── evaluate_metapro_openms.py ├── evaluate_metapro_xcms.Rmd ├── evaluate_mzmine2_galigner.py ├── evaluate_openms_galigner.py ├── evaluate_xcms_galigner.py ├── metapro_result_comparison.py ├── openms_result_converter.py └── software_result_comparison.py ├── requirements.txt ├── src ├── coarse_registration.py ├── fine_alignment.py ├── main_galigner.py ├── map_solver │ ├── base_solver.py │ ├── greedy_solver.py │ ├── gurobi_solver.py │ ├── local_bipartite_solver.py │ ├── local_nearest_solver.py │ ├── ortools_solver.py │ └── vlsns_solver.py ├── params.py ├── raw_file_reader.py ├── result_file_reader.py └── tools │ ├── graph_viewer.py │ ├── param_loader.py │ └── trace_recorder.py └── third_party ├── obiwarp ├── History ├── LICENSE ├── README.html ├── README.md ├── Rakefile ├── VERSION └── lib │ ├── Rakefile │ ├── cmdparser.cpp │ ├── cmdparser.h │ ├── doxygen.conf │ ├── dynprog.cpp │ ├── dynprog.h │ ├── lmat.cpp │ ├── lmat.h │ ├── lmat2chrms.cpp │ ├── lmat2lmata.cpp │ ├── lmat2png.cpp │ ├── lmata2lmat.cpp │ ├── mat.cpp │ ├── mat.h │ ├── mat2mata.cpp │ ├── mat_TEMPLATE.cpp │ ├── mat_TEMPLATE.h │ ├── mata2mat.cpp │ ├── not_using │ ├── README.txt │ ├── outliers.cpp │ └── test_outliers.h │ ├── obiwarp.cpp │ ├── obiwarp.dsp │ ├── pngio.cpp │ ├── pngio.h │ ├── test_cmdparser.rb │ ├── test_dynprog.h │ ├── test_lmat.h │ ├── test_lmat_converters.h │ ├── test_mat.h │ ├── test_mat_TEMPLATE.h │ ├── test_mat_converters.rb │ ├── test_obiwarp.h │ ├── test_obiwarp.rb │ ├── test_pngio.h │ ├── test_vec.h │ ├── test_vec_TEMPLATE.h │ ├── tfiles │ ├── file1.mat │ ├── file1.mata │ ├── file3.mat │ ├── file3.mata │ ├── file4.mat │ ├── file4.mata │ ├── tmp1.lmat │ ├── tmp1.lmat.pts │ ├── tmp1.lmata │ ├── tmp1.mat │ ├── tmp1.mata │ ├── tmp1B.lmat │ ├── tmp1B.lmat.warped_default │ ├── tmp1B.lmata │ ├── tmp1_no_header.mata │ ├── tmp1_no_header_messy.mata │ ├── tmp2.lmat │ ├── tmp2.lmata │ └── tmptimes.txt │ ├── variations │ ├── README.txt │ ├── get_ssr_asr_aad.cpp │ ├── obiwarp_anchor_opt.cpp │ ├── obiwarp_chams.cpp │ ├── obiwarp_doing_xy_plots_nicely.cpp │ ├── obiwarp_factor_opt.cpp │ ├── obiwarp_gp_opt.cpp │ ├── obiwarp_probs.cpp │ ├── obiwarp_score.cpp │ ├── obiwarp_speed.cpp │ └── smat_dist.cpp │ ├── vec.cpp │ ├── vec.h │ ├── vec_TEMPLATE.cpp │ └── vec_TEMPLATE.h ├── py_obiwarp.cc └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /third_party/.eggs/ 3 | /third_party/build/ 4 | /third_party/dist/ 5 | /third_party/py_obiwarp.egg-info/ 6 | /third_party/var/ 7 | 8 | /data/TripleTOF_6600/ 9 | /data/TripleTOF_6600_results_mzmine2/ 10 | /data/TripleTOF_6600_results_xcms/ 11 | /data/TripleTOF_6600_results_openms/ 12 | /data/TripleTOF_6600_results_metapro/ 13 | 14 | /data/QE_HF/ 15 | /data/QE_HF_results_mzmine2/ 16 | /data/QE_HF_results_xcms/ 17 | /data/QE_HF_results_openms/ 18 | /data/QE_HF_results_metapro/ 19 | 20 | /experiments/ 21 | /data/MTBLS562/ 22 | /data/MTBLS562_results_openms/ 23 | /data/MTBLS562_results_mzmine2/ 24 | /data/MTBLS562_results_metapro/ 25 | /data/MTBLS562_results_xcms/ 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # G-Aligner: a graph-based feature alignment method for untargeted LC-MS-based metabolomics 2 | 3 | 4 | ## Highlights 5 | - **Novelty:** G-Aligner enables comprehensive analysis of all potential correspondences among features all runs for the first time. G-Aligner treats features and potential correspondences as nodes and edges of a multipartite graph, converts the feature matching problem as a multidimensional assignment problem (MAP), and proposes three combinatorial optimization methods to solve the MAP. 6 | - **Accuracy:** G-Aligner achieved the best performance in comparison with popular feature alignment methods in MZmine2, OpenMS and XCMS on two public metabolomics benchmark datasets. 7 | - **Reliability:** G-Aligner achieved the best performance on manually annotated feature lists and untargeted extracted features of MZmine2, OpenMS and XCMS, and helped all compared software obtaining more accurate result by integrating G-Aligner into their workflow. 8 | - **Open source:** We open-sourced G-Aligner under a permissive license to promote the accuracy of MS data analysis more broadly. 9 | - **Dataset:** We manually annotated a feature dataset for three public benchmark datasets, which contains m/z, RT, area information of library analytes and can be used in evaluations of feature detection, quantification and alignment accuracy. 10 | 11 | ## Datasets 12 | Raw MS files of the metabolomics datasets can be downloaded at [Google Drive](https://drive.google.com/drive/folders/1PRDIvihGFgkmErp2fWe41UR2Qs2VY_5G). 13 | 14 | The mzML files of the metabolomics datasets can be downloaded at [Zenodo](https://doi.org/10.5281/zenodo.8313034). 15 | 16 | Targeted annotation results, evaluation results and evaluation methods can be downloaded at [Zenodo](https://doi.org/10.5281/zenodo.8313034). 17 | 18 | 19 | ## Setup 20 | 1. Prepare the python environment based on your system and hardware. 21 | 22 | 2. Install the dependencies. Here we use ROOT_PATH to represent the root path of G-Aligner. 23 | 24 | ```cd ROOT_PATH\third_party\``` 25 | 26 | ```python setup.py install``` 27 | 28 | ```cd ROOT_PATH``` 29 | 30 | ```pip install -r requirements.txt``` 31 | 32 | 33 | 34 | ## Run G-Aligner 35 | 36 | ### Supported formats 37 | Feature extraction rsults in csv format, containing m/z, RT and area columns. 38 | 39 | ### Demos 40 | Our demos can help you reproduce the evaluation results. 41 | 42 | Place the data download from the Zenodo repository as follows. 43 | ``` 44 | G-Aligner-master 45 | ├── data 46 | │ ├── MTBLS562 47 | │ ├── MTBLS562_results_metapro 48 | │ ├── MTBLS562_results_mzmine2 49 | │ ├── MTBLS562_results_openms 50 | │ ├── MTBLS562_results_xcms 51 | │ ├── QE_HF 52 | │ ├── QE_HF_results_metapro 53 | │ ├── QE_HF_results_mzmine2 54 | │ ├── QE_HF_results_openms 55 | │ ├── QE_HF_results_xcms 56 | │ ├── TripleTOF_6600 57 | │ ├── TripleTOF_6600_results_metapro 58 | │ ├── TripleTOF_6600_results_mzmine2 59 | │ ├── TripleTOF_6600_results_openms 60 | │ ├── TripleTOF_6600_results_xcms 61 | │ ├── evaluate_metapro_galigner.py 62 | │ ├── evaluate_metapro_mzmine2.java 63 | │ ├── evaluate_metapro_openms.py 64 | │ ├── evaluate_metapro_xcms.Rmd 65 | │ ├── evaluate_mzmine2_galigner.py 66 | │ ├── evaluate_openms_galigner.py 67 | │ ├── evaluate_xcms_galigner.py 68 | │ ├── metapro_result_comparison.py 69 | │ ├── software_result_comparison.py 70 | ``` 71 | 72 | - To run the benchmark scripts: 73 | 74 | ```cd ROOT_PATH``` 75 | 76 | ```python data/metapro_result_comparison.py``` 77 | 78 | ```python data/software_result_comparison.py``` 79 | 80 | - To analyze with G-Aligner: 81 | 82 | ```cd ROOT_PATH``` 83 | 84 | Change the parameters in data/evaluate_metapro_galigner.py 85 | 86 | ```python data/evaluate_metapro_galigner.py``` 87 | 88 | Feature alignment results are saved in ```experiment``` folder. 89 | 90 | ## Citation 91 | 92 | Cite our paper at: 93 | ``` 94 | @article{wang2023, 95 | title={G-Aligner: a graph-based feature alignment method for untargeted LC--MS-based metabolomics}, 96 | author={Wang, Ruimin and Lu, Miaoshan and An, Shaowei and Wang, Jinyin and Yu, Changbin}, 97 | journal={BMC bioinformatics}, 98 | volume={24}, 99 | number={1}, 100 | pages={431}, 101 | year={2023}, 102 | publisher={Springer}, 103 | doi={10.1186/s12859-023-05525-4} 104 | } 105 | ``` 106 | 107 | ## License 108 | 109 | G-Aligner is an open-source tool, using [***Mulan Permissive Software License,Version 2 (Mulan PSL v2)***](http://license.coscl.org.cn/MulanPSL2) 110 | 111 | -------------------------------------------------------------------------------- /data/evaluate_metapro_galigner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams 3 | from src.main_galigner import GAligner 4 | 5 | 6 | def eval_wiff(folder_name, solver, vlsns_init_mode): 7 | result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro') 8 | 9 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 10 | rt_col_num=2, mz_col_num=1, area_col_num=3) 11 | raw_file_reading_params = RawFileReadingParams() 12 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 13 | rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1) 14 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 15 | area_factor=1, use_ppm=False, solver=solver) 16 | if vlsns_init_mode is not None: 17 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 18 | 19 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 20 | fine_assignment_params) 21 | g_aligner.do_align() 22 | 23 | 24 | def eval_raw(folder_name, solver, vlsns_init_mode): 25 | result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro') 26 | 27 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 28 | rt_col_num=2, mz_col_num=1, area_col_num=3) 29 | raw_file_reading_params = RawFileReadingParams() 30 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0, 31 | rt_tolerance=0.3, rt_residual_threshold=0.02, degree=1) 32 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1, 33 | area_factor=1, use_ppm=False, solver=solver) 34 | if vlsns_init_mode is not None: 35 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 36 | 37 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 38 | fine_assignment_params) 39 | g_aligner.do_align() 40 | 41 | def eval_mtbls(folder_name, solver, vlsns_init_mode): 42 | result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro') 43 | 44 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 45 | rt_col_num=2, mz_col_num=1, area_col_num=3) 46 | raw_file_reading_params = RawFileReadingParams() 47 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 48 | rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1) 49 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 50 | area_factor=1, use_ppm=False, solver=solver) 51 | if vlsns_init_mode is not None: 52 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 53 | 54 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 55 | fine_assignment_params) 56 | g_aligner.do_align() 57 | 58 | 59 | # # TripleTOF_6600 60 | # eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None) 61 | # eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None) 62 | # eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None) 63 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr') 64 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg') 65 | # 66 | # 67 | # # QE_HF 68 | # eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None) 69 | # eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None) 70 | # eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None) 71 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr') 72 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg') 73 | # 74 | # # MTBLS562 75 | eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None) 76 | eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None) 77 | eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None) 78 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg') 79 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr') 80 | -------------------------------------------------------------------------------- /data/evaluate_metapro_openms.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | 4 | import numpy as np 5 | from pyopenms import * 6 | from src.params import ResultFileReadingParams 7 | from src.result_file_reader import ResultFileReader 8 | 9 | 10 | def eval(folder_name, align_mz_tolerance, align_rt_tolerance, match_mz_tolerance, match_rt_tolerance): 11 | tmp_path = os.getcwd() 12 | result_file_path = os.path.join(tmp_path, folder_name, "metapro") 13 | 14 | result_file_reader = ResultFileReader( 15 | ResultFileReadingParams(result_file_path, skip_line=0, rt_col_num=2, mz_col_num=1, area_col_num=3)) 16 | result_file_paths, result_file_count = result_file_reader.load_result_paths() 17 | 18 | feature_maps = [] 19 | for path in result_file_paths: 20 | results = result_file_reader.load_result(path) 21 | feature_map = FeatureMap() 22 | for row in results: 23 | feature = Feature() 24 | feature.setMZ(row[0]) 25 | feature.setRT(row[1]) 26 | feature.setIntensity(row[2]) 27 | feature_map.push_back(feature) 28 | feature_maps.append(feature_map) 29 | 30 | # set ref_index to feature map index with the largest number of features 31 | ref_index = [ 32 | i[0] 33 | for i in sorted( 34 | enumerate([fm.size() for fm in feature_maps]), key=lambda x: x[1] 35 | ) 36 | ][-1] 37 | 38 | aligner = MapAlignmentAlgorithmPoseClustering() 39 | aligner_params = MapAlignmentAlgorithmPoseClustering().getDefaults() 40 | 41 | aligner_params[b'superimposer:max_shift'] = align_rt_tolerance 42 | aligner_params[b'superimposer:shift_bucket_size'] = 0.005 43 | aligner_params[b'superimposer:mz_pair_max_distance'] = align_mz_tolerance 44 | aligner_params[b'superimposer:rt_pair_distance_fraction'] = 0.005 45 | aligner_params[b'pairfinder:ignore_charge'] = 'true' 46 | aligner_params[b'pairfinder:distance_RT:max_difference'] = align_rt_tolerance 47 | aligner_params[b'pairfinder:distance_MZ:max_difference'] = align_mz_tolerance 48 | aligner.setReference(feature_maps[ref_index]) 49 | aligner.setParameters(aligner_params) 50 | 51 | # perform alignment and transformation of feature maps to the reference map (exclude reference map) 52 | for feature_map in feature_maps[:ref_index] + feature_maps[ref_index + 1:]: 53 | trafo = TransformationDescription() 54 | aligner.align(feature_map, trafo) 55 | transformer = MapAlignmentTransformer() 56 | transformer.transformRetentionTimes(feature_map, trafo, True) # store original RT as meta value 57 | 58 | feature_grouper = FeatureGroupingAlgorithmQT() 59 | feature_grouper_params = feature_grouper.getDefaults() 60 | feature_grouper_params[b'ignore_charge'] = 'true' 61 | feature_grouper_params[b'distance_RT:max_difference'] = match_rt_tolerance 62 | feature_grouper_params[b'distance_MZ:max_difference'] = match_mz_tolerance 63 | feature_grouper_params[b'distance_MZ:exponent'] = 1.0 64 | feature_grouper_params[b'distance_intensity:weight'] = 1.0 65 | feature_grouper.setParameters(feature_grouper_params) 66 | consensus_map = ConsensusMap() 67 | file_descriptions = consensus_map.getColumnHeaders() 68 | 69 | # collect information about input maps 70 | for i, feature_map in enumerate(feature_maps): 71 | file_description = file_descriptions.get(i, ColumnHeader()) 72 | file_description.filename = str(i) 73 | file_description.size = feature_map.size() 74 | file_description.unique_id = i 75 | file_descriptions[i] = file_description 76 | 77 | consensus_map.setColumnHeaders(file_descriptions) 78 | feature_grouper.group(feature_maps, consensus_map) 79 | 80 | first_line = ['mz', 'rt', 'area', '#'] 81 | for i in range(len(result_file_paths)): 82 | file_name = os.path.basename(result_file_paths[i]).split('.')[0] 83 | first_line += [file_name + '_mz', file_name + '_rt', file_name + '_area'] 84 | 85 | result_data = np.zeros((consensus_map.size(), 4 + 3 * result_file_count)) 86 | for i in range(consensus_map.size()): 87 | consensus_feature = consensus_map[i] 88 | result_data[i, 0] = consensus_feature.getMZ() 89 | result_data[i, 1] = consensus_feature.getRT() 90 | result_data[i, 2] = consensus_feature.getIntensity() 91 | feature_list = consensus_feature.getFeatureList() 92 | for feature_handle in feature_list: 93 | idx = feature_handle.getMapIndex() 94 | mz = feature_handle.getMZ() 95 | rt = feature_handle.getRT() 96 | intensity = feature_handle.getIntensity() 97 | result_data[i, 4 + 3 * idx] = mz 98 | result_data[i, 5 + 3 * idx] = rt 99 | result_data[i, 6 + 3 * idx] = intensity 100 | # ConsensusXMLFile().store('D:\workspace\GAligner\metapro\preview.consensusXML', consensus_map) 101 | 102 | file = open('D:\workspace\GAligner\data\\' + folder_name + '_results_metapro\\' + folder_name + '_aligned_openms.csv', 'w') 103 | writer = csv.writer(file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='') 104 | writer.writerow(first_line) 105 | writer.writerows(result_data) 106 | file.close() 107 | 108 | 109 | # eval('TripleTOF_6600', align_mz_tolerance=0.01, align_rt_tolerance=0.5, match_mz_tolerance=0.01, match_rt_tolerance=0.5) 110 | # eval('QE_HF', align_mz_tolerance=0.005, align_rt_tolerance=0.3, match_mz_tolerance=0.005, match_rt_tolerance=0.3) 111 | eval('MTBLS562', align_mz_tolerance=0.015, align_rt_tolerance=0.3, match_mz_tolerance=0.015, match_rt_tolerance=0.3) 112 | -------------------------------------------------------------------------------- /data/evaluate_metapro_xcms.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Notebook" 3 | output: html_notebook 4 | --- 5 | 6 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 7 | 8 | Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 9 | 10 | 11 | Load XCMS library 12 | ```{r} 13 | library(xcms) 14 | library(SummarizedExperiment) 15 | library(BiocParallel) 16 | register(SerialParam()) 17 | ``` 18 | 19 | 20 | Define functions 21 | ```{r} 22 | load_mzml <- function(mzml_path) { 23 | mzmls <- list.files(path = mzml_path, pattern = ".mzML",recursive = TRUE,full.names = TRUE) 24 | pd <- data.frame(sample_name = sub(basename(mzmls), pattern = ".mzML", replacement = "", fixed = TRUE), stringsAsFactors = FALSE) 25 | xdata <- readMSData(files = mzmls, pdata = new("NAnnotatedDataFrame",pd),mode = "onDisk") 26 | return(xdata) 27 | } 28 | 29 | 30 | 31 | load_peak_ranges <- function(result_path) { 32 | result_data <- read.table(result_path, header=FALSE, sep=",") 33 | range_data <- result_data[, c(4:7)] 34 | range_data[, 3] <- range_data[, 3] * 60 35 | range_data[, 4] <- range_data[, 4] * 60 36 | 37 | anno_ranges <- c() 38 | for (i in 1:nrow(range_data)) { 39 | anno_ranges <- c(anno_ranges, c(t(range_data[i,]))) 40 | } 41 | colnames = c("mzmin", "mzmax", "rtmin", "rtmax") 42 | peak_ranges <- matrix(anno_ranges, nrow = nrow(result_data), byrow = TRUE, dimnames = list(NULL, colnames)) 43 | return(peak_ranges) 44 | } 45 | 46 | 47 | load_peak_data <- function(result_path) { 48 | result_data <- read.table(result_path, header=FALSE, sep=",") 49 | peak_data <- result_data[, c(1:3)] 50 | peak_data[, 2] <- peak_data[, 2] * 60 51 | return(peak_data) 52 | } 53 | 54 | 55 | extract_peaks <- function(xdata, result_path, samples) { 56 | start_idx <- 0 57 | for (i in 1:length(samples)) { 58 | result_full_path <- paste0(result_path, samples[i], '.csv') 59 | peak_ranges <- load_peak_ranges(result_full_path) 60 | peak_data <- load_peak_data(result_full_path) 61 | xdata <- manualChromPeaks(xdata, peak_ranges, i) 62 | chromPeaks(xdata)[(start_idx + 1):(start_idx + nrow(peak_ranges)), c('mz', 'rt', 'into')] <- unlist(peak_data) 63 | start_idx <- start_idx + nrow(peak_ranges) 64 | } 65 | return(xdata) 66 | } 67 | 68 | 69 | group_align <- function(xdata, bw, binsize) { 70 | pdp <- PeakDensityParam(minFraction = 0.9, bw = 30, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata)))) 71 | xdata <- groupChromPeaks(xdata, param = pdp) 72 | pgp <- PeakGroupsParam(minFraction = 0.9) 73 | xdata <- adjustRtime(xdata, param = pgp) 74 | 75 | pdp <- PeakDensityParam(minFraction = 0.5, bw = bw, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata)))) 76 | grouped <- groupChromPeaks(xdata, param = pdp) 77 | return(grouped) 78 | } 79 | 80 | 81 | obiwarp_align <- function(xdata, profStep, bw, binsize) { 82 | 83 | obi <- ObiwarpParam(binSize = profStep) 84 | xdata <- adjustRtime(xdata,param = obi) 85 | 86 | pdp <- PeakDensityParam(minFraction = 0.5, bw = bw, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata)))) 87 | grouped <- groupChromPeaks(xdata, param = pdp) 88 | } 89 | 90 | 91 | write_results <- function(grouped, csv_path) { 92 | mzs <- assay(quantify(grouped, value="mz")) 93 | rts <- assay(quantify(grouped, value="rt")) / 60 94 | areas <- assay(quantify(grouped, value="into")) 95 | 96 | result <- data.frame(mz = rowMedians(mzs, na.rm = TRUE), rt = rowMedians(rts, na.rm = TRUE), area = rowMedians(areas, na.rm = TRUE), need_assign = 0) 97 | for (name in colnames(mzs)) { 98 | result[paste0(name, '_mz')] <- mzs[, name] 99 | result[paste0(name, '_rt')] <- rts[, name] 100 | result[paste0(name, '_area')] <- areas[, name] 101 | } 102 | result[is.na(result)] <- 0 103 | write.csv(result, csv_path, row.names = FALSE) 104 | } 105 | 106 | ``` 107 | 108 | 109 | 110 | 111 | Result paths 112 | ```{r} 113 | 114 | wiff_result_path <- 'D:/workspace/GAligner/data/TripleTOF_6600/metapro/' 115 | raw_result_path <- 'D:/workspace/GAligner/data/QE_HF/metapro/' 116 | mtbls_result_path <- 'D:/workspace/GAligner/data/MTBLS562/metapro/' 117 | 118 | wiff_samples <- c('SampleA_1', 'SampleA_2', 'SampleA_3', 'SampleA_4', 'SampleB_1', 'SampleB_2', 'SampleB_3', 'SampleB_4') 119 | raw_samples <- c('SA1', 'SA2', 'SA3', 'SA4', 'SA5', 'SB1', 'SB2', 'SB3', 'SB4', 'SB5') 120 | mtbls_samples <- c('12W-1', '12W-2', '12W-3', '12W-4', '12W-5', '12W-6', '12W-7', '12W-8', 121 | '24W-1', '24W-2', '24W-3', '24W-4', '24W-5', '24W-6', '24W-7', '24W-8', 122 | '32W-1', '32W-2', '32W-3', '32W-4', '32W-5', '32W-6', '32W-7', '32W-8', 123 | '4W-1', '4W-2', '4W-3', '4W-4', '4W-5', '4W-6', '4W-7', '4W-8', 124 | '52W-1', '52W-2', '52W-3', '52W-4', '52W-5', '52W-6', '52W-7', '52W-8') 125 | ``` 126 | 127 | 128 | 129 | 130 | ```{r} 131 | # TripleTOF_6600 group 132 | wiff_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/TripleTOF_6600/mzml") 133 | wiff_xdata <- extract_peaks(wiff_xdata, wiff_result_path, wiff_samples) 134 | wiff_grouped <- group_align(wiff_xdata, bw=10, binsize=0.02) 135 | write_results(wiff_grouped, "D:/workspace/GAligner/data/TripleTOF_6600_results_metapro/TripleTOF_6600_group_aligned_xcms.csv") 136 | ``` 137 | 138 | 139 | ```{r} 140 | # TripleTOF_6600 obiwarp 141 | wiff_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/TripleTOF_6600/mzml") 142 | wiff_xdata <- extract_peaks(wiff_xdata, wiff_result_path, wiff_samples) 143 | wiff_grouped <- obiwarp_align(wiff_xdata, profStep = 1, bw = 10, binsize = 0.02) 144 | write_results(wiff_grouped, "D:/workspace/GAligner/data/TripleTOF_6600_results_metapro/TripleTOF_6600_obiwarp_aligned_xcms.csv") 145 | ``` 146 | 147 | 148 | ```{r} 149 | # QE_HF group 150 | raw_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/QE_HF/mzml") 151 | raw_xdata <- extract_peaks(raw_xdata, raw_result_path, raw_samples) 152 | raw_grouped <- group_align(raw_xdata, bw = 6, binsize = 0.01) 153 | write_results(raw_grouped, "D:/workspace/GAligner/data/QE_HF_results_metapro/QE_HF_group_aligned_xcms.csv") 154 | ``` 155 | 156 | 157 | ```{r} 158 | # QE_HF obiwarp 159 | raw_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/QE_HF/mzml") 160 | raw_xdata <- extract_peaks(raw_xdata, raw_result_path, raw_samples) 161 | raw_grouped <- obiwarp_align(raw_xdata, profStep = 1, bw = 6, binsize = 0.01) 162 | write_results(raw_grouped, "D:/workspace/GAligner/data/QE_HF_results_metapro/QE_HF_obiwarp_aligned_xcms.csv") 163 | 164 | ``` 165 | 166 | 167 | ```{r} 168 | # MTBLS group 169 | mtbls_xdata <- load_mzml(mzml_path = "D:/data/MTBLS562/mzml") 170 | mtbls_xdata <- extract_peaks(mtbls_xdata, mtbls_result_path, mtbls_samples) 171 | align_time <- Sys.time() 172 | mtbls_grouped <- group_align(mtbls_xdata, bw = 10, binsize = 0.02) 173 | write_results(mtbls_grouped, "D:/workspace/GAligner/data/MTBLS562_results_metapro/MTBLS562_group_aligned_xcms.csv") 174 | print("Align time taken: ") 175 | print(Sys.time() - align_time) 176 | ``` 177 | 178 | 179 | ```{r} 180 | # MTBLS obiwarp 181 | mtbls_xdata <- load_mzml(mzml_path = "D:/data/MTBLS562/mzml") 182 | mtbls_xdata <- extract_peaks(mtbls_xdata, mtbls_result_path, mtbls_samples) 183 | align_time <- Sys.time() 184 | mtbls_grouped <- obiwarp_align(mtbls_xdata, profStep = 1, bw = 10, binsize = 0.02) 185 | write_results(mtbls_grouped, "D:/workspace/GAligner/data/MTBLS562_results_metapro/MTBLS562_obiwarp_aligned_xcms.csv") 186 | print("Align time taken: ") 187 | print(Sys.time() - align_time) 188 | 189 | ``` 190 | 191 | 192 | -------------------------------------------------------------------------------- /data/evaluate_mzmine2_galigner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams 3 | from src.main_galigner import GAligner 4 | 5 | 6 | def eval_wiff(folder_name, solver, vlsns_init_mode): 7 | result_file_path = os.path.join(os.getcwd(), folder_name, 'mzmine2') 8 | 9 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 10 | rt_col_num=2, mz_col_num=1, area_col_num=3) 11 | raw_file_reading_params = RawFileReadingParams() 12 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 13 | rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1) 14 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 15 | area_factor=1, use_ppm=False, solver=solver) 16 | if vlsns_init_mode is not None: 17 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 18 | 19 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 20 | fine_assignment_params) 21 | g_aligner.do_align() 22 | 23 | 24 | def eval_raw(folder_name, solver, vlsns_init_mode): 25 | result_file_path = os.path.join(os.getcwd(), folder_name, 'mzmine2') 26 | 27 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 28 | rt_col_num=2, mz_col_num=1, area_col_num=3) 29 | raw_file_reading_params = RawFileReadingParams() 30 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0, 31 | rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1) 32 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1, 33 | area_factor=1, use_ppm=False, solver=solver) 34 | if vlsns_init_mode is not None: 35 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 36 | 37 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 38 | fine_assignment_params) 39 | g_aligner.do_align() 40 | 41 | 42 | def eval_mtbls(folder_name, solver, vlsns_init_mode): 43 | result_file_path = os.path.join(os.getcwd(), folder_name, 'mzmine2') 44 | 45 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 46 | rt_col_num=2, mz_col_num=1, area_col_num=3) 47 | raw_file_reading_params = RawFileReadingParams() 48 | 49 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 50 | rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1) 51 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 52 | area_factor=1, use_ppm=False, solver=solver) 53 | if vlsns_init_mode is not None: 54 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 55 | 56 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 57 | fine_assignment_params) 58 | g_aligner.do_align() 59 | 60 | 61 | # TripleTOF_6600 62 | # eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None) 63 | # eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None) 64 | # eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None) 65 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr') 66 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg') 67 | 68 | # QE_HF 69 | # eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None) 70 | # eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None) 71 | # eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None) 72 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr') 73 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg') 74 | 75 | # MTBLS562 76 | # eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None) 77 | # eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None) 78 | # eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None) 79 | # eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg') 80 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr') 81 | 82 | -------------------------------------------------------------------------------- /data/evaluate_openms_galigner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams 3 | from src.main_galigner import GAligner 4 | 5 | 6 | def eval_wiff(folder_name, solver, vlsns_init_mode): 7 | result_file_path = os.path.join(os.getcwd(), folder_name, 'openms') 8 | 9 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 10 | rt_col_num=2, mz_col_num=1, area_col_num=3) 11 | raw_file_reading_params = RawFileReadingParams() 12 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 13 | rt_tolerance=0.5, rt_residual_threshold=0.04, degree=1) 14 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 15 | area_factor=1, use_ppm=False, solver=solver) 16 | if vlsns_init_mode is not None: 17 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 18 | 19 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 20 | fine_assignment_params) 21 | g_aligner.do_align() 22 | 23 | 24 | def eval_raw(folder_name, solver, vlsns_init_mode): 25 | result_file_path = os.path.join(os.getcwd(), folder_name, 'openms') 26 | 27 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 28 | rt_col_num=2, mz_col_num=1, area_col_num=3) 29 | raw_file_reading_params = RawFileReadingParams() 30 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0, 31 | rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1) 32 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1, 33 | area_factor=1, use_ppm=False, solver=solver) 34 | if vlsns_init_mode is not None: 35 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 36 | 37 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 38 | fine_assignment_params) 39 | g_aligner.do_align() 40 | 41 | 42 | def eval_mtbls(folder_name, solver, vlsns_init_mode): 43 | result_file_path = os.path.join(os.getcwd(), folder_name, 'openms4') 44 | 45 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0, 46 | rt_col_num=2, mz_col_num=1, area_col_num=3) 47 | raw_file_reading_params = RawFileReadingParams() 48 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 49 | rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1) 50 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 51 | area_factor=1, use_ppm=False, solver=solver) 52 | if vlsns_init_mode is not None: 53 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 54 | 55 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 56 | fine_assignment_params) 57 | g_aligner.do_align() 58 | 59 | 60 | # # TripleTOF_6600 61 | # eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None) 62 | # eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None) 63 | # eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None) 64 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr') 65 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg') 66 | # 67 | # # QE_HF 68 | # eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None) 69 | # eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None) 70 | # eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None) 71 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr') 72 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg') 73 | 74 | # MTBLS562 75 | # eval_mtbls('MTBLS562', solver='local_nearest', vlsns_init_mode=None) 76 | eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None) 77 | # eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None) 78 | # eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None) 79 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg') 80 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr') -------------------------------------------------------------------------------- /data/evaluate_xcms_galigner.py: -------------------------------------------------------------------------------- 1 | import os 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams 3 | from src.main_galigner import GAligner 4 | 5 | 6 | def eval_wiff(folder_name, solver, vlsns_init_mode): 7 | result_file_path = os.path.join(os.getcwd(), folder_name, 'xcms') 8 | 9 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 10 | rt_col_num=5, mz_col_num=2, area_col_num=8) 11 | raw_file_reading_params = RawFileReadingParams() 12 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 13 | rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1) 14 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 15 | area_factor=1, use_ppm=False, solver=solver) 16 | if vlsns_init_mode is not None: 17 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 18 | 19 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 20 | fine_assignment_params) 21 | g_aligner.do_align() 22 | 23 | 24 | def eval_raw(folder_name, solver, vlsns_init_mode): 25 | result_file_path = os.path.join(os.getcwd(), folder_name, 'xcms') 26 | 27 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 28 | rt_col_num=5, mz_col_num=2, area_col_num=8) 29 | raw_file_reading_params = RawFileReadingParams() 30 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0, 31 | rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1) 32 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1, 33 | area_factor=1, use_ppm=False, solver=solver) 34 | if vlsns_init_mode is not None: 35 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 36 | 37 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 38 | fine_assignment_params) 39 | g_aligner.do_align() 40 | 41 | def eval_mtbls(folder_name, solver, vlsns_init_mode): 42 | result_file_path = os.path.join(os.getcwd(), folder_name, 'xcms') 43 | 44 | result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1, 45 | rt_col_num=4, mz_col_num=1, area_col_num=7) 46 | raw_file_reading_params = RawFileReadingParams() 47 | coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0, 48 | rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1) 49 | fine_assignment_params = FineAssignmentParams(rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1, 50 | area_factor=1, use_ppm=False, solver=solver) 51 | if vlsns_init_mode is not None: 52 | fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode 53 | 54 | g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params, 55 | fine_assignment_params) 56 | g_aligner.do_align() 57 | 58 | 59 | # # TripleTOF_6600 60 | # eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None) 61 | # eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None) 62 | # eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None) 63 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr') 64 | # eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg') 65 | # 66 | # # QE_HF 67 | # eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None) 68 | # eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None) 69 | # eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None) 70 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr') 71 | # eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg') 72 | 73 | # MTBLS562 74 | eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None) 75 | # eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None) 76 | # eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None) 77 | eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg') 78 | # eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr') 79 | -------------------------------------------------------------------------------- /data/openms_result_converter.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | from pyopenms import ConsensusXMLFile, ConsensusMap, FeatureXMLFile, FeatureMap 4 | import numpy as np 5 | import csv 6 | 7 | 8 | def convert_consensusxml_to_csv(xml_path, csv_path, file_names): 9 | consensusMap = ConsensusMap() 10 | ConsensusXMLFile().load(xml_path, consensusMap) 11 | 12 | csv_file = open(csv_path, 'w') 13 | writer = csv.writer(csv_file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='') 14 | 15 | first_row = ['mz', 'rt', 'area', 'need_assign'] 16 | for file_name in file_names: 17 | first_row += [file_name + "_mz", file_name + "_rt", file_name + "_area"] 18 | writer.writerow(first_row) 19 | 20 | for i in range(consensusMap.size()): 21 | consensusFeature = consensusMap[i] 22 | avg_mz = consensusFeature.getMZ() 23 | avg_rt = consensusFeature.getRT() / 60 24 | avg_area = consensusFeature.getIntensity() 25 | features = np.zeros((len(file_names), 3)) 26 | for feature in consensusFeature.getFeatureList(): 27 | mapIndex = feature.getMapIndex() 28 | features[mapIndex][0] = feature.getMZ() 29 | features[mapIndex][1] = feature.getRT() / 60 30 | features[mapIndex][2] = feature.getIntensity() 31 | features = features.ravel().tolist() 32 | row = [avg_mz, avg_rt, avg_area, 0] + features 33 | writer.writerow(row) 34 | csv_file.close() 35 | print(xml_path + " convertion finished.") 36 | 37 | 38 | def convert_featurexml_to_csv(xml_path, csv_path): 39 | featureMap = FeatureMap() 40 | FeatureXMLFile().load(xml_path, featureMap) 41 | 42 | csv_file = open(csv_path, 'w') 43 | writer = csv.writer(csv_file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='') 44 | for feature in featureMap: 45 | writer.writerow([feature.getMZ(), feature.getRT() / 60, feature.getIntensity()]) 46 | csv_file.close() 47 | 48 | print(xml_path + " convertion finished.") 49 | 50 | 51 | if __name__ == '__main__': 52 | 53 | wiff_sample_names = ['SampleA_1', 'SampleA_2', 'SampleA_3', 'SampleA_4', 54 | 'SampleB_1', 'SampleB_2', 'SampleB_3', 'SampleB_4'] 55 | raw_sample_names = ['SA1', 'SA2', 'SA3', 'SA4', 'SA5', 'SB1', 'SB2', 'SB3', 'SB4', 'SB5'] 56 | 57 | mtbls_sample_names = ['12W-1', '12W-2', '12W-3', '12W-4', '12W-5', '12W-6', '12W-7', '12W-8', 58 | '24W-1', '24W-2', '24W-3', '24W-4', '24W-5', '24W-6', '24W-7', '24W-8', 59 | '32W-1', '32W-2', '32W-3', '32W-4', '32W-5', '32W-6', '32W-7', '32W-8', 60 | '4W-1', '4W-2', '4W-3', '4W-4', '4W-5', '4W-6', '4W-7', '4W-8', 61 | '52W-1', '52W-2', '52W-3', '52W-4', '52W-5', '52W-6', '52W-7', '52W-8'] 62 | 63 | # wiff_featurexml_files = glob.glob("D:/workspace/GAligner/data/TripleTOF_6600/openms/*.featureXML") 64 | # for file in wiff_featurexml_files: 65 | # convert_featurexml_to_csv(file, file.replace("featureXML", "csv")) 66 | # 67 | # raw_featurexml_files = glob.glob("D:/workspace/GAligner/data/QE_HF/openms/*.featureXML") 68 | # for file in raw_featurexml_files: 69 | # convert_featurexml_to_csv(file, file.replace("featureXML", "csv")) 70 | 71 | mtbls_featurexml_files = glob.glob("D:/workspace/GAligner/data/MTBLS562/openms/*.featureXML") 72 | for file in mtbls_featurexml_files: 73 | convert_featurexml_to_csv(file, file.replace("featureXML", "csv")) 74 | 75 | # wiff_consensusxml_file = "D:/workspace/GAligner/data/TripleTOF_6600_results_openms/wiff_aligned.consensusXML" 76 | # wiff_csv_file = "D:/workspace/GAligner/data/TripleTOF_6600_results_openms/openms_aligned.csv" 77 | # convert_consensusxml_to_csv(wiff_consensusxml_file, wiff_csv_file, wiff_sample_names) 78 | # 79 | # raw_consensusxml_file = "D:/workspace/GAligner/data/QE_HF_results_openms/raw_aligned.consensusXML" 80 | # raw_csv_file = "D:/workspace/GAligner/data/QE_HF_results_openms/openms_aligned.csv" 81 | # convert_consensusxml_to_csv(raw_consensusxml_file, raw_csv_file, raw_sample_names) 82 | 83 | mtbls_consensusxml_file = "D:/workspace/GAligner/data/MTBLS562_results_openms/openms_wiff.consensusXML" 84 | mtbls_csv_file = "D:/workspace/GAligner/data/MTBLS562_results_openms/openms_wiff.csv" 85 | convert_consensusxml_to_csv(mtbls_consensusxml_file, mtbls_csv_file, mtbls_sample_names) 86 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.2 2 | scipy>=1.9.3 3 | pytest>=7.0.1 4 | pyteomics>=4.4.2 5 | lxml>=4.9.1 6 | matplotlib>=3.3.4 7 | networkx>=3.0 8 | gurobipy>=9.1.2 9 | pandas>=1.1.5 10 | ortools>=9.5 11 | scikit_learn>=1.2.0 12 | xlrd>=2.0.1 13 | openpyxl>=3.0.10 14 | -------------------------------------------------------------------------------- /src/fine_alignment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import networkx as nx 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | class Graph: 9 | def __init__(self, graph_params): 10 | self.mz_tolerance = graph_params.mz_tolerance 11 | self.rt_tolerance = graph_params.rt_tolerance 12 | self.use_ppm = graph_params.use_ppm 13 | self.mz_factor = graph_params.mz_factor 14 | self.rt_factor = graph_params.rt_factor 15 | self.area_factor = graph_params.area_factor 16 | 17 | def do_build(self, data_list): 18 | g = nx.Graph() 19 | data_lens = [] 20 | start_idxes = [0] 21 | node_idx = 0 22 | 23 | print('\tConverting result to graph nodes...') 24 | start_time = time.time() 25 | print('\r\t[{}] 0/ 1 Time cost:{:.1f}s'.format('-' * 50, time.time() - start_time), end='') 26 | for i, data in enumerate(data_list): 27 | data_lens += [len(data)] 28 | start_idxes += [start_idxes[-1] + len(data)] 29 | for point in data: 30 | g.add_node(node_idx, data_idx=i, mz=point[0], rt=point[1], area=point[2]) 31 | node_idx += 1 32 | print('\r\t[{}] 1/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time)) 33 | 34 | print('\tAnalyzing node relations and build edges...') 35 | start_time = time.time() 36 | comparison_idx = 0 37 | total_comparisons = ((len(data_list) - 1) // 2) * len(data_list) + ((len(data_list) - 1) % 2) * ( 38 | len(data_list) // 2) 39 | 40 | def build_edges(g, left_data, right_data, left_start_idx, right_start_idx, mz_tolerance, use_ppm, rt_tolerance, 41 | mz_factor, rt_factor, area_factor, window_factor): 42 | edges = [] 43 | right_idx = 0 44 | for left_idx in range(len(left_data)): 45 | adj_nodes = np.array(list(nx.edges(g, left_start_idx + left_idx))) 46 | if len(adj_nodes) > 0 and np.sum((adj_nodes[:, 1] >= right_start_idx) * 47 | (adj_nodes[:, 1] < right_start_idx + len(right_data))) > 0: 48 | continue 49 | tmp_mz_tolerance = mz_tolerance 50 | if use_ppm: 51 | tmp_mz_tolerance = left_data[left_idx][0] * mz_tolerance * 1e-6 52 | mz_start = left_data[left_idx][0] - tmp_mz_tolerance * window_factor 53 | mz_end = left_data[left_idx][0] + tmp_mz_tolerance * window_factor 54 | if right_data[right_idx][0] > mz_end: 55 | continue 56 | while right_idx < len(right_data) and right_data[right_idx][0] < mz_start: 57 | right_idx += 1 58 | if right_idx >= len(right_data): 59 | break 60 | 61 | rt_start = left_data[left_idx][1] - rt_tolerance * window_factor 62 | rt_end = left_data[left_idx][1] + rt_tolerance * window_factor 63 | for right_iter_idx in range(right_idx, len(right_data)): 64 | if right_data[right_iter_idx][0] > mz_end: 65 | break 66 | if (right_data[right_iter_idx][1] < rt_start) \ 67 | or (right_data[right_iter_idx][1] > rt_end): 68 | continue 69 | dist = mz_factor * abs(left_data[left_idx][0] - right_data[right_iter_idx][0]) / tmp_mz_tolerance + \ 70 | rt_factor * abs(left_data[left_idx][1] - right_data[right_iter_idx][1]) / rt_tolerance 71 | dist /= mz_factor + rt_factor 72 | edges.append([left_start_idx + left_idx, right_start_idx + right_iter_idx, dist]) 73 | edges = np.array(edges) 74 | 75 | # for edge in edges[filtered_edges]: 76 | for edge in edges: 77 | g.add_edge(int(edge[0]), int(edge[1]), weight=edge[2]) 78 | 79 | for i in range(len(data_list)): 80 | for j in range(i + 1, len(data_list)): 81 | left_data = data_list[i] 82 | right_data = data_list[j] 83 | # build_edges(g, left_data, right_data, start_idxes[i], start_idxes[j], self.mz_tolerance, self.use_ppm, 84 | # self.rt_tolerance / 2, self.mz_factor, self.rt_factor, self.area_factor, pow(2, -1)) 85 | build_edges(g, left_data, right_data, start_idxes[i], start_idxes[j], self.mz_tolerance, self.use_ppm, 86 | self.rt_tolerance, self.mz_factor, self.rt_factor, self.area_factor, 1) 87 | comparison_idx += 1 88 | done_progress = int((comparison_idx / total_comparisons) * 50) 89 | print('\r\t[{}{}]{:2d}/{:2d} Time cost:{:.1f}s'.format('▓' * done_progress, '-' * (50 - done_progress), 90 | comparison_idx, total_comparisons, 91 | time.time() - start_time), end='') 92 | print() 93 | print('\tSplitting graph into sub-graphs...') 94 | start_time = time.time() 95 | print('\r\t[{}] 0/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time), end='') 96 | sub_graphs = [g.subgraph(c) for c in nx.connected_components(g)] 97 | max_nodes = 0 98 | max_edges = 0 99 | for s in sub_graphs: 100 | if max_nodes < len(s.nodes): 101 | max_nodes = len(s.nodes) 102 | if max_edges < len(s.edges): 103 | max_edges = len(s.edges) 104 | print('\r\t[{}] 1/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time)) 105 | print('\tGraph built. Total %d sub-graphs. Max %d nodes, %d edges.' % (len(sub_graphs), max_nodes, max_edges)) 106 | 107 | for sub_graph in sub_graphs: 108 | max_area_map = {} 109 | nodes = sub_graph.nodes(data=True) 110 | for node in nodes: 111 | if (node[1]['data_idx'] not in max_area_map.keys()) or (max_area_map[node[1]['data_idx']] < node[1]['area']): 112 | max_area_map[node[1]['data_idx']] = node[1]['area'] 113 | for edge in sub_graph.edges: 114 | dist = sub_graph.edges[edge]['weight'] * (self.mz_factor + self.rt_factor) 115 | area_0 = nodes[edge[0]]['area'] / (max_area_map[nodes[edge[0]]['data_idx']] + 1e-6) 116 | area_1 = nodes[edge[1]]['area'] / (max_area_map[nodes[edge[1]]['data_idx']] + 1e-6) 117 | dist += self.area_factor * (1 - min(area_0, area_1) / (max(area_0, area_1) + 1e-6)) 118 | dist /= self.mz_factor + self.rt_factor + self.area_factor 119 | sub_graph.edges[edge]['weight'] = dist 120 | return sub_graphs, max_nodes 121 | -------------------------------------------------------------------------------- /src/map_solver/greedy_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx, \ 7 | assigned_flatten_idx_to_edges 8 | 9 | 10 | def greedy_assigner(group_node_list, group_len_list, cost_list): 11 | assign_time = time.time() 12 | assigned_idx_list = [] 13 | cost_list[-1] = np.inf 14 | while np.min(cost_list) != np.inf: 15 | # for i in range(max(group_len_list)): 16 | min_cost_idx = np.argmin(cost_list) 17 | assigned_idx_list.append(min_cost_idx) 18 | # if i == max(group_len_list) - 1: # save time for last iteration 19 | # break 20 | idx_tuple = flatten_to_matrix_idx(min_cost_idx, group_len_list) 21 | settled = np.zeros(group_len_list) 22 | for j, idx in enumerate(idx_tuple): 23 | if idx >= len(group_node_list[j]): # not settle dumb nodes 24 | continue 25 | slices = () 26 | for k in range(len(idx_tuple)): 27 | if k == j: 28 | slices += idx, 29 | continue 30 | tmp_group_len = group_len_list[k] 31 | tmp_slice = slice(tmp_group_len) 32 | slices += tmp_slice, 33 | settled[slices] = 1 34 | cost_list[settled.ravel() == 1] = float('inf') 35 | assign_time = time.time() - assign_time 36 | return assigned_idx_list, assign_time 37 | 38 | 39 | def greedy_solve(sub_graph, graph_params, debug=False): 40 | # Preprocessing 41 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 42 | if no_need_to_assign: 43 | return [sub_graph.nodes(data=True)] 44 | 45 | # Prepare cost list 46 | cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size, greedy=True) 47 | 48 | # Greedy assign 49 | assigned_idx_list, assign_time = greedy_assigner(group_node_list, group_len_list, cost_list) 50 | 51 | # Confirm assignment result 52 | assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph, group_node_list, group_len_list) 53 | if debug and len(sub_graph) > 20: 54 | print(len(sub_graph), combination_size, pre_time, cost_time, assign_time, refine_time) 55 | return assignment_nodes 56 | -------------------------------------------------------------------------------- /src/map_solver/gurobi_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | import gurobipy as gp 5 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx,\ 6 | flatten_idx_digit_match, assigned_flatten_idx_to_edges 7 | 8 | 9 | def gurobi_solve(sub_graph, graph_params, debug=False): 10 | # Preprocessing 11 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 12 | if no_need_to_assign: 13 | return [sub_graph.nodes(data=True)] 14 | # Prepare cost list 15 | cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size, greedy=True) 16 | 17 | # Set Gurobi optimizer 18 | assign_time = time.time() 19 | m = gp.Model("Multidimensional Assignment Problem") 20 | m.setParam('OutputFlag', 0) 21 | 22 | x = [] 23 | for i in range(combination_size): 24 | idx_tuple = flatten_to_matrix_idx(i, group_len_list) 25 | x.append(m.addVar(vtype=gp.GRB.BINARY, name="x" + "%d" * len(group_len_list) % idx_tuple)) 26 | 27 | # i: digit from right to left 28 | for i in range(len(group_len_list)): 29 | non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i]) 30 | for j in range(non_dumb_node_num): 31 | m.addConstr(gp.quicksum([x[k] for k in range(combination_size) 32 | if flatten_idx_digit_match(k, group_len_list, i, j)]) == 1, 33 | name='Constraint%d%d' % (i, j)) 34 | m.addConstr(x[-1] == 0, name='ConstraintDumb') 35 | 36 | m.setObjective(gp.quicksum(x[i] * cost_list[i] for i in range(combination_size)), gp.GRB.MINIMIZE) 37 | m.optimize() 38 | assigned_idx_list = [] 39 | for i in range(len(x)): 40 | if x[i].x > 1e-6: 41 | assigned_idx_list.append(i) 42 | assign_time = time.time() - assign_time 43 | 44 | # Confirm assignment result 45 | assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph, group_node_list, group_len_list) 46 | if debug: 47 | print(len(sub_graph), pre_time, cost_time, assign_time, refine_time) 48 | return assignment_nodes 49 | -------------------------------------------------------------------------------- /src/map_solver/local_bipartite_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | import numpy as np 5 | from scipy.optimize import linear_sum_assignment 6 | from src.map_solver.base_solver import graph_preprocessing 7 | 8 | 9 | def bipartite_assigner(sub_graph, group_node_list): 10 | assign_time = time.time() 11 | 12 | node_length_list = [len(nodes) for nodes in group_node_list] 13 | descend_idxes = np.argsort(-1 * np.array(node_length_list)) 14 | 15 | assigned_nodes = group_node_list[descend_idxes[0]].copy() 16 | assignments = [[node] for node in assigned_nodes] 17 | for i in range(1, len(descend_idxes)): 18 | connected_nodes = [] 19 | unconnected_nodes = [] 20 | connected_assignments_idxes = [] 21 | for node in group_node_list[descend_idxes[i]]: 22 | tmp_assignment_idxes = [] 23 | for j, assignment in enumerate(assignments): 24 | match_idx = -1 25 | tmp_min_weight = float('inf') 26 | for k, assigned_node in enumerate(assignment): 27 | if sub_graph.has_edge(node, assigned_node): 28 | tmp_weight = sub_graph.get_edge_data(node, assigned_node)['weight'] 29 | if tmp_weight < tmp_min_weight: 30 | tmp_min_weight = tmp_weight 31 | match_idx = k 32 | if match_idx != -1: 33 | tmp_assignment_idxes.append([j, match_idx]) 34 | if len(tmp_assignment_idxes) > 0: 35 | connected_nodes.append(node) 36 | connected_assignments_idxes.append(tmp_assignment_idxes) 37 | else: 38 | unconnected_nodes.append(node) 39 | 40 | if len(connected_nodes) > 0: 41 | cost_matrix = np.ones([len(connected_nodes), len(assignments)]) * 100 42 | for m in range(len(connected_nodes)): 43 | for match_idxes in connected_assignments_idxes[m]: 44 | assigned_node = assignments[match_idxes[0]][match_idxes[1]] 45 | tmp_weight = sub_graph.get_edge_data(connected_nodes[m], assigned_node)['weight'] 46 | cost_matrix[m][match_idxes[0]] = tmp_weight 47 | assigned_idx = linear_sum_assignment(cost_matrix) 48 | 49 | for j in range(len(connected_nodes)): 50 | if j not in assigned_idx[0]: 51 | unconnected_nodes.append(connected_nodes[j]) 52 | 53 | for j in range(len(assigned_idx[0])): 54 | if cost_matrix[assigned_idx[0][j]][assigned_idx[1][j]] == 100: 55 | unconnected_nodes.append(connected_nodes[assigned_idx[0][j]]) 56 | continue 57 | from_node = connected_nodes[assigned_idx[0][j]] 58 | assignments[assigned_idx[1][j]].append(from_node) 59 | 60 | for node in unconnected_nodes: 61 | assignments.append([node]) 62 | 63 | assigned_nodes += group_node_list[descend_idxes[i]] 64 | 65 | assign_time = time.time() - assign_time 66 | return assignments, assign_time 67 | 68 | 69 | def local_bipartite_solve(sub_graph, graph_params, debug=False): 70 | 71 | # Preprocessing 72 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 73 | if no_need_to_assign: 74 | return [sub_graph.nodes(data=True)] 75 | 76 | # Local bipartite assign 77 | assignments, assign_time = bipartite_assigner(sub_graph, group_node_list) 78 | 79 | # Confirm assignment result 80 | start_time = time.time() 81 | assignment_nodes = [] 82 | for nodes in assignments: 83 | s = sub_graph.subgraph(nodes) 84 | assignment_nodes.append(s.nodes(data=True)) 85 | refine_time = time.time() - start_time 86 | 87 | if debug and len(sub_graph) > 20: 88 | print(len(sub_graph), combination_size, pre_time, assign_time, refine_time) 89 | return assignment_nodes 90 | -------------------------------------------------------------------------------- /src/map_solver/local_nearest_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from src.map_solver.base_solver import graph_preprocessing 7 | 8 | 9 | def nearest_assigner(sub_graph, group_node_list): 10 | assign_time = time.time() 11 | 12 | node_length_list = [len(nodes) for nodes in group_node_list] 13 | descend_idxes = np.argsort(-1 * np.array(node_length_list)) 14 | 15 | assignments = [[node] for node in group_node_list[descend_idxes[0]]] 16 | for i in range(1, len(descend_idxes)): 17 | # build a cost map 18 | cost_matrix = np.ones([len(group_node_list[descend_idxes[i]]), len(assignments)]) 19 | for j in range(len(group_node_list[descend_idxes[i]])): 20 | node = group_node_list[descend_idxes[i]][j] 21 | for k, assignment in enumerate(assignments): 22 | tmp_min_weight = 100 23 | for assigned_node in assignment: 24 | if sub_graph.has_edge(node, assigned_node): 25 | tmp_weight = sub_graph.get_edge_data(node, assigned_node)['weight'] 26 | if tmp_weight < tmp_min_weight: 27 | tmp_min_weight = tmp_weight 28 | cost_matrix[j][k] = tmp_min_weight 29 | 30 | for j in range(cost_matrix.shape[0]): 31 | min_idx = np.argmin(cost_matrix) 32 | min_x_idx = min_idx // cost_matrix.shape[1] 33 | min_y_idx = min_idx - min_x_idx * cost_matrix.shape[1] 34 | tmp_node = group_node_list[descend_idxes[i]][min_x_idx] 35 | tmp_cost = cost_matrix[min_x_idx, min_y_idx] 36 | if tmp_cost != 100: 37 | assignments[min_y_idx].append(tmp_node) 38 | else: 39 | assignments.append([tmp_node]) 40 | cost_matrix[min_x_idx, :] = float('inf') 41 | cost_matrix[:, min_y_idx] = float('inf') 42 | 43 | assign_time = time.time() - assign_time 44 | return assignments, assign_time 45 | 46 | 47 | def local_nearest_solve(sub_graph, graph_params, debug=False): 48 | 49 | # Preprocessing 50 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 51 | if no_need_to_assign: 52 | return [sub_graph.nodes(data=True)] 53 | 54 | # Local nearest assign 55 | assignments, assign_time = nearest_assigner(sub_graph, group_node_list) 56 | 57 | # Confirm assignment result 58 | start_time = time.time() 59 | assignment_nodes = [] 60 | for nodes in assignments: 61 | s = sub_graph.subgraph(nodes) 62 | assignment_nodes.append(s.nodes(data=True)) 63 | refine_time = time.time() - start_time 64 | 65 | if debug and len(sub_graph) > 20: 66 | print(len(sub_graph), combination_size, pre_time, assign_time, refine_time) 67 | return assignment_nodes 68 | -------------------------------------------------------------------------------- /src/map_solver/ortools_solver.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import networkx as nx 4 | from ortools.linear_solver import pywraplp 5 | from ortools.sat.python import cp_model 6 | 7 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx, \ 8 | flatten_idx_digit_match, assigned_flatten_idx_to_edges 9 | 10 | 11 | def ortools_mip_solve(sub_graph, graph_params, debug=False): 12 | # Preprocessing 13 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 14 | if no_need_to_assign: 15 | return [sub_graph.nodes(data=True)] 16 | # Prepare cost list 17 | cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size) 18 | 19 | assign_time = time.time() 20 | 21 | # Solver 22 | # Create the mip solver with the SCIP backend. 23 | solver = pywraplp.Solver.CreateSolver('PDLP') 24 | 25 | # Variables 26 | x = [] 27 | for i in range(combination_size): 28 | idx_tuple = flatten_to_matrix_idx(i, group_len_list) 29 | x.append(solver.IntVar(0, 1, name="x" + "%d" * len(group_len_list) % idx_tuple)) 30 | 31 | # Constraints 32 | for i in range(len(group_len_list)): 33 | non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i]) 34 | for j in range(non_dumb_node_num): 35 | solver.Add(solver.Sum([x[k] for k in range(combination_size) 36 | if flatten_idx_digit_match(k, group_len_list, i, j)]) == 1, 37 | name='Constraint%d%d' % (i, j)) 38 | 39 | # Objective 40 | objective_terms = [] 41 | for i in range(combination_size): 42 | objective_terms.append(x[i] * cost_list[i]) 43 | solver.Minimize(solver.Sum(objective_terms)) 44 | 45 | # Solve 46 | status = solver.Solve() 47 | 48 | assigned_idx_list = [] 49 | for i in range(len(x)): 50 | if x[i].solution_value() > 0.5: 51 | assigned_idx_list.append(i) 52 | assign_time = time.time() - assign_time 53 | 54 | # Confirm assignment result 55 | assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph, 56 | group_node_list, group_len_list) 57 | if debug: 58 | print(len(sub_graph), pre_time, cost_time, assign_time, refine_time) 59 | return assignment_nodes 60 | 61 | 62 | def ortools_cp_solve(sub_graph, graph_params, debug=False): 63 | # Preprocessing 64 | no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph) 65 | if no_need_to_assign: 66 | return [sub_graph.nodes(data=True)] 67 | # Prepare cost list 68 | cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size) 69 | 70 | assign_time = time.time() 71 | 72 | # Model 73 | model = cp_model.CpModel() 74 | 75 | # Variables 76 | x = [] 77 | for i in range(combination_size): 78 | idx_tuple = flatten_to_matrix_idx(i, group_len_list) 79 | x.append(model.NewBoolVar('x' + '%d' * len(group_len_list) % idx_tuple)) 80 | 81 | # Constraints 82 | for i in range(len(group_len_list)): 83 | non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i]) 84 | for j in range(non_dumb_node_num): 85 | model.AddExactlyOne([x[k] for k in range(combination_size) 86 | if flatten_idx_digit_match(k, group_len_list, i, j)]) 87 | 88 | # Objective 89 | objective_terms = [] 90 | for i in range(combination_size): 91 | objective_terms.append(x[i] * cost_list[i]) 92 | model.Minimize(sum(objective_terms)) 93 | 94 | # Solve 95 | solver = cp_model.CpSolver() 96 | status = solver.Solve(model) 97 | 98 | assigned_idx_list = [] 99 | for i in range(len(x)): 100 | if solver.BooleanValue(x[i]): 101 | assigned_idx_list.append(i) 102 | assign_time = time.time() - assign_time 103 | 104 | # Confirm assignment result 105 | assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph, 106 | group_node_list, group_len_list) 107 | if debug: 108 | print(len(sub_graph), pre_time, cost_time, assign_time, refine_time) 109 | return assignment_nodes 110 | 111 | -------------------------------------------------------------------------------- /src/raw_file_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | 5 | from pyteomics import mzml 6 | 7 | 8 | class Spectrum: 9 | def __init__(self, rt, mzs, intensities, base_peak_intensity, base_peak_mz, total_ion_current): 10 | self.rt = rt 11 | self.mzs = mzs 12 | self.intensities = intensities 13 | self.base_peak_intensity = base_peak_intensity 14 | self.base_peak_mz = base_peak_mz 15 | self.total_ion_current = total_ion_current 16 | 17 | 18 | class RawFileReader: 19 | def __init__(self, raw_params): 20 | self.min_intensity = max(0, raw_params.min_intensity) 21 | 22 | def load_ms1_spectra(self, result_path): 23 | glob_paths = glob.glob(result_path.split('.')[0] + '*.[mM][zZ][mM][lL]') 24 | assert len(glob_paths) == 1, 'File Error! Result and RAW file name and number should be the same. ' 25 | raw_path = glob_paths[0] 26 | 27 | ms1_spectra = [] 28 | ms_file = mzml.read(raw_path) 29 | for spectrum in ms_file: 30 | if spectrum.get('ms level') != 1: 31 | continue 32 | mzs = spectrum.get('m/z array') 33 | intensities = spectrum.get('intensity array') 34 | rt = spectrum.get('scanList').get('scan')[0].get('scan start time') 35 | idx = intensities > self.min_intensity 36 | mzs = mzs[idx] 37 | intensities = intensities[idx] 38 | base_peak_intensity = spectrum.get('base peak intensity') 39 | base_peak_mz = spectrum.get('base peak m/z') 40 | total_ion_current = spectrum.get('total ion current') 41 | ms1_spectra += [Spectrum(rt, mzs, intensities, base_peak_intensity, base_peak_mz, total_ion_current)] 42 | return ms1_spectra 43 | -------------------------------------------------------------------------------- /src/result_file_reader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | 5 | import numpy as np 6 | 7 | 8 | class ResultFileReader: 9 | def __init__(self, result_params): 10 | 11 | if os.path.isfile(result_params.result_folder_path): 12 | exit('ERROR: result_folder_path must be a folder.') 13 | 14 | self.skip_line = result_params.skip_line 15 | self.mz_col_idx = result_params.mz_col_idx 16 | self.rt_col_idx = result_params.rt_col_idx 17 | self.area_col_idx = result_params.area_col_idx 18 | self.result_folder_path = result_params.result_folder_path 19 | 20 | def load_result(self, result_path): 21 | if result_path.endswith('.tsv') or result_path.endswith('.txt') or result_path.endswith('.hills.csv'): 22 | separator = '\t' 23 | else: 24 | separator = ',' 25 | 26 | result_file = open(result_path, 'r') 27 | for j in range(self.skip_line): 28 | header = result_file.readline().split(separator) 29 | result_data = np.array([line.strip().replace('"', '').split(separator) for line in result_file]) 30 | results = result_data[:, (self.mz_col_idx, self.rt_col_idx, self.area_col_idx)].astype(np.float32) 31 | return results 32 | 33 | def _load_result_paths(self, folder_path): 34 | file_paths = [] 35 | file_count = 0 36 | files = glob.glob(os.path.join(folder_path, '*')) 37 | if len(files) == 0: 38 | return file_paths 39 | for file in files: 40 | if os.path.isfile(file) and (file.endswith('.csv') or file.endswith('.tsv') or file.endswith('.txt')): 41 | file_paths.append(file) 42 | file_count += 1 43 | if os.path.isdir(file): 44 | sub_file_paths, sub_file_count = self._load_result_paths(file) 45 | if len(sub_file_paths) > 0: 46 | file_paths.append(sub_file_paths) 47 | file_count += sub_file_count 48 | return file_paths, file_count 49 | 50 | def load_result_paths(self): 51 | file_paths, file_count = self._load_result_paths(self.result_folder_path) 52 | return file_paths, file_count 53 | -------------------------------------------------------------------------------- /src/tools/graph_viewer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import networkx as nx 5 | from matplotlib import cm 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | 9 | root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 10 | 11 | 12 | def plt_coarse_registration_results(coarse_registration_data, save=True, save_folder=''): 13 | plt.rcParams['savefig.dpi'] = 1200 14 | plt.figure(figsize=(8, 4)) 15 | path = os.path.join(root_dir, 'experiments', save_folder, 'coarse_alignment_figs') 16 | if not os.path.exists(path): 17 | os.mkdir(path) 18 | # for i in range(len(coarse_registration_data)): 19 | for i in range(len(coarse_registration_data)): 20 | ori_rts = coarse_registration_data[i][0] 21 | warped_rts = coarse_registration_data[i][1] 22 | plt.plot(warped_rts, warped_rts - ori_rts, label=i+1, linewidth=1) 23 | plt.xlabel('Warped RT') 24 | plt.ylabel('Warped RT - Raw RT') 25 | plt.legend() 26 | plt.savefig(os.path.join(path, time.strftime('%H%M%S', time.localtime()) + '_coarse_registration_residual.png')) if save else plt.show() 27 | plt.clf() 28 | 29 | plt.rcParams['savefig.dpi'] = 1200 30 | fig = plt.figure(figsize=(8, 6)) 31 | ax1 = fig.add_subplot(2, 1, 1) 32 | ax2 = fig.add_subplot(2, 1, 2) 33 | for i in range(len(coarse_registration_data)): 34 | ori_rts = coarse_registration_data[i][0] 35 | warped_rts = coarse_registration_data[i][1] 36 | ints = coarse_registration_data[i][2] 37 | ax1.plot(ori_rts, ints, label=i+1, linewidth=1) 38 | ax2.plot(warped_rts, ints, label=i+1, linewidth=1) 39 | ax1.set_xlabel('Raw RT') 40 | ax2.set_xlabel('Warped RT') 41 | ax1.set_ylabel('Intensity') 42 | ax2.set_ylabel('Intensity') 43 | plt.legend() 44 | plt.tight_layout() 45 | plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.45) 46 | plt.savefig(os.path.join(path, time.strftime('%H%M%S', time.localtime()) + '_coarse_registration_tic.png')) if save else plt.show() 47 | plt.clf() 48 | 49 | 50 | def plt_scatter(g, save=False, save_folder='', save_name=''): 51 | plt.rcParams['savefig.dpi'] = 1200 52 | mzs = list(nx.get_node_attributes(g, 'mz').values()) 53 | rts = list(nx.get_node_attributes(g, 'rt').values()) 54 | c = list(nx.get_node_attributes(g, 'data_idx')) 55 | plt.scatter(rts, mzs, c=c, cmap='tab10') 56 | path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs') 57 | if not os.path.exists(path): 58 | os.mkdir(path) 59 | plt.savefig(os.path.join(path, save_name + '_scatter.png')) if save else plt.show() 60 | plt.clf() 61 | 62 | 63 | def plt_assignment(g, node_assignment_group, save=False, save_folder='', save_name='', show_weight=True): 64 | plt.rcParams['savefig.dpi'] = 1200 65 | plt.rcParams['figure.dpi'] = 400 66 | mzs = list(nx.get_node_attributes(g, 'mz').values()) 67 | rts = list(nx.get_node_attributes(g, 'rt').values()) 68 | c = list(nx.get_node_attributes(g, 'data_idx')) 69 | path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs') 70 | if not os.path.exists(path): 71 | os.mkdir(path) 72 | plt.scatter(rts, mzs, c=c, s=15, cmap='tab10') 73 | for assignment_nodes in node_assignment_group: 74 | color = np.random.rand(3,) 75 | nodes = [] 76 | for node in assignment_nodes: 77 | nodes.append(node[0]) 78 | for node_set in nx.connected_components(g.subgraph(nodes)): 79 | sub_graph = g.subgraph(node_set) 80 | assignment_edges = nx.minimum_spanning_tree(sub_graph).edges 81 | 82 | for edge in assignment_edges: 83 | mzs = [] 84 | rts = [] 85 | mzs.append(g.nodes[edge[0]]['mz']) 86 | mzs.append(g.nodes[edge[1]]['mz']) 87 | rts.append(g.nodes[edge[0]]['rt']) 88 | rts.append(g.nodes[edge[1]]['rt']) 89 | weight = '%.2f' % g.get_edge_data(edge[0], edge[1])['weight'] 90 | plt.plot(rts, mzs, alpha=0.4, c=color) 91 | if show_weight: 92 | plt.text((rts[0] + rts[1]) / 2, (mzs[0] + mzs[1]) / 2, weight) 93 | plt.savefig(os.path.join(path, save_name + '_assigned.png')) if save else plt.show() 94 | plt.clf() 95 | 96 | 97 | def plt_all_edges(g, save=False, save_folder='', save_name='', show_weight=True): 98 | plt.rcParams['savefig.dpi'] = 400 99 | plt.rcParams['figure.dpi'] = 400 100 | mzs = list(nx.get_node_attributes(g, 'mz').values()) 101 | rts = list(nx.get_node_attributes(g, 'rt').values()) 102 | c = list(nx.get_node_attributes(g, 'data_idx')) 103 | path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs') 104 | if not os.path.exists(path): 105 | os.mkdir(path) 106 | plt.scatter(rts, mzs, c=c, s=15, cmap='tab10') 107 | 108 | assignment_edges = g.edges 109 | for edge in assignment_edges: 110 | mzs = [] 111 | rts = [] 112 | mzs.append(g.nodes[edge[0]]['mz']) 113 | mzs.append(g.nodes[edge[1]]['mz']) 114 | rts.append(g.nodes[edge[0]]['rt']) 115 | rts.append(g.nodes[edge[1]]['rt']) 116 | weight = '%.2f' % g.get_edge_data(edge[0], edge[1])['weight'] 117 | plt.plot(rts, mzs, alpha=0.4) 118 | if show_weight: 119 | plt.text((rts[0] + rts[1]) / 2, (mzs[0] + mzs[1]) / 2, weight) 120 | plt.savefig(os.path.join(path, save_name + '_all_edges.png')) if save else plt.show() 121 | plt.clf() 122 | 123 | 124 | def show(g): 125 | pos, labels = _get_layout(g) 126 | cmap = cm.get_cmap('tab10') 127 | nx.draw(g, pos=pos, node_size=10, cmap=cmap, node_color=list(labels.values()), font_weight='bold') 128 | plt.show() 129 | 130 | 131 | def show_multipartite(g): 132 | pos, labels = _get_layout(g) 133 | cmap = cm.get_cmap('tab10') 134 | pos = nx.multipartite_layout(g, subset_key='data_idx') 135 | nx.draw(g, pos=pos, node_size=60, cmap=cmap, node_color=list(labels.values()), font_weight='bold') 136 | plt.show() 137 | 138 | 139 | def show_assignment(g, assignment_group): 140 | a = nx.Graph() 141 | a.add_nodes_from(g.nodes(data=True)) 142 | for assignment in assignment_group: 143 | a.add_edges_from(assignment) 144 | show(a) 145 | 146 | 147 | def _get_layout(g): 148 | mzs = np.array(list(nx.get_node_attributes(g, 'mz').values())) 149 | rts = np.array(list(nx.get_node_attributes(g, 'rt').values())) 150 | pos = {} 151 | labels = {} 152 | for node in list(g.nodes(data=True)): 153 | mz = node[1]['mz'] 154 | rt = node[1]['rt'] 155 | pos_mz = (mz - np.min(mzs)) 156 | pos_rt = (rt - np.min(rts)) 157 | pos[node[0]] = np.array([pos_rt, pos_mz]).astype(np.float32) 158 | labels[node[0]] = node[1]['data_idx'] 159 | return pos, labels 160 | -------------------------------------------------------------------------------- /src/tools/param_loader.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | 4 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams 5 | 6 | 7 | def load_params(param_path): 8 | param_file = open(param_path, 'r') 9 | lines = param_file.read().split('\n') 10 | param_map = {} 11 | for line in lines: 12 | splits = line.strip().split(':') 13 | if len(splits) < 2: 14 | continue 15 | elif len(splits) == 2: 16 | param_map[splits[0]] = splits[1] 17 | elif len(splits) == 3: # windows path 18 | param_map[splits[0]] = splits[1] + ':' + splits[2] 19 | else: 20 | print('Wrong parameters.') 21 | return None, None, None, None 22 | 23 | # Result File Reading Params 24 | result_file_path = param_map['result_file_path'] 25 | skip_line = param_map['skip_line'] 26 | rt_col_num = param_map['rt_col_num'] 27 | mz_col_num = param_map['mz_col_num'] 28 | area_col_num = param_map['area_col_num'] 29 | 30 | assert result_file_path is not None 31 | assert skip_line is not None 32 | assert rt_col_num is not None 33 | assert mz_col_num is not None 34 | assert area_col_num is not None 35 | 36 | result_file_reading_params = ResultFileReadingParams(result_folder_path=result_file_path, 37 | skip_line=skip_line, rt_col_num=rt_col_num, 38 | mz_col_num=mz_col_num, area_col_num=area_col_num) 39 | 40 | # Raw File Reading Params 41 | raw_file_path = param_map['raw_file_path'] 42 | min_intensity = float(param_map['min_intensity']) 43 | 44 | raw_file_reading_params = RawFileReadingParams(raw_file_path=raw_file_path, min_intensity=min_intensity) 45 | 46 | # Coarse Registration Params 47 | bin_size = float(param_map['bin_size']) 48 | percent_anchors = float(param_map['percent_anchors']) 49 | score_type = param_map['score_type'] 50 | gap_init = float(param_map['gap_init']) 51 | gap_extend = float(param_map['gap_extend']) 52 | factor_diag = float(param_map['factor_diag']) 53 | factor_gap = float(param_map['factor_gap']) 54 | local_alignment = int(param_map['local_alignment']) 55 | init_penalty = float(param_map['init_penalty']) 56 | 57 | coarse_registration_params = CoarseRegistrationParams(bin_size=bin_size, percent_anchors=percent_anchors, 58 | score_type=score_type, 59 | gap_init=gap_init, gap_extend=gap_extend, 60 | factor_diag=factor_diag, factor_gap=factor_gap, 61 | local_alignment=local_alignment, init_penalty=init_penalty) 62 | 63 | # Fine Assignment Params 64 | rt_tolerance = float(param_map['rt_tolerance']) 65 | mz_tolerance = float(param_map['mz_tolerance']) 66 | use_ppm = bool(param_map['use_ppm']) 67 | mz_factor = float(param_map['mz_factor']) 68 | rt_factor = float(param_map['rt_factor']) 69 | area_factor = float(param_map['area_factor']) 70 | solver = param_map['solver'] 71 | vlsns_init_mode = param_map['vlsns_solution_init_mode'] 72 | vlsns_init_num = int(param_map['vlsns_solution_init_number']) 73 | vlsns_update_mode = param_map['vlsns_solution_update_mode'] 74 | 75 | fine_assignment_params = FineAssignmentParams(mz_tolerance=mz_tolerance, rt_tolerance=rt_tolerance, 76 | use_ppm=use_ppm, mz_factor=mz_factor, rt_factor=rt_factor, 77 | area_factor=area_factor, 78 | solver=solver, vlsns_init_mode=vlsns_init_mode, 79 | vlsns_init_num=vlsns_init_num, vlsns_update_mode=vlsns_update_mode) 80 | 81 | return result_file_reading_params, raw_file_reading_params, coarse_registration_params, fine_assignment_params 82 | 83 | 84 | if __name__ == '__main__': 85 | parser = argparse.ArgumentParser(description='GAligner parameter loader') 86 | 87 | # Result file reading params 88 | parser.add_argument('--param_path', type=str, help='Path to parameter file', required=True) 89 | args = parser.parse_args() 90 | 91 | load_params(args.param_path) 92 | import configparser 93 | parser = configparser.ConfigParser() 94 | parser.get() -------------------------------------------------------------------------------- /src/tools/trace_recorder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import numpy as np 4 | 5 | root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 6 | 7 | 8 | def save_params(save_folder, result_file_params, raw_file_params, coarse_registration_params, fine_assignment_params): 9 | result_root_path = os.path.join(root_dir, 'experiments') 10 | if not os.path.exists(result_root_path): 11 | os.mkdir(result_root_path) 12 | path = os.path.join(result_root_path, save_folder) 13 | if not os.path.exists(path): 14 | os.mkdir(path) 15 | file = open(os.path.join(path, 'params.txt'), 'w') 16 | 17 | # ResultFileReadingParams 18 | file.write('# Result File Reading Params' + '\n') 19 | file.write('\tresult_file_path: ' + result_file_params.result_folder_path + '\n') 20 | file.write('\tskip_line: ' + str(result_file_params.skip_line) + '\n') 21 | file.write('\trt_col_num: ' + str(result_file_params.rt_col_idx + 1) + '\n') 22 | file.write('\tmz_col_num: ' + str(result_file_params.mz_col_idx + 1) + '\n') 23 | file.write('\tarea_col_num: ' + str(result_file_params.area_col_idx + 1) + '\n') 24 | file.write(os.linesep) 25 | 26 | # RawFileReadingParams 27 | file.write('# Raw File Reading Params' + '\n') 28 | file.write('\tmin_intensity: ' + str(raw_file_params.min_intensity) + '\n') 29 | file.write(os.linesep) 30 | 31 | # CoarseRegistrationParams 32 | file.write('# Coarse Registration Params' + '\n') 33 | file.write('\tbin_size: ' + str(coarse_registration_params.bin_size) + '\n') 34 | file.write('\tpercent_anchors: ' + str(coarse_registration_params.percent_anchors) + '\n') 35 | file.write('\tscore_type: ' + str(coarse_registration_params.score_type) + '\n') 36 | file.write('\tgap_init: ' + str(coarse_registration_params.gap_init) + '\n') 37 | file.write('\tgap_extend: ' + str(coarse_registration_params.gap_extend) + '\n') 38 | file.write('\tfactor_diag: ' + str(coarse_registration_params.factor_diag) + '\n') 39 | file.write('\tfactor_gap: ' + str(coarse_registration_params.factor_gap) + '\n') 40 | file.write('\tlocal_alignment: ' + str(coarse_registration_params.local_alignment) + '\n') 41 | file.write('\tinit_penalty: ' + str(coarse_registration_params.init_penalty) + '\n') 42 | file.write(os.linesep) 43 | 44 | # FineAssignmentParams 45 | file.write('# Fine Assignment Params' + '\n') 46 | file.write('\trt_tolerance: ' + str(fine_assignment_params.rt_tolerance) + '\n') 47 | file.write('\tmz_tolerance: ' + str(fine_assignment_params.mz_tolerance) + '\n') 48 | file.write('\tuse_ppm: ' + str(fine_assignment_params.use_ppm) + '\n') 49 | file.write('\tmz_factor: ' + str(fine_assignment_params.mz_factor) + '\n') 50 | file.write('\trt_factor: ' + str(fine_assignment_params.rt_factor) + '\n') 51 | file.write('\tarea_factor: ' + str(fine_assignment_params.area_factor) + '\n') 52 | file.write('\tsolver: ' + str(fine_assignment_params.solver) + '\n') 53 | file.write('\tvlsns_solution_init_mode: ' + str(fine_assignment_params.vlsns_solution_init_mode) + '\n') 54 | file.write('\tvlsns_solution_init_number: ' + str(fine_assignment_params.vlsns_solution_init_number) + '\n') 55 | file.write('\tvlsns_solution_update_mode: ' + str(fine_assignment_params.vlsns_solution_update_mode) + '\n') 56 | file.write(os.linesep) 57 | 58 | file.close() 59 | 60 | 61 | def prepare_result_rows(result_data_list, assignment_nodes_list, need_assign_list): 62 | node_start_idxes = [0] 63 | for i, data in enumerate(result_data_list): 64 | node_start_idxes += [node_start_idxes[-1] + len(data)] 65 | 66 | row_length = 4 67 | row_start_idxes = [] 68 | for result_data in result_data_list: 69 | row_start_idxes.append(row_length) 70 | tmp_row_length = len(result_data[0]) 71 | if tmp_row_length == 3: 72 | row_length += 3 73 | else: 74 | row_length += tmp_row_length - 4 75 | 76 | rows = [] 77 | for i, nodes in enumerate(assignment_nodes_list): 78 | assembled_row = np.zeros(row_length) 79 | tmp_mzs = [] 80 | tmp_rts = [] 81 | tmp_areas = [] 82 | for node in nodes: 83 | data_idx = node[1]['data_idx'] 84 | row_start_idx = row_start_idxes[data_idx] 85 | result_data_idx = node[0] - node_start_idxes[data_idx] 86 | result_data_row = result_data_list[data_idx][result_data_idx] 87 | if len(result_data_row) == 3: 88 | assembled_row[row_start_idx: row_start_idx + 3] = np.array(result_data_row) 89 | else: 90 | tmp_payload = result_data_row[4:] 91 | assembled_row[row_start_idx: row_start_idx + len(tmp_payload)] = np.array(tmp_payload) 92 | need_assign_list[i] = need_assign_list[i] or result_data_row[3] 93 | tmp_mzs.append(node[1]['mz']) 94 | tmp_rts.append(node[1]['rt']) 95 | tmp_areas.append(node[1]['area']) 96 | tmp_mzs.sort() 97 | tmp_rts.sort() 98 | tmp_areas.sort() 99 | assembled_row[0] = tmp_mzs[len(tmp_mzs) // 2] 100 | assembled_row[1] = tmp_rts[len(tmp_rts) // 2] 101 | assembled_row[2] = tmp_areas[len(tmp_areas) // 2] 102 | assembled_row[3] = need_assign_list[i] 103 | rows.append(assembled_row) 104 | return np.array(rows) 105 | 106 | 107 | def save_alignment_results(result_data_list, file_names, save_folder, min_sample=None): 108 | if min_sample is None: 109 | min_sample = (len(file_names) + 1) // 2 110 | 111 | filtered_idxes = np.sum(result_data_list[:, [4 + 3 * i for i in range(len(file_names))]] > 0, axis=-1) > min_sample 112 | 113 | result_root_path = os.path.join(root_dir, 'experiments') 114 | if not os.path.exists(result_root_path): 115 | os.mkdir(result_root_path) 116 | path = os.path.join(result_root_path, save_folder) 117 | if not os.path.exists(path): 118 | os.mkdir(path) 119 | file_path = os.path.join(path, 'aligned_result.csv') 120 | file = open(file_path, 'w') 121 | writer = csv.writer(file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='') 122 | first_row = ['mz', 'rt', 'area', 'need_assign'] 123 | for file_name in file_names: 124 | first_row += [file_name + "_mz", file_name + "_rt", file_name + "_area"] 125 | writer.writerow(first_row) 126 | writer.writerows(result_data_list[filtered_idxes]) 127 | file.close() 128 | return file_path 129 | -------------------------------------------------------------------------------- /third_party/obiwarp/History: -------------------------------------------------------------------------------- 1 | 0.9.4 2 | 3 | * silenced warnings 4 | * moved build to Rakefile (rewrote build system and to be much cleaner) 5 | 6 | 0.9.3 7 | 8 | * added some include statements so that it would compile on more recent g++ compilers. No change in code (or binaries) from 0.9.2 9 | -------------------------------------------------------------------------------- /third_party/obiwarp/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006, The University of Texas at Austin("U.T. Austin"). All rights reserved. 2 | 3 | Software by John T. Prince under the direction of Edward M. Marcotte. 4 | 5 | By using this software the USER indicates that he or she has read, understood and will comply with the following: 6 | 7 | U. T. Austin hereby grants USER permission to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of this software and its documentation for any purpose and without fee, provided that a full copy of this notice is included with the software and its documentation. 8 | 9 | Title to copyright to this software and its associated documentation shall at all times remain with U. T. Austin. No right is granted to use in advertising, publicity or otherwise any trademark, service mark, or the name of U. T. Austin. 10 | 11 | This software and any associated documentation are provided "as is," and U. T. AUSTIN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESSED OR IMPLIED, INCLUDING THOSE OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT USE OF THE SOFTWARE, MODIFICATIONS, OR ASSOCIATED DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER INTELLECTUAL PROPERTY RIGHTS OF A THIRD PARTY. U. T. Austin, The University of Texas System, its Regents, officers, and employees shall not be liable under any circumstances for any direct, indirect, special, incidental, or consequential damages with respect to any claim by USER or any third party on account of or arising from the use, or inability to use, this software or its associated documentation, even if U. T. Austin has been advised of the possibility of those damages. 12 | 13 | Submit software operation questions to: Edward M. Marcotte, Department of Chemistry and Biochemistry, U. T. Austin, Austin, Texas 78712. 14 | -------------------------------------------------------------------------------- /third_party/obiwarp/README.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | README.md 4 | 5 |

OBI-Warp

6 | 7 |

Ordered Bijective Interpolated Warping (OBI-Warp) aligns matrices along a 8 | single axis using Dynamic Time Warping (DTW) and a one-to-one (bijective) 9 | interpolated warp function. OBI-Warp harnesses the non-linear, comprehensive 10 | alignment power of DTW and builds on the discrete, non-bijective output of DTW 11 | to give natural interpolants that can be used across multiple datasets.

12 | 13 |

OBI-Warp was developed specifically for the chromatographic alignment of 14 | complex mass spectrometry (MS) proteomics data. Using high confidence MS/MS 15 | identifications as time standards, OBI-Warp default parameters have been 16 | optimized to give accurate alignments under a variety of real-world conditions 17 | including datasets with little overlapping signal. Command-line options to 18 | override defaults are available (e.g., gap penalty, local weights and number 19 | of bijective anchors). Though developed for MS proteomics data, OBI-Warp is 20 | suited to a wide variety of alignment problems.

21 | 22 |

Pearson's correlation coefficient, covariance, dot product, and Euclidean 23 | distance have been implemented as the available vector similarity functions. 24 | Redundant calculations for correlation coefficient and covariance are cached 25 | in the n x m comparisons to give the algorithmic equivalent of calculating the 26 | dot product.

27 | 28 |

The dynamic programming algorithm is written to allow any arbitrary gap 29 | penalty function, or users may use a linear initiation and elongation penalty. 30 | Local weighting schemes may also be controlled.

31 | 32 |

Links

33 | 34 | 40 | 41 | 42 |

Building

43 | 44 |

Building is tested rigorously on Ubuntu and should work fine on any POSIX 45 | system. Windows compilation under VC++ should be possible and compilation 46 | with cygwin, or msys/mingw should work without any problems.

47 | 48 |

Prerequisites

49 | 50 |

ruby and rake. rake comes standard with the 51 | newer ruby (1.9.X), but it can also be installed using 52 | rubygems (for ruby versions below 1.9):

53 | 54 |
gem install rake
 55 | 
56 | 57 |

Optional: valgrind can be used for memory testing and 58 | matrix2png can be 59 | used to create images of the various matrices.

60 | 61 |

Compiling

62 | 63 |
rake
 64 | # creates bin/obiwarp   (notice it is in the *bin* directory)
 65 | 
66 | 67 |

This will compile all the code and link it into the obiwarp executable. 68 | NOTE: All executables will be created in the bin directory (testing 69 | executables stay in the lib dir).

70 | 71 |

If you want to explore other options:

72 | 73 |
rake -T
 74 | 
75 | 76 |

From within the lib directory, the generation of any file, intermediate, or 77 | test can be invoked by name. For example:

78 | 79 |
# build the lmat2lmata binary:
 80 | rake lmat2lmata    # note, it will be in created in the bin directory
 81 | # this also works
 82 | rake ../bin/lmat2lmata
 83 | 
84 | 85 |

From the top directory, all the tasks (not necessarily filetasks) are available:

86 | 87 |
# from the top level directory
 88 | rake memtest
 89 | # from the lib directory
 90 | rake memtest
 91 | 
92 | 93 |

Installation

94 | 95 |

Binaries are compiled and depositied in the bin folder. System 96 | installation is left to the user, but it can be as simple as:

97 | 98 |
rake      # make sure it is compiled and linked
 99 | sudo cp bin/obiwarp /usr/local/bin/
100 | 
101 | 102 |

Testing

103 | 104 |
# run all tests
105 | rake test
106 | # if you have valgrind installed:
107 | rake memtest
108 | rake test_cmdparser.rb  # for tests written in ruby
109 | rake run_test_dynprog   # for tests written in cxxtest 'run_test_<whatever>'
110 | 
111 | 112 |

tasks found in the lib Rakefile are also available from the top level Rakefile using 113 | the 'lib:' prefix:

114 | 115 |
rake clean      # from inside the lib directory
116 | rake lib:clean  # from the top dir
117 | 
118 | 119 | 120 | -------------------------------------------------------------------------------- /third_party/obiwarp/README.md: -------------------------------------------------------------------------------- 1 | # OBI-Warp 2 | 3 | Ordered Bijective Interpolated Warping (OBI-Warp) aligns matrices along a 4 | single axis using Dynamic Time Warping (DTW) and a one-to-one (bijective) 5 | interpolated warp function. OBI-Warp harnesses the non-linear, comprehensive 6 | alignment power of DTW and builds on the discrete, non-bijective output of DTW 7 | to give natural interpolants that can be used across multiple datasets. 8 | 9 | OBI-Warp was developed specifically for the chromatographic alignment of 10 | complex mass spectrometry (MS) proteomics data. Using high confidence MS/MS 11 | identifications as time standards, OBI-Warp default parameters have been 12 | optimized to give accurate alignments under a variety of real-world conditions 13 | including datasets with little overlapping signal. Command-line options to 14 | override defaults are available (e.g., gap penalty, local weights and number 15 | of bijective anchors). Though developed for MS proteomics data, OBI-Warp is 16 | suited to a wide variety of alignment problems. 17 | 18 | Pearson's correlation coefficient, covariance, dot product, and Euclidean 19 | distance have been implemented as the available vector similarity functions. 20 | Redundant calculations for correlation coefficient and covariance are cached 21 | in the n x m comparisons to give the algorithmic equivalent of calculating the 22 | dot product. 23 | 24 | The dynamic programming algorithm is written to allow any arbitrary gap 25 | penalty function, or users may use a linear initiation and elongation penalty. 26 | Local weighting schemes may also be controlled. 27 | 28 | ### Links 29 | 30 | * [Project Summary Page](://sourceforge.net/projects/obi-warp/) 31 | * [Download OBI-Warp](://sourceforge.net/project/showfiles.php?group_id=161548) 32 | * [Analytical Chemistry Publication](://dx.doi.org/10.1021/ac0605344) 33 | * [Supplementary Material for Publication](://bioinformatics.icmb.utexas.edu/obi-warp/) (*this server is currently down*) 34 | 35 | ## Building 36 | 37 | Building is tested rigorously on Ubuntu and should work fine on any POSIX 38 | system. Windows compilation under VC++ should be possible and compilation 39 | with cygwin, or msys/mingw should work without any problems. 40 | 41 | ### Prerequisites 42 | 43 | [ruby](http://www.ruby-lang.org) and *rake*. *rake* comes standard with the 44 | newer ruby (1.9.X), but it can also be installed using 45 | [rubygems](http://rubygems.org/pages/download) (for ruby versions below 1.9): 46 | 47 | gem install rake 48 | 49 | Optional: *valgrind* can be used for memory testing and 50 | [matrix2png](http://www.bioinformatics.ubc.ca/matrix2png/download.html) can be 51 | used to create images of the various matrices. 52 | 53 | ### Compiling 54 | 55 | rake 56 | # creates bin/obiwarp (notice it is in the *bin* directory) 57 | 58 | This will compile all the code and link it into the obiwarp executable. 59 | **NOTE**: All executables will be created in the **bin** directory (testing 60 | executables stay in the lib dir). 61 | 62 | If you want to explore other options: 63 | 64 | rake -T 65 | 66 | From within the *lib* directory, the generation of any file, intermediate, or 67 | test can be invoked by name. For example: 68 | 69 | # build the lmat2lmata binary: 70 | rake lmat2lmata # note, it will be in created in the bin directory 71 | # this also works 72 | rake ../bin/lmat2lmata 73 | 74 | From the top directory, all the tasks (not necessarily filetasks) are available: 75 | 76 | # from the top level directory 77 | rake memtest 78 | # from the lib directory 79 | rake memtest 80 | 81 | ### Installation 82 | 83 | Binaries are compiled and depositied in the **bin** folder. System 84 | installation is left to the user, but it can be as simple as: 85 | 86 | rake # make sure it is compiled and linked 87 | sudo cp bin/obiwarp /usr/local/bin/ 88 | 89 | ### Testing 90 | 91 | # run all tests 92 | rake test 93 | # if you have valgrind installed: 94 | rake memtest 95 | rake test_cmdparser.rb # for tests written in ruby 96 | rake run_test_dynprog # for tests written in cxxtest 'run_test_' 97 | 98 | 99 | tasks found in the lib Rakefile are also available from the top level Rakefile using 100 | the 'lib:' prefix: 101 | 102 | rake clean # from inside the lib directory 103 | rake lib:clean # from the top dir 104 | 105 | -------------------------------------------------------------------------------- /third_party/obiwarp/Rakefile: -------------------------------------------------------------------------------- 1 | require 'rake/clean' 2 | require 'rake/packagetask' 3 | 4 | $INCLUDED_FILES = nil 5 | 6 | CLEAN.add('pkg') 7 | CLEAN.existing! 8 | CLOBBER.add(*FileList["bin/*"]) 9 | 10 | def unix? 11 | RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix|hpux)/i 12 | end 13 | 14 | # lame 15 | def get_tasks(rakefile) 16 | get_tasks = %Q{ruby -e ' 17 | load "#{rakefile}" 18 | tm=nil 19 | ObjectSpace.each_object {|v| tm=v if v.is_a?(Rake::Application) } 20 | p tm.tasks.select {|v| v.is_a?(Rake::Task) }.map {|v| [v.name, v.comment] } 21 | ' 22 | } 23 | eval `#{get_tasks}` 24 | end 25 | 26 | def get_real_lib_files(rakefile) 27 | get_tasks = %Q{ruby -e ' 28 | load "#{rakefile}" 29 | ' 30 | } 31 | end 32 | 33 | # position can be :major, :minor, or :patch 34 | def bump_version(position=:patch) 35 | index = {:major => 0, :minor => 1, :patch => 2} 36 | versions = VERSION.split('.').map(&:to_i) 37 | versions[ index[position] ] += 1 38 | version_string = versions.join('.') 39 | File.open(VERSION_FILE,'w') {|out| out.puts version_string } 40 | # update code from VERSION 41 | replaced = IO.readlines(CODE_VERSION).map do |line| 42 | if line.match(/VERSION\s*=/) 43 | %Q{char * VERSION = (char *)"#{version_string}";} + "\n" 44 | else ; line 45 | end 46 | end.join 47 | File.open(CODE_VERSION,'w') {|out| out.print replaced } 48 | end 49 | 50 | VERSION_FILE = 'VERSION' 51 | VERSION = IO.read(VERSION_FILE).chomp 52 | 53 | 54 | 55 | =begin 56 | catchall that allows any task in the subdirectory to be run from the top 57 | rule "" do |t| 58 | cd "lib" 59 | sh "rake #{t.name}" 60 | cd TOPDIR 61 | end 62 | =end 63 | 64 | QUIET = {:verbose => false } 65 | TOPDIR = File.expand_path(File.dirname(__FILE__)) 66 | LIBDIR = File.expand_path('lib') 67 | 68 | # this is lame, but I can't figure out a better way to do this right now 69 | cd LIBDIR, QUIET 70 | tasks = get_tasks("Rakefile") 71 | cd TOPDIR, QUIET 72 | LIB_NS = :lib 73 | 74 | TASKS_TO_DUP = [] 75 | namespace LIB_NS do 76 | tasks.each do |name, comment| 77 | if comment 78 | TASKS_TO_DUP << [name, comment] 79 | #desc comment 80 | task name do 81 | cd LIBDIR 82 | sh "rake #{name}" 83 | cd TOPDIR, QUIET 84 | end 85 | end 86 | end 87 | end 88 | 89 | TASKS_TO_DUP.each do |name, comment| 90 | desc comment 91 | task name => ["#{LIB_NS}:#{name}"] 92 | end 93 | 94 | TEMPLATES_STAMPED = FileList["lib/**/*TEMPLATE.*"].map! {|v| v.sub('_TEMPLATE','') } 95 | INCLUDED_FILES = FileList["lib/**/*", "bin/*", "*", *TEMPLATES_STAMPED].reject {|v| v =~ /\.o$/ } 96 | 97 | Rake::PackageTask.new("obiwarp", VERSION) do |p| 98 | p.need_tar = true 99 | p.need_zip = true 100 | p.package_files.include(INCLUDED_FILES) 101 | end 102 | 103 | desc 'safe, complete build process (avoid package)' 104 | task :build => [:clobber, :doc, :stamp_templates, :package] 105 | 106 | Rake::Task[:clean].prerequisites ||= [] 107 | Rake::Task[:clean].prerequisites << :clobber_package 108 | 109 | CODE_VERSION = "lib/obiwarp.cpp" 110 | 111 | %w(major minor patch).each do |v| 112 | desc "bumps #{v} number in VERSION (and #{CODE_VERSION})" 113 | task "version:bump:#{v}" do 114 | bump_version(v.to_sym) 115 | end 116 | end 117 | 118 | task :version do 119 | print IO.read(VERSION_FILE) 120 | end 121 | 122 | task :default => 'obiwarp' 123 | 124 | desc "generate html page with bluecloth" 125 | task :doc do 126 | sh "bluecloth README.md > README.html" 127 | end 128 | 129 | desc "build a native binary" 130 | task "binary" do 131 | cd LIBDIR 132 | sh %Q{export CFLAGS="$CFLAGS -O3" ; rake obiwarp} 133 | cd TOPDIR 134 | mv "bin/obiwarp", "bin/obiwarp-#{RUBY_PLATFORM}" + (unix? ? '' : '.exe') 135 | end 136 | 137 | desc "build a windows binary with mingw (mingw-g++ needed)" 138 | task "windows_binary" do 139 | cd LIBDIR 140 | sh %Q{export COMPILER=mingw ; export CFLAGS="$CFLAGS -O3" ; rake obiwarp} 141 | cd TOPDIR 142 | mv "bin/obiwarp", "bin/obiwarp.exe" 143 | end 144 | -------------------------------------------------------------------------------- /third_party/obiwarp/VERSION: -------------------------------------------------------------------------------- 1 | 0.9.4 2 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/Rakefile: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require 'rake/clean' 3 | require 'open3' 4 | 5 | $VALGRIND_VERBOSE = false 6 | 7 | # build 32 bit on linux with "export CFLAGS=-m32" 8 | 9 | VAR = { 10 | 'CFLAGS' => ENV['CFLAGS'], 11 | :compiler => (ENV['COMPILER'] == 'mingw') ? 'i586-mingw32msvc-g++' : 'g++', 12 | :bindir => '../bin', 13 | } 14 | 15 | SDK_INCLUDES = %w(cxxtest ../sdk) 16 | SDK_DIR = "../sdk" 17 | Fl = FileList ; FU = FileUtils 18 | 19 | INCLUDES = SDK_INCLUDES.map {|f| '-I' + File.join(SDK_DIR,f) }.join(' ') 20 | 21 | def have_matrix2png? 22 | begin 23 | reply = `matrix2png --version` 24 | reply && reply.match(/Usage:/) 25 | rescue StandardError 26 | false 27 | end 28 | end 29 | 30 | def have_valgrind? 31 | begin 32 | `valgrind --help` 33 | rescue SystemCallError 34 | false 35 | end 36 | end 37 | 38 | def build_from_template(template_filename, opts={}) 39 | opt = { :file_template_tag => '_TEMPLATE', 40 | :type_template_tag => 'FLOAT', 41 | :abbr_template_tag => 'ABR', 42 | :start_flag => %r{//\s+BEGIN TEMPLATE}, 43 | :end_flag => %r{//\s+END TEMPLATE}, 44 | :to_build => { 'float' => 'F', 'double' => 'D', 'int' => 'I' } 45 | }.merge(opts) 46 | outfile = template_filename.gsub(opt[:file_template_tag],'') 47 | 48 | abr_template_tag = 'ABR' 49 | to_expand = [] 50 | pre_section = [] 51 | post_section = [] 52 | in_template_section = nil 53 | IO.foreach(template_filename) do |line| 54 | pre_section << line if in_template_section.nil? 55 | in_template_section = false if line =~ opt[:end_flag] 56 | to_expand << line if in_template_section 57 | in_template_section = true if line =~ opt[:start_flag] 58 | post_section << line if in_template_section == false 59 | end 60 | 61 | all_lines_expanded = [] 62 | opt[:to_build].each do |c_type, abbr| 63 | all_lines_expanded << to_expand.map {|line| line.gsub(opt[:type_template_tag], c_type).gsub(opt[:abbr_template_tag],abbr) } 64 | end 65 | File.open(outfile,'w') do |out| 66 | out.print( pre_section.join << all_lines_expanded.join << post_section.join ) 67 | end 68 | end 69 | 70 | MY_BIN_DIR = File.expand_path("../bin") 71 | 72 | def link(executable, object_files) 73 | sh "#{VAR[:compiler]} #{VAR["CFLAGS"]} -o #{executable} #{object_files.join(" ")}" 74 | end 75 | 76 | template_rule = lambda do |name, ext| 77 | file "#{name}.#{ext}" => "#{name}_TEMPLATE.#{ext}" do |fn| 78 | build_from_template(fn.prerequisites[0]) 79 | end 80 | end 81 | 82 | def compile(object_file, prereq) 83 | sh "#{VAR[:compiler]} #{VAR["CFLAGS"]} -c -o #{object_file} #{prereq} #{INCLUDES}" 84 | end 85 | 86 | OBIWARP_EXE = File.join(VAR[:bindir], 'obiwarp') 87 | UTILITIES = %w(mat2mata mata2mat lmat2chrms lmat2lmata lmat2png lmata2lmat) 88 | BASE = %w(vec mat lmat dynprog cmdparser pngio) 89 | BASE_OBJECT_FILES = BASE.map {|v| v << ".o" } 90 | 91 | TEMPLATE_FILES = Fl["*_TEMPLATE.*"] 92 | TEMPLATE_OUTPUT_FILES = TEMPLATE_FILES.map {|f| f.sub('_TEMPLATE','') } 93 | 94 | TEMPLATE_FILES.each do |fn| 95 | file(fn.sub('_TEMPLATE','') => [fn]) { build_from_template(fn) } 96 | end 97 | 98 | desc 'create the files from template: *_TEMPLATE.*' 99 | task :stamp_templates => TEMPLATE_OUTPUT_FILES 100 | 101 | desc 'compile *.o files for core obi-warp library' 102 | task :compile_objects => (BASE_OBJECT_FILES + %w(obiwarp.o)) 103 | 104 | TESTEXECS = [] 105 | CPP_TEST_FILES_H = Fl["test_*.h"].reject {|v| v=~/TEMPLATE/} + %w(test_vec.h test_mat.h) 106 | CPP_TEST_FILES_H.delete("test_pngio.h") unless have_matrix2png? 107 | CPP_TEST_FILES_H.uniq! 108 | 109 | CPP_TEST_FILES_H.each do |hfile| 110 | cppfile = hfile.ext('cpp') 111 | ofile = hfile.ext('o') 112 | no_ext = hfile.sub('.h','') 113 | CLEAN << cppfile << ofile 114 | CLOBBER << no_ext 115 | file cppfile => [hfile] do |t| 116 | sh "perl -w #{SDK_DIR}/cxxtest/cxxtestgen.pl --error-printer -o #{t.name} #{hfile}" 117 | end 118 | # BASE_OBJECT_FILES are required here only so that templates are stamped and 119 | # .h files will be present 120 | file ofile => [cppfile, *BASE_OBJECT_FILES] {|t| compile(ofile, cppfile) } 121 | file no_ext => [ofile] { link(no_ext, [ofile, *BASE_OBJECT_FILES]) } 122 | TESTEXECS << no_ext 123 | end 124 | 125 | TESTRUBYFILES = Fl["test_*.rb"] 126 | TESTRUBYFILES.each do |fn| 127 | if fn =~ /converter/ 128 | task fn => [:utilities] {|t| sh "ruby #{t.name}" } 129 | else 130 | task fn => [OBIWARP_EXE] {|t| sh "ruby #{t.name}" } 131 | end 132 | end 133 | 134 | rule '.o' => ['.cpp', '.h'] do |t| 135 | compile(t.name, t.prerequisites[0]) 136 | end 137 | 138 | rule '.o' => ['.cpp'] do |t| 139 | compile(t.name, t.prerequisites[0]) 140 | end 141 | 142 | EXECUTABLES_TO_DOCUMENT = %w(obiwarp) 143 | UTILITIES_IN_BINDIR = UTILITIES.map {|v| File.join(VAR[:bindir], v) } 144 | ALL_USE_EXECS = [OBIWARP_EXE, *UTILITIES_IN_BINDIR] 145 | 146 | # create executables 147 | ALL_USE_EXECS.each do |binpath| 148 | basename = File.basename(binpath) 149 | CLOBBER << binpath 150 | file binpath => (BASE_OBJECT_FILES + ["#{basename}.o"]) do |t| 151 | FU.mkpath VAR[:bindir] 152 | link(binpath, t.prerequisites) 153 | #puts "*** GO TO: '#{File.expand_path(VAR[:bindir])}' to find \"#{executable}\" ***" 154 | end 155 | if EXECUTABLES_TO_DOCUMENT.include?(basename) 156 | desc "create #{binpath}" 157 | end 158 | task basename => [binpath] 159 | end 160 | 161 | desc "create: #{UTILITIES.join(' ')}" 162 | task :utilities => UTILITIES_IN_BINDIR 163 | 164 | RUNTESTEXECS = TESTEXECS.map do |exe| 165 | runtest = "run_#{exe}" 166 | task runtest => [exe] {|t| sh File.join(Dir.getwd,exe) } 167 | runtest 168 | end 169 | 170 | ALL_MEMTESTS = TESTEXECS.map do |exe| 171 | basename = File.basename(exe) 172 | memtest = "memtest_#{basename}" 173 | task memtest => [exe] do 174 | if have_valgrind? 175 | fn = File.join(Dir.getwd, exe) 176 | if $VALGRIND_VERBOSE 177 | sh "valgrind --tool=memcheck --leak-check=yes #{fn}" 178 | else 179 | Open3.popen3("valgrind --tool=memcheck --leak-check=yes #{fn}") do |stdin,stdout,stderr| 180 | stderr.each do |line| 181 | if md = line.match(/==\d+== (malloc\/free:.*)/) 182 | puts md[1] 183 | end 184 | if md = line.match(/total heap usage: (([\d\,]*) allocs, ([\d\,]*) frees, [\d\,]* bytes allocated)/) 185 | puts "*** #{basename}: #{md[1]}" 186 | if md[2] != md[3] 187 | puts (ast="*"*40) + " WARNING!!!! potential memory leak! " + ast 188 | end 189 | end 190 | end 191 | end 192 | end 193 | else 194 | puts "requires valgrind to run memcheck! (Linux)" 195 | end 196 | end 197 | memtest 198 | end 199 | 200 | desc "run memory checks against all execs (run each separate: memtest_)" 201 | task :memtest => ALL_MEMTESTS 202 | 203 | desc ([%Q{"test_*.rb"}] + %w(and) + %w("run_test_") + TESTEXECS.map{|v|v.sub('test_','')}).join(' ') 204 | task :test => TESTRUBYFILES + RUNTESTEXECS 205 | 206 | task :default => OBIWARP_EXE 207 | 208 | ############################################################ 209 | # CLEANUP 210 | ############################################################ 211 | 212 | CLEAN.add(Fl["*.o"], TEMPLATE_OUTPUT_FILES).existing! 213 | CLOBBER.existing! 214 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/cmdparser.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _CMDPARSER_H 3 | #define _CMDPARSER_H 4 | 5 | 6 | class CmdParser { 7 | public: 8 | // internal 9 | int _i; 10 | char ** _argv; 11 | 12 | // options: 13 | char * format; 14 | char * outfile; 15 | bool images; 16 | char * timefile; 17 | char * score; 18 | bool nostdnrm; 19 | bool local; 20 | float factor_diag; 21 | float factor_gap; 22 | float gap_init; 23 | float gap_extend; 24 | float init_penalty; 25 | float response; 26 | void help_func(int arg_cnt); 27 | void print_version(char * version); 28 | //bool warp_data; 29 | char * smat_out; 30 | char * smat_in; 31 | 32 | char * infiles[2]; 33 | 34 | char * progname; 35 | 36 | // initialize 37 | CmdParser(int argc, char ** argv, char * version); 38 | 39 | // internal: 40 | void set_flag(bool &val); 41 | void set_string(char ** string_ptr); 42 | void set_float(float &val); 43 | void set_comma_list_float(float &val1, float &val2); 44 | void verify_infiles(); 45 | void get_format_from_file1(); 46 | bool file_is_readable(char * filename); 47 | void set_defaults_by_score(char * score_arg); 48 | bool eq(char * first, char * sec); 49 | 50 | }; 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/dynprog.h: -------------------------------------------------------------------------------- 1 | #ifndef _DYNPROG_H 2 | #define _DYNPROG_H 3 | 4 | #include "math.h" 5 | 6 | #include "vec.h" 7 | #include "mat.h" 8 | 9 | using namespace VEC; 10 | 11 | 12 | class DynProg { 13 | private: 14 | float DEFAULT_GAP_PENALTY_SLOPE; 15 | public: 16 | int stupid; 17 | MatF* _smat; 18 | MatF _asmat; 19 | MatI _tb; 20 | MatI _tbpath; 21 | MatF _tbscores; // the tbpath with the score at each position 22 | MatI _gapmat; 23 | VecI _mCoords; 24 | VecI _nCoords; 25 | VecF _sCoords; 26 | float _bestScore; // the scores at each m,n coordinate! 27 | float _prob; 28 | 29 | DynProg() { DEFAULT_GAP_PENALTY_SLOPE = 2.f; } 30 | 31 | // If gap_penalty array len = 0, then a linear gap penalty based on the 32 | // average matrix score will be used 33 | // neither diag or gap factor can be 0.0 for minimization 34 | void find_path(MatF &smat, VecF &gap_penalty, int minimize=0, float diag_factor=2.f, float gap_factor=1.f, int local=0, float init_penalty=0.0f); 35 | // If gap_penalty array len = 0, then a linear gap penalty based on the 36 | // average matrix score will be used 37 | // a gap is introduced without adding in the score of the matrix 38 | // at that index 39 | //void find_path_with_gaps(MatF &smat, VecF &gap_penalty, int minimize=0, int local=0, float init_penalty=0.0f); 40 | void default_gap_penalty(MatF &smat, VecF &out); 41 | 42 | ~DynProg() {} 43 | // x are the times along the n axis of the tbpath 44 | // fx are the equivalents along the y axis of the tbpath 45 | 46 | // RETURNS the actual number of internal anchors used 47 | void warp_map(VecI &mOut, VecI &nOut, float percent_anchors, int minimize=0); 48 | void best_anchors(VecI &mBijShort, VecI &nBijShort, VecF &sBijShort, VecI &mCoords, VecI &nCoords, VecI &mOut, VecI &nOut, int num_internal_anchors); 49 | void best_anchors(VecI &mCoordsBijShort, VecI &nCoordsBijShort, VecF &sCoordsBijShort, VecI &mOut, VecI &nOut, int num_internal_anchors); 50 | void bijective_anchors(VecI &mCoords, VecI &nCoords, VecF &scores, VecI &mBijShort, VecI &nBijShort, VecF &sBijShort); 51 | 52 | // NEED to redo these and affirm correctness... 53 | // warps the mMat along the m axis where mMat coordinates are 54 | // specified by mCoords and the new, warped coordinates are 55 | // supplied by nCoords 56 | // void warp(VecI &mCoords, VecI &nCoords, MatF &mMat, MatF &warpedOut, bool mCoord_row_nums=0); 57 | // void warp(VecI &mCoords, VecI &nCoords, VecF &mVec, VecF &warpedOut, bool mCoord_row_nums=0); 58 | // void warp(VecF &mCoords, VecF &nCoords, VecF &mVec, VecF &warpedOut, bool mCoord_row_nums=0); 59 | // Calculates the sum of the sq of the residuals of the warped nVals 60 | float sum_sq_res_yeqx(VecF &m_tm, VecF &n_tm, VecI &mWarpMap, VecI &nWarpMap, VecF &mVals, VecF &nVals); 61 | void path_accuracy_details(VecF &mWarpMapFt, VecF &nWarpMapFt, VecF &mVals, VecF &nVals, VecF &sq_res_yeqx, VecF &abs_diff, int linear_interp=0); 62 | void path_accuracy(VecF &mWarpMapFt, VecF &nWarpMapFt, VecF &mVals, VecF &nVals, float &sum_sq_res_yeqx, float &avg_sq_res_yeqx, float &sum_abs_diff, float &avg_abs_diff, int linear_interp=0); 63 | void path_accuracy(VecF &m_tm, VecF &n_tm, VecI &mWarpMap, VecI &nWarpMap, VecF &mVals, VecF &nVals, float &sum_sq_res_yeqx, float &avg_sq_res_yeqx, float &sum_abs_diff, float &avg_abs_diff, int linear_interp=0); 64 | void _max(float diag, float top, float left, float &val, int &pos); 65 | void _min(float diag, float top, float left, float &val, int &pos); 66 | float _global_max(MatF& asmat, int& m_index, int& n_index); 67 | float _max_right(MatF& asmat, int& m_index); 68 | float _max_bottom(MatF& asmat, int& n_index); 69 | float _global_min(MatF& asmat, int& m_index, int& n_index); 70 | float _min_right(MatF& asmat, int& m_index); 71 | float _min_bottom(MatF& asmat, int& n_index); 72 | static int exponential_less_before(float order, int len, float *expon, float *lessbefore) { 73 | float val; 74 | for (int i = 0; i < len; i++) { 75 | val = pow(i,order); 76 | expon[i] = val; 77 | lessbefore[i] = val - expon[i - 1]; 78 | } 79 | return 1; 80 | } 81 | 82 | // Score matrices arranged like this 83 | // nCoords -> scans along the x axis 84 | // mCoords run | 85 | // V 86 | // scans along the y axis 87 | void score_product(MatF &mCoords, MatF &nCoords, MatF &scores); 88 | void score_covariance(MatF &mCoords, MatF &nCoords, MatF &scores); 89 | void score_pearsons_r(MatF &mCoords, MatF &nCoords, MatF &scores); 90 | void score_pearsons_r2(MatF &mCoords, MatF &nCoords, MatF &scores); 91 | void score_mutual_info(MatF &mCoords, MatF &nCoords, MatF &scores, int num_bins=2); 92 | void score_euclidean(MatF &mCoords, MatF &nCoords, MatF &scores); 93 | // convenience method for scoring 94 | void score(MatF &mCoords, MatF &nCoords, MatF &scores, const char *type, int mi_num_bins=2); 95 | 96 | // DynProg::expandFlag(mat1, 2, 1) 97 | // 98 | // before after 99 | // 2 0 0 0 0 0 2 2 2 0 0 0 100 | // 0 2 0 0 0 0 2 2 2 2 0 0 101 | // 0 0 2 0 0 0 => 2 2 2 2 2 0 102 | // 0 0 0 2 0 0 0 2 2 2 2 2 103 | // 0 0 0 0 2 0 0 0 2 2 2 2 104 | // 0 0 0 0 0 2 0 0 0 2 2 2 105 | static void expandFlag(MatI &flagged, int flag, int numSteps, MatI &expanded); 106 | // val[i] -= val[i-1] (inplace) 107 | void less_before(VecF &arr); 108 | // linear function mx + b where m is slope and b is y intercept 109 | // each value is less the value of of the array before it 110 | void linear_less_before(float m, float b, int len, VecF &lessbefore); 111 | // linear function mx + b where m is slope and b is y intercept 112 | void linear(float m, float b, int len, VecF &arr); 113 | 114 | 115 | //void replaceAlignmentPathRandom(MatF& mat, MatI& toReplace); 116 | //float toProb(int halfWindow, short int numShuffles, char *type, float init_penalty, int minimum); 117 | }; 118 | 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/lmat.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _LMAT_H 3 | #define _LMAT_H 4 | 5 | #include "vec.h" 6 | #include "mat.h" 7 | 8 | 9 | using namespace VEC; 10 | 11 | class LMat { 12 | private: 13 | #define LEN_LARGEST_NUM (30) 14 | #define LARGEST_NUM_MZ_VALS (40000) 15 | #define LARGEST_NUM_TIME_VALS (40000) 16 | public: 17 | int _mz_vals; 18 | int _tm_vals; 19 | 20 | // All constructors call new! 21 | // All swaps of these MUST delete their memory before swapping! 22 | MatF *_mat; 23 | VecF *_mz; 24 | VecF *_tm; 25 | 26 | LMat(); 27 | // Takes a binary lmat file as input 28 | LMat(const char *file); 29 | ~LMat(); 30 | int mzlen() { return _mz_vals; } 31 | int tmlen() { return _tm_vals; } 32 | int num_mz() { return _mz_vals; } 33 | int num_tm() { return _tm_vals; } 34 | MatF * mat() { return _mat; } 35 | VecF * mz() { return _mz; } 36 | VecF * tm() { return _tm; } 37 | 38 | float hi_mz() { return (*_mz)[_mz_vals-1]; } 39 | float lo_mz() { return (*_mz)[0]; } 40 | float hi_tm() { return (*_tm)[_tm_vals-1]; } 41 | float lo_tm() { return (*_tm)[0]; } 42 | void mz_axis_vals(VecI &mzCoords, VecF &mzVals); 43 | void tm_axis_vals(VecI &tmCoords, VecF &tmVals); 44 | void set_from_ascii(const char *file); 45 | void set_from_binary(const char *file); 46 | // Sets the matrix and gives m and n axis labels as the indices 47 | void set_from_binary_mat(const char *file); 48 | void set_from_ascii_mat(const char *file); 49 | 50 | 51 | // selfTimes and equivTimes are the anchor points for the warping 52 | // function.. 53 | // warps the time values (not the actual data values) 54 | void warp_tm(VecF &selfTimes, VecF &equivTimes); 55 | 56 | // expects one line with the # mz vals and next with the vals 57 | void set_mz_from_ascii(FILE *fpt); 58 | // expects one line with the # tm vals and next with the vals 59 | void set_tm_from_ascii(FILE *fpt); 60 | // expects the matrix in ascii format 61 | void set_mat_from_ascii(FILE *ptr, int rows, int cols); 62 | // writes the lmat in binary to a file (or STDOUT if NULL) 63 | void write(const char *file=NULL); 64 | // writes the lmat in ascii to a file (or STDOUT if NULL) 65 | void print(const char *file=NULL); 66 | 67 | // obviously not the final resting place 68 | void chomp_plus_spaces( char *str); 69 | }; 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/lmat2chrms.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "lmat.h" 9 | 10 | 11 | using namespace VEC; 12 | int main (int argc, char *argv[]) { 13 | 14 | if (argc == 1) { 15 | std::cerr << 16 | "**********************************************************************\n" << 17 | "usage: lmat2chrms file.lmat m/z ...\n" << 18 | "**********************************************************************\n"; 19 | exit(1); 20 | } 21 | /************************************************************ 22 | * GET ARGUMENTS 23 | ************************************************************/ 24 | int i; 25 | char file[1024]; 26 | 27 | LMat lmat; 28 | strcpy(file, argv[1]); 29 | lmat.set_from_binary(file); 30 | MatF trans; 31 | lmat.mat()->transpose(trans); 32 | //printf("rows %d cols %d\n", lmat.mat()->rows(), lmat.mat()->cols()); 33 | //printf("rows %d cols %d\n", trans.rows(), trans.cols()); 34 | VecF *vecs = new VecF[trans.rows()]; 35 | int num_vecs; 36 | trans.row_vecs(num_vecs, vecs); 37 | 38 | char fn[1024]; 39 | char toplotfn[1024]; 40 | 41 | strcpy(fn, file); 42 | char *pch; 43 | pch = strstr(fn, ".lmat"); 44 | *pch = '\0'; 45 | 46 | char fnbase[1024]; 47 | strcpy(fnbase, fn); 48 | char *start = strrchr(fnbase, '/'); 49 | if (start != NULL) { 50 | strcpy(fnbase, ++start); 51 | } 52 | 53 | strcpy(toplotfn, fn); 54 | strcat(toplotfn, ".toplot"); 55 | std::ofstream fh(toplotfn); 56 | printf("WRITING TO: %s\n", toplotfn); 57 | 58 | fh << "XYData" << "\n"; 59 | fh << fnbase << "\n"; 60 | fh << fnbase << " chromatograms" << "\n"; 61 | fh << "time (sec)" << "\n"; 62 | fh << "ion counts" << "\n"; 63 | 64 | for (int i = 2; i < argc; ++i) { 65 | fh << "m/z " << argv[i] << "\n"; 66 | //std::cout << "m/z " << argv[i] << "\n"; 67 | int ind = lmat.mz()->index(atof(argv[i])); 68 | //printf("IND: %d\n", ind); 69 | lmat.tm()->print(fh, 1); 70 | vecs[ind].print(fh, 1); 71 | } 72 | 73 | delete[] vecs; 74 | } 75 | 76 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/lmat2lmata.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "lmat.h" 9 | 10 | 11 | using namespace VEC; 12 | int main (int argc, char *argv[]) { 13 | 14 | if (argc == 1) { 15 | std::cerr << 16 | "**********************************************************************\n" << 17 | "usage: lmat2lmata file.lmat ... \n" << 18 | "**********************************************************************\n" << 19 | "converts binary lmat file into ascii file (does NOT delete original)\n" << 20 | "outputs file.lmata\n" << 21 | "\n" << 22 | "FORMATS: \n" << 23 | "lmat = binary (all vals 4 bytes; all on one line):\n" << 24 | " (int) # rows, (floats) m axis values (i.e. time vals),\n" << 25 | " (int) # cols, (floats) n axis values (i.e. m/z vals),\n" << 26 | " (floats) matrix data values row1, row2, row3 ...\n" << 27 | "lmata = ascii format (space delimited, with newlines as shown below):\n" << 28 | " # rows\n" << 29 | " m axis values (i.e. time vals)\n" << 30 | " # cols\n" << 31 | " n axis values (i.e. m/z vals)\n" << 32 | " matrix data row1\n" << 33 | " matrix data row2\n" << 34 | " matrix data row3 ...\n" << 35 | "**********************************************************************\n"; 36 | exit(1); 37 | } 38 | /************************************************************ 39 | * GET ARGUMENTS 40 | ************************************************************/ 41 | int i; 42 | char file[1024]; 43 | char outfile[1024]; 44 | 45 | LMat lmat; 46 | for (i = 1; i < argc; i++) { 47 | strcpy(file, argv[i]); 48 | strcpy(outfile, file); 49 | int outfile_strlen = strlen(outfile); 50 | outfile[outfile_strlen] = 'a'; 51 | outfile[outfile_strlen+1] = '\0'; 52 | //std::cerr << "creating: " << outfile << "\n"; 53 | lmat.set_from_binary(file); 54 | lmat.print(outfile); 55 | } 56 | } 57 | 58 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/lmat2png.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "vec.h" 9 | #include "mat.h" 10 | #include "lmat.h" 11 | #include "dynprog.h" 12 | #include "pngio.h" 13 | 14 | 15 | char file[1024]; 16 | 17 | int BINARY = 0; 18 | 19 | int main (int argc, char *argv[]) { 20 | /************************************************************ 21 | * GET ARGUMENTS 22 | ************************************************************/ 23 | if (argc == 1) { 24 | std::cerr << 25 | "*****************************************************************\n" << 26 | "usage: lmat2png [-b] file1 ...\n" << 27 | "*****************************************************************\n" << 28 | "requires png2matrix callable\n" << 29 | "*****************************************************************\n"; 30 | exit(1); 31 | } 32 | int i; 33 | for (i = 1; i < argc; i++) { 34 | if (!strcmp(argv[i],"-b")) { 35 | BINARY = 1; 36 | } 37 | } 38 | for (i = 1; i < argc; i++) { 39 | if (strcmp(argv[i],"-b")) { // if this is a file (not an arg) 40 | char outfile[1024]; 41 | strcpy(file, argv[i]); 42 | strcpy(outfile, file); 43 | char *ptr; 44 | ptr = strstr(outfile, ".lmat"); //works for lmat and lmata 45 | *ptr = '\0'; 46 | strcat(outfile, ".png"); 47 | LMat lmat; 48 | if (BINARY) { 49 | lmat.set_from_binary(file); 50 | } 51 | else { 52 | lmat.set_from_ascii(file); 53 | } 54 | PngIO wrt(1); 55 | wrt.write(outfile, *lmat.mat()); 56 | } 57 | } 58 | } 59 | 60 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/lmata2lmat.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "lmat.h" 9 | 10 | 11 | using namespace VEC; 12 | int main (int argc, char *argv[]) { 13 | 14 | if (argc == 1) { 15 | std::cerr << 16 | "**********************************************************************\n" << 17 | "usage: lmata2lmat file.lmata ... \n" << 18 | "**********************************************************************\n" << 19 | "outputs file.lmat\n" << 20 | "converts ascii lmata file into binary file (does NOT delete original)\n" << 21 | "\n" << 22 | "FORMATS: \n" << 23 | "lmat = binary (all vals 4 bytes; all on one line):\n" << 24 | " (int) # rows, (floats) m axis values (i.e. time vals),\n" << 25 | " (int) # cols, (floats) n axis values (i.e. m/z vals),\n" << 26 | " (floats) matrix data values row1, row2, row3 ...\n" << 27 | "lmata = ascii format (space delimited, with newlines as shown below):\n" << 28 | " # rows\n" << 29 | " m axis values (i.e. time vals)\n" << 30 | " # cols\n" << 31 | " n axis values (i.e. m/z vals)\n" << 32 | " matrix data row1\n" << 33 | " matrix data row2\n" << 34 | " matrix data row3 ...\n" << 35 | "**********************************************************************\n"; 36 | exit(1); 37 | } 38 | /************************************************************ 39 | * GET ARGUMENTS 40 | ************************************************************/ 41 | int i; 42 | char file[1024]; 43 | char outfile[1024]; 44 | 45 | LMat lmat; 46 | for (i = 1; i < argc; i++) { 47 | strcpy(file, argv[i]); 48 | strcpy(outfile, file); 49 | outfile[strlen(outfile)-1] = '\0'; 50 | //std::cerr << "creating: " << outfile << "\n"; 51 | lmat.set_from_ascii(file); 52 | lmat.write(outfile); 53 | } 54 | } 55 | 56 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/mat2mata.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "mat.h" 9 | 10 | 11 | using namespace VEC; 12 | int main (int argc, char *argv[]) { 13 | 14 | if (argc == 1) { 15 | std::cerr << 16 | "**********************************************************************\n" << 17 | "usage: mat2mata file.mat ... \n" << 18 | "**********************************************************************\n" << 19 | "outputs file.mata\n" << 20 | "converts binary file into ascii mata file(does NOT delete original)\n" << 21 | "\n" << 22 | "FORMATS: \n" << 23 | "mat = binary (all vals 4 bytes):\n" << 24 | " (int)#rows,(int)#cols,(floats)matrix_data_values...\n" << 25 | "mata = ascii format (space delimited, with newlines as shown below):\n" << 26 | " #rows, #cols\n" << 27 | " matrix data row1\n" << 28 | " matrix data row2\n" << 29 | " matrix data row3 ...\n" << 30 | "**********************************************************************\n"; 31 | exit(1); 32 | } 33 | /************************************************************ 34 | * GET ARGUMENTS 35 | ************************************************************/ 36 | int i; 37 | char file[1024]; 38 | char outfile[1024]; 39 | 40 | MatF mat; 41 | for (i = 1; i < argc; i++) { 42 | strcpy(file, argv[i]); 43 | strcpy(outfile, file); 44 | int outfile_strlen = strlen(outfile); 45 | outfile[outfile_strlen] = 'a'; 46 | outfile[outfile_strlen+1] = '\0'; 47 | //std::cerr << "creating: " << outfile << "\n"; 48 | mat.set_from_binary(file); 49 | mat.print(outfile); 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/mat_TEMPLATE.h: -------------------------------------------------------------------------------- 1 | #ifndef _MAT_H 2 | #define _MAT_H 3 | 4 | #include "vec.h" 5 | 6 | /************************************************************* 7 | * Creation from existing object/array is always shallow!. 8 | * Will delete any memory allocated. 9 | * Will NOT delete any memory not allocated. 10 | * If you want deep then use copy function! 11 | ************************************************************/ 12 | 13 | 14 | namespace VEC { 15 | 16 | class MatI; 17 | class MatF; 18 | class MatD; 19 | 20 | // BEGIN TEMPLATE 21 | 22 | class MatABR { 23 | 24 | public: 25 | // length 26 | int _m; 27 | int _n; 28 | VecABR _dat; 29 | // Constructors: 30 | MatABR(); 31 | MatABR(int m, int n); 32 | MatABR(int m, int n, const FLOAT &val); 33 | 34 | 35 | // (copied from vec.h) 36 | // if (shallow == 1 (true)) then no memory is deleted upon destruction 37 | // if (shallow == 0 (false)) then delete[] is called 38 | // FOR THIS CONSTRUCTOR ONLY, there is no DEEP copying, EVER! 39 | MatABR(int m, int n, FLOAT *arr, bool shallow=0); 40 | 41 | // (copied from vec.h) 42 | // if (shallow == 0 (false)) a DEEP copy is made of the data 43 | // if (shallow == 1 (true)) a copy of the pointer is made 44 | // if (shallow) then no memory is released upon destruction 45 | // shallow is used for a quick copy with which to work 46 | MatABR(const MatABR &A, bool shallow=0); 47 | 48 | operator FLOAT*() { return (FLOAT*)_dat; } 49 | operator const FLOAT*() { return (FLOAT*)_dat; } 50 | FLOAT* pointer() { return (FLOAT*)_dat; } 51 | FLOAT* pointer(int m) { return &_dat[m*_n]; } 52 | // creates vec objects 53 | // caller must have allocated the array for the vec objects 54 | // the data is a shallow copy! 55 | // transpose and call row_vecs for col_vecs! 56 | void row_vecs(int &cnt, VecABR *vecs); 57 | 58 | MatABR & operator=(const FLOAT &val); 59 | // DEEP 60 | MatABR & operator=(MatABR &A); 61 | ~MatABR(); 62 | // Deep copy unless shallow == true 63 | void copy(MatABR &receiver, bool shallow=0) const; 64 | 65 | void set_from_ascii(std::ifstream &stream, int m, int n, MatABR &out); 66 | void set_from_ascii(std::ifstream &stream, MatABR &out); 67 | void set_from_ascii(const char *file, bool without_axes=0); 68 | void set_from_binary(const char *file); 69 | void file_rows_cols(std::ifstream &stream, int &rows, int &cols); 70 | // tnt_array2d_utils.h has a good example (use ifstream) 71 | 72 | // shallow copy and no ownership of memory 73 | void set(int m, int n, FLOAT *arr); 74 | // shallow copy and no ownership of memory 75 | void set(MatABR &A); 76 | 77 | bool all_equal() { 78 | return _dat.all_equal(); 79 | } 80 | 81 | // Deletes the object's memory (if not shallow) and takes ownership 82 | // of the array memory (we will call delete[]) 83 | void take(int m, int n, FLOAT *arr); 84 | // Deletes previous memory (if not shallow) and takes ownership 85 | // of the other's memory. 86 | void take(MatABR &A); 87 | 88 | // flattens the matrix and returns a vector 89 | void to_vec(VecABR &outvec, bool shallow=0); 90 | 91 | bool operator==(const MatABR &A); 92 | 93 | bool shallow() { return _dat.shallow(); } 94 | int dim1() const { return _m; } 95 | int dim2() const { return _n; } 96 | int mlen() const { return _m; } 97 | int nlen() const { return _n; } 98 | int rows() const { return _m; } 99 | int cols() const { return _n; } 100 | 101 | FLOAT& operator()(int m, int n) { 102 | #ifdef JTP_BOUNDS_CHECK 103 | if (n < 0) { puts("n < 0"); exit(1); } 104 | if (n >= _n) { puts("n >= _n"); exit(1); } 105 | if (m < 0) { puts("m < 0"); exit(1); } 106 | if (m >= _m) { puts("m >= _m"); exit(1); } 107 | #endif 108 | return _dat[m*_n + n]; 109 | } 110 | const FLOAT& operator()(int m, int n) const { 111 | #ifdef JTP_BOUNDS_CHECK 112 | if (n < 0) { puts("n < 0"); exit(1); } 113 | if (n >= _n) { puts("n >= _n"); exit(1); } 114 | if (m < 0) { puts("m < 0"); exit(1); } 115 | if (m >= _m) { puts("m >= _m"); exit(1); } 116 | #endif 117 | return _dat[m*_n + n]; 118 | } 119 | 120 | // NOTE: All assignment operators act on the caller! 121 | void operator+=(const MatABR &A); 122 | void operator-=(const MatABR &A); 123 | void operator*=(const MatABR &A); 124 | void operator/=(const MatABR &A); 125 | void operator+=(const FLOAT val) { _dat += val; } 126 | void operator-=(const FLOAT val) { _dat -= val; } 127 | void operator*=(const FLOAT val) { _dat *= val; } 128 | void operator/=(const FLOAT val) { _dat /= val; } 129 | 130 | 131 | void add(const MatABR &toadd, MatABR &out); 132 | void sub(const MatABR &tosub, MatABR &out); 133 | void mul(const MatABR &tomul, MatABR &out); 134 | void div(const MatABR &todiv, MatABR &out); 135 | 136 | // returns the transpose in out 137 | void transpose(MatABR &out); 138 | 139 | void std_normal() { _dat.std_normal(); } 140 | void logarithm(double base) { _dat.logarithm(base); } 141 | void expand(MatABR &result, FLOAT match, int expand_x_lt, int expand_x_rt, int expand_y_up, int expand_y_dn, int expand_diag_lt_up, int expand_diag_rt_up, int expand_diag_lt_dn, int expand_diag_rt_dn ); 142 | 143 | void min_max(FLOAT &_min, FLOAT &_max) { _dat.min_max(_min,_max); } 144 | double avg() { return _dat.avg(); } 145 | //void operator++(); 146 | //void operator--(); 147 | 148 | FLOAT sum() { return _dat.sum(); } // return the sum of the entire matrix 149 | FLOAT sum(int m); // return the sum of a given row 150 | // Returns in a vector all the values matching mask value 151 | void mask_as_vec(FLOAT return_val, MatI &mask, VecABR &out); 152 | 153 | // prints the bare matrix as ascii 154 | void print(bool without_axes=0); 155 | void print(const char *file, bool without_axes=0); 156 | void print(std::ostream &fout, bool without_axes=0); 157 | 158 | // writes the matrix as binary (includes # rows first and # cols as 159 | // ints) 160 | void write(const char *file=NULL); 161 | 162 | 163 | // @TODO need to write these guys: 164 | // prints the matrix in binary format: 165 | // (int) num cols (int) num rows (FLOAT) data 166 | // void write(const char *file); 167 | // void write(std::ofstream &fout); 168 | 169 | private: 170 | void _copy(FLOAT *p1, const FLOAT *p2, int len) const; 171 | 172 | }; // End class MatABR 173 | 174 | // END TEMPLATE 175 | 176 | } // End namespace 177 | 178 | #endif 179 | 180 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/mata2mat.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "mat.h" 9 | 10 | 11 | using namespace VEC; 12 | int main (int argc, char *argv[]) { 13 | 14 | if (argc == 1) { 15 | std::cerr << 16 | "**********************************************************************\n" << 17 | "usage: mata2mat file.mata ... \n" << 18 | "**********************************************************************\n" << 19 | "outputs file.mat\n" << 20 | "converts ascii mata file into binary file (does NOT delete original)\n" << 21 | "\n" << 22 | "FORMATS: \n" << 23 | "mat = binary (all vals 4 bytes):\n" << 24 | " (int)#rows,(int)#cols,(floats)matrix_data_values...\n" << 25 | "mata = ascii format (space delimited, with newlines as shown below):\n" << 26 | " #rows, #cols\n" << 27 | " matrix data row1\n" << 28 | " matrix data row2\n" << 29 | " matrix data row3 ...\n" << 30 | "**********************************************************************\n"; 31 | exit(1); 32 | } 33 | /************************************************************ 34 | * GET ARGUMENTS 35 | ************************************************************/ 36 | int i; 37 | char file[1024]; 38 | char outfile[1024]; 39 | 40 | MatF mat; 41 | for (i = 1; i < argc; i++) { 42 | strcpy(file, argv[i]); 43 | strcpy(outfile, file); 44 | outfile[strlen(outfile)-1] = '\0'; 45 | //std::cerr << "creating: " << outfile << "\n"; 46 | mat.set_from_ascii(file); 47 | mat.write(outfile); 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/not_using/README.txt: -------------------------------------------------------------------------------- 1 | requires argtable library to compile and run and I've removed that as a dependency 2 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/not_using/outliers.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include "string.h" 5 | #include "math.h" 6 | 7 | // 3RD PARTY 8 | #include 9 | 10 | // MINE 11 | #include "vec.h" 12 | #include "mat.h" 13 | 14 | #define DEBUG (0) 15 | 16 | using namespace VEC; 17 | 18 | int mymain(double *deviations, int deviations_cnt, const char **infiles, int infile_cnt ); 19 | 20 | int main (int argc, char **argv) { 21 | struct arg_lit *help = arg_lit0("h", "help", "prints this help and exits"); 22 | struct arg_dbl *deviations = arg_dbl0("d", "dev", "", "deviations cutoff (default 4.0)"); 23 | struct arg_file *infiles = arg_filen(NULL, NULL, NULL,1,1024, "files to align (first is template)"); 24 | struct arg_end *end = arg_end(20); 25 | void* argtable[] = {help, deviations, infiles, end}; 26 | const char* progname = "outliers"; 27 | int nerrors; 28 | int exitcode=0; 29 | 30 | /* verify the argtable[] entries were allocated sucessfully */ 31 | if (arg_nullcheck(argtable) != 0) 32 | { 33 | /* NULL entries were detected, some allocations must have failed */ 34 | printf("%s: insufficient memory\n",progname); 35 | exitcode=1; 36 | goto exit; 37 | } 38 | 39 | /* set any command line default values prior to parsing */ 40 | 41 | /* Parse the command line as defined by argtable[] */ 42 | nerrors = arg_parse(argc,argv,argtable); 43 | 44 | if (help->count > 0 || argc < 2) 45 | { 46 | printf("*************************************************************************\n"); 47 | printf("Usage: %s ", progname); 48 | arg_print_syntax(stdout,argtable,"\n"); 49 | printf("\n"); 50 | printf("tosses out outliers from regression line beyond a certain deviation\n"); 51 | printf("\n"); 52 | arg_print_glossary(stdout,argtable," %-26s %s\n"); 53 | printf("\n"); 54 | printf("*************************************************************************\n"); 55 | 56 | exitcode=0; 57 | goto exit; 58 | } 59 | 60 | /* If the parser returned any errors then display them and exit */ 61 | if (nerrors > 0) 62 | { 63 | /* Display the error details contained in the arg_end struct.*/ 64 | arg_print_errors(stdout,end,progname); 65 | printf("Try '%s --help' for more information.\n",progname); 66 | exitcode=1; 67 | goto exit; 68 | } 69 | 70 | /* normal case: take the command line options at face value */ 71 | exitcode = mymain(deviations->dval, deviations->count, infiles->filename, infiles->count); 72 | 73 | exit: 74 | /* deallocate each non-null entry in argtable[] */ 75 | arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); 76 | 77 | return exitcode; 78 | } 79 | 80 | int mymain(double *deviations, int deviations_cnt, const char **infiles, int infile_cnt ) { 81 | int i; 82 | 83 | for (int cnt = 0; cnt < infile_cnt; ++cnt) { 84 | double dev; 85 | if (deviations_cnt == 1) { 86 | dev = deviations[0]; // if they give one deviation, use it for all 87 | } 88 | else if (deviations_cnt == 0) { 89 | // Set deviations default: 90 | dev = 4.0; 91 | } 92 | else { 93 | dev = deviations[cnt]; // multiple deviatsion, one for each file 94 | } 95 | char file[1024]; 96 | strcpy(file, infiles[cnt]); 97 | 98 | if (DEBUG) { 99 | std::cerr << "**********************************************\n"; 100 | std::cerr << "file: " << file << "\n"; 101 | std::cerr << "**********************************************\n"; 102 | } 103 | 104 | MatD mat; 105 | mat.set_from_ascii(file, 1); 106 | VecD vecs[2]; 107 | int cnt; 108 | MatD as_rows; 109 | mat.transpose(as_rows); 110 | as_rows.row_vecs(cnt, vecs); 111 | //vecs[0].print(); 112 | //vecs[1].print(); 113 | 114 | double rsq, slope, y_intercept; 115 | VecD::rsq_slope_intercept(vecs[0], vecs[1], rsq, slope, y_intercept); 116 | //printf("rsq %f slope %f intercept %f\n", rsq, slope, y_intercept); 117 | //mat.print(); 118 | // mx + b 119 | // y = slope(x) + intercept 120 | 121 | // Get the differences from the regression line 122 | // expected_y = slope(x) + intercept 123 | // ydiff = actual_y - expected_y 124 | // run = ydiff/slope 125 | ////////////////// ydiff = abs(actual_y - expected_y) 126 | // ydiff / run = tan a 127 | // sin a = run / x 128 | // final = run/( sin(arctan(ydiff/run)) ) 129 | /////////////////// if (actual_y - expected_y) < 0 the diff should be (-) 130 | VecD residuals(vecs[0].length()); 131 | for (i = 0; i < vecs[0].length(); ++i) { 132 | double expected_y = (slope*vecs[0][i]) + y_intercept; 133 | double ydiff = vecs[1][i] - expected_y; 134 | double run = ydiff/slope; 135 | residuals[i] = run/( sin(atan(ydiff/run)) ); 136 | } 137 | //puts("RESIDUALS: "); 138 | //residuals.print(); 139 | //puts("END RESIDUALS: "); 140 | 141 | // get the mean and standard deviation 142 | double mean, stddev; 143 | residuals.sample_stats(mean, stddev); 144 | //printf("m: %f std: %f\n", mean, stddev); 145 | 146 | // for each difference calculate standard deviations 147 | MatD acceptable_tmp(vecs[0].length(), 2); 148 | 149 | int num_accept = 0; 150 | int not_accept = 0; 151 | for (i = 0; i < residuals.length(); ++i) { 152 | // #stddevsaway = abs(residuals[i] - mean)/stddev ); 153 | double point_devs = (residuals[i] - mean)/stddev; 154 | if (point_devs < 0.0) { point_devs = -1.0*point_devs; } // abs val 155 | 156 | if (point_devs <= dev) { // acceptable 157 | //printf("acceptable dev: %f\n", point_devs); 158 | acceptable_tmp(num_accept,0) = vecs[0][i]; 159 | acceptable_tmp(num_accept,1) = vecs[1][i]; 160 | ++num_accept; 161 | } 162 | else { // not acceptable! toss out 163 | //printf("NOT ACCEPTABLE: %f, %f\n", vecs[0][i], vecs[1][i]); 164 | ++not_accept; 165 | } 166 | } 167 | 168 | printf("TOSSED %d points > %.1f deviations from regression line (of %d total) reading file: %s\n", not_accept, dev, residuals.length(), file); 169 | MatD accept(num_accept,2,(double*)acceptable_tmp,1); 170 | 171 | 172 | // print to file named file+.4.0out 173 | char devs_str[10]; 174 | sprintf(devs_str, "%.1f",dev); 175 | strcat(file, "."); 176 | strcat(file, devs_str); 177 | strcat(file, "out"); 178 | //puts("Accept"); 179 | //accept.print(); 180 | accept.print(file,1); 181 | } 182 | return 0; 183 | } 184 | 185 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/not_using/test_outliers.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mat.h" 4 | #include "vec.h" 5 | 6 | 7 | using namespace VEC; 8 | class OutliersTestSuite : public CxxTest::TestSuite 9 | { 10 | public: 11 | // Assures that the same data is representated before and after 12 | // conversions 13 | void test_outliers( void ) { 14 | system("./outliers -d 1.2 tfiles/tmptimes.txt"); 15 | //TS_ASSERT_EQUALS(fromascii.mzlen(), ch_mz_vals); 16 | } 17 | 18 | 19 | }; 20 | 21 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/obiwarp.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | #include 7 | 8 | // MINE 9 | #include "vec.h" 10 | #include "mat.h" 11 | #include "lmat.h" 12 | #include "dynprog.h" 13 | #include "pngio.h" 14 | #include "cmdparser.h" 15 | 16 | /********************************************/ 17 | char * VERSION = (char *)"0.9.4"; 18 | /********************************************/ 19 | 20 | #define DEBUG (0) 21 | 22 | bool format_is_labelless(const char *format); 23 | 24 | int main (int argc, char **argv) { 25 | // NOTE: use outfile as indicator if option passed in as opts.outfile! 26 | // because we can set opts.outfile to NULL and other routines will 27 | // automatically write to stdout! 28 | bool outfile = 0; 29 | bool outfile_is_stdout = 0; 30 | 31 | CmdParser opts(argc, argv, VERSION); 32 | 33 | if (opts.outfile != NULL) { 34 | outfile = 1; 35 | if (!strcmp(opts.outfile, "STDOUT")) { 36 | outfile_is_stdout = 1; 37 | opts.outfile = NULL; 38 | } 39 | } 40 | 41 | char file1[1024]; 42 | char file2[1024]; 43 | strcpy(file1, opts.infiles[0]); 44 | strcpy(file2, opts.infiles[1]); 45 | 46 | // ************************************************************ 47 | // * READ IN FILES TO GET MAT 48 | // ************************************************************ 49 | LMat lmat1; 50 | LMat lmat2; 51 | MatF smat; 52 | DynProg dyn; 53 | 54 | if (!strcmp(opts.format, "mat")) { 55 | lmat1.set_from_binary_mat(file1); 56 | lmat2.set_from_binary_mat(file2); 57 | } 58 | else if (!strcmp(opts.format, "mata")) { 59 | lmat1.set_from_ascii_mat(file1); 60 | lmat2.set_from_ascii_mat(file2); 61 | } 62 | else if (!strcmp(opts.format, "lmat")) { 63 | lmat1.set_from_binary(file1); 64 | lmat2.set_from_binary(file2); 65 | } 66 | else if (!strcmp(opts.format, "lmata")) { 67 | lmat1.set_from_ascii(file1); 68 | lmat2.set_from_ascii(file2); 69 | } 70 | ////puts("LMAT1 AND LMAT2"); lmat1.print(); lmat2.print(); 71 | 72 | // ************************************************************ 73 | // * SCORE THE MATRICES 74 | // ************************************************************ 75 | if (DEBUG) { 76 | std::cerr << "Scoring the mats!\n"; 77 | } 78 | if (opts.smat_in != NULL) { 79 | smat.set_from_binary(opts.smat_in); 80 | dyn._smat = &smat; 81 | } 82 | else { 83 | dyn.score(*(lmat1.mat()), *(lmat2.mat()), smat, opts.score); 84 | // SETTING THE SMAT TO BE std normal 85 | if (!opts.nostdnrm) { 86 | if (!smat.all_equal()) { 87 | smat.std_normal(); 88 | } 89 | } 90 | if (!strcmp(opts.score,"euc")) { 91 | smat *= -1; // inverting euclidean 92 | } 93 | } 94 | if (opts.smat_out != NULL) { 95 | std::cerr << "Writing binary smat to '" << opts.smat_out << "'\n"; 96 | smat.write(opts.smat_out); 97 | //smat.print(smat_out_files[0]); 98 | exit(0); 99 | } 100 | 101 | // ************************************************************ 102 | // * PREPARE GAP PENALTY ARRAY 103 | // ************************************************************ 104 | 105 | MatF time_tester; 106 | MatF time_tester_trans; 107 | VecF mpt; 108 | VecF npt; 109 | VecF mOut_tm; 110 | VecF nOut_tm; 111 | 112 | int gp_length = smat.rows() + smat.cols(); 113 | 114 | VecF gp_array; 115 | dyn.linear_less_before(opts.gap_extend,opts.gap_init,gp_length,gp_array); 116 | 117 | // ************************************************************ 118 | // * DYNAMIC PROGRAM 119 | // ************************************************************ 120 | int minimize = 0; 121 | if (DEBUG) { 122 | std::cerr << "Dynamic Time Warping Score Matrix!\n"; 123 | } 124 | dyn.find_path(smat, gp_array, minimize, opts.factor_diag, opts.factor_gap, opts.local, opts.init_penalty); 125 | 126 | VecI mOut; 127 | VecI nOut; 128 | dyn.warp_map(mOut, nOut, opts.response, minimize); 129 | //puts("mOUT"); mOut.print(); nOut.print(); 130 | 131 | // Major output unless its the only case where we don't need warped time 132 | // values 133 | if (!(outfile_is_stdout && format_is_labelless(opts.format))) { 134 | // MAJOR OUTPUT: 135 | VecF nOutF; 136 | VecF mOutF; 137 | lmat1.tm_axis_vals(mOut, mOutF); 138 | lmat2.tm_axis_vals(nOut, nOutF); // 139 | lmat2.warp_tm(nOutF, mOutF); 140 | lmat2.tm()->print(1); 141 | } 142 | 143 | // No labels on matrix and we have an outfile to produce 144 | // Needs to be after MAJOR OUTPUT since it warps the data! 145 | if (format_is_labelless(opts.format) && outfile) { 146 | // @TODO: implement data warping here 147 | } 148 | 149 | // All subroutines below should write to the specified file 150 | // if the file == NULL then they should write to stdout! 151 | // opts.outfile is set to NULL if "STDOUT" is specified! 152 | if (outfile) { 153 | if (!strcmp(opts.format, "mat")) { 154 | lmat2.mat()->write(opts.outfile); 155 | } 156 | else if (!strcmp(opts.format, "mata")) { 157 | lmat2.mat()->print(opts.outfile); 158 | } 159 | else if (!strcmp(opts.format, "lmat")) { 160 | lmat2.write(opts.outfile); 161 | } 162 | else if (!strcmp(opts.format, "lmata")) { 163 | lmat2.print(opts.outfile); 164 | } 165 | else { 166 | std::cerr << "Can't output to" << opts.format << "format (yet)\n"; 167 | exit(0); 168 | } 169 | } 170 | 171 | // After all other output to stdout 172 | if (opts.timefile != NULL) { 173 | time_tester.set_from_ascii(opts.timefile, 1); // no headers on the files 174 | time_tester.transpose(time_tester_trans); 175 | mpt.set(time_tester_trans.cols(), time_tester_trans.pointer(0)); 176 | npt.set(time_tester_trans.cols(), time_tester_trans.pointer(1)); 177 | float ssr, asr, sad, aad; 178 | dyn.path_accuracy((*lmat1._tm), (*lmat2._tm), mOut, nOut, mpt, npt, ssr, asr, sad, aad); 179 | printf("%f %f %f %f\n", ssr, asr, sad, aad); 180 | } 181 | 182 | 183 | if (opts.images) { 184 | PngIO wrt(1); 185 | char base_fn[1024]; 186 | strcpy(base_fn, "obi-warp_"); 187 | char tb_fn[1024]; 188 | strcpy(tb_fn, base_fn); 189 | strcat(tb_fn, "tb.png"); 190 | //char *tb_fn = "tb.png"; 191 | wrt.write(tb_fn, dyn._tb); 192 | char tbpath_fn[1024]; 193 | strcpy(tbpath_fn, base_fn); 194 | strcat(tbpath_fn, "tbpath.png"); 195 | wrt.write(tbpath_fn, dyn._tbpath); 196 | 197 | char asmat_fn[1024]; 198 | strcpy(asmat_fn, base_fn); 199 | strcat(asmat_fn, "asmat.png"); 200 | //wrt.write(asmat_fn, dyn._asmat); 201 | 202 | //strcpy(base_fn, "tb.png"); 203 | //char *tbpath_fn = "tbpath.png"; 204 | //char *tbscores_fn = "tbscores.png"; 205 | //wrt.write(tbscores_fn, dyn._tbscores); 206 | //char *asmat_fn = "asmat.png"; 207 | //wrt.write(asmat_fn, dyn._asmat); 208 | char *smat_fn = (char *)"smat.png"; 209 | //wrt.write(smat_fn, *dyn._smat); 210 | } 211 | 212 | /* 213 | char silly[100]; 214 | strcpy(silly, "png_"); 215 | char tmpp[5]; 216 | sprintf(tmpp, "%d", i); 217 | strcat(silly, tmpp); 218 | strcat(silly, ".png"); 219 | 220 | PngIO wrt(0); 221 | //wrt.write(silly, dyn._tbpath); 222 | wrt.write(silly, _scorepath); 223 | */ 224 | 225 | return 0; 226 | } 227 | 228 | bool format_is_labelless(const char *format) { 229 | if (!strcmp(format,"mat") || !strcmp(format,"mata")) { 230 | return 1; 231 | } 232 | else { 233 | return 0; 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/obiwarp.dsp: -------------------------------------------------------------------------------- 1 | # Microsoft Developer Studio Project File - Name="obiwarp" - Package Owner=<4> 2 | # Microsoft Developer Studio Generated Build File, Format Version 6.00 3 | # ** DO NOT EDIT ** 4 | 5 | # TARGTYPE "Win32 (x86) Console Application" 0x0103 6 | 7 | CFG=obiwarp - Win32 Debug 8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE, 9 | !MESSAGE use the Export Makefile command and run 10 | !MESSAGE 11 | !MESSAGE NMAKE /f "obiwarp.mak". 12 | !MESSAGE 13 | !MESSAGE You can specify a configuration when running NMAKE 14 | !MESSAGE by defining the macro CFG on the command line. For example: 15 | !MESSAGE 16 | !MESSAGE NMAKE /f "obiwarp.mak" CFG="obiwarp - Win32 Debug" 17 | !MESSAGE 18 | !MESSAGE Possible choices for configuration are: 19 | !MESSAGE 20 | !MESSAGE "obiwarp - Win32 Release" (based on "Win32 (x86) Application") 21 | !MESSAGE "obiwarp - Win32 Debug" (based on "Win32 (x86) Application") 22 | !MESSAGE 23 | 24 | # Begin Project 25 | # PROP AllowPerConfigDependencies 0 26 | # PROP Scc_ProjName "" 27 | # PROP Scc_LocalPath "" 28 | CPP=cl.exe 29 | RSC=rc.exe 30 | 31 | !IF "$(CFG)" == "obiwarp - Win32 Release" 32 | 33 | # PROP BASE Use_MFC 0 34 | # PROP BASE Use_Debug_Libraries 0 35 | # PROP BASE Output_Dir "Release" 36 | # PROP BASE Intermediate_Dir "Release" 37 | # PROP BASE Target_Dir "" 38 | # PROP Use_MFC 0 39 | # PROP Use_Debug_Libraries 0 40 | # PROP Output_Dir "Release" 41 | # PROP Intermediate_Dir "Release" 42 | # PROP Target_Dir "" 43 | # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c 44 | # ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c 45 | # ADD BASE RSC /l 0x409 /d "NDEBUG" 46 | # ADD RSC /l 0x409 /d "NDEBUG" 47 | BSC32=bscmake.exe 48 | # ADD BASE BSC32 /nologo 49 | # ADD BSC32 /nologo 50 | LINK32=link.exe 51 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 52 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 53 | 54 | !ELSEIF "$(CFG)" == "obiwarp - Win32 Debug" 55 | 56 | # PROP BASE Use_MFC 0 57 | # PROP BASE Use_Debug_Libraries 1 58 | # PROP BASE Output_Dir "Debug" 59 | # PROP BASE Intermediate_Dir "Debug" 60 | # PROP BASE Target_Dir "" 61 | # PROP Use_MFC 0 62 | # PROP Use_Debug_Libraries 1 63 | # PROP Output_Dir "Debug" 64 | # PROP Intermediate_Dir "Debug" 65 | # PROP Target_Dir "" 66 | # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c 67 | # ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c 68 | # ADD BASE RSC /l 0x409 /d "_DEBUG" 69 | # ADD RSC /l 0x409 /d "_DEBUG" 70 | BSC32=bscmake.exe 71 | # ADD BASE BSC32 /nologo 72 | # ADD BSC32 /nologo 73 | LINK32=link.exe 74 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept 75 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nodefaultlib:"libcmtd.lib" /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept 76 | 77 | !ENDIF 78 | 79 | # Begin Target 80 | 81 | # Name "obiwarp - Win32 Release" 82 | # Name "obiwarp - Win32 Debug" 83 | # Begin Group "Source" 84 | 85 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" 86 | # Begin Source File 87 | SOURCE=./vec.cpp 88 | # End Source File 89 | # Begin Source File 90 | SOURCE=./mat.cpp 91 | # End Source File 92 | # Begin Source File 93 | SOURCE=./lmat.cpp 94 | # End Source File 95 | # Begin Source File 96 | SOURCE=./dynprog.cpp 97 | # End Source File 98 | # Begin Source File 99 | SOURCE=./cmdparser.cpp 100 | # End Source File 101 | # Begin Source File 102 | SOURCE=./pngio.cpp 103 | # End Source File 104 | # Begin Source File 105 | SOURCE=./obiwarp.cpp 106 | # End Source File 107 | # End Group 108 | # Begin Group "Headers" 109 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" 110 | # Begin Source File 111 | SOURCE=./vec.h 112 | # End Source File 113 | # Begin Source File 114 | SOURCE=./mat.h 115 | # End Source File 116 | # Begin Source File 117 | SOURCE=./lmat.h 118 | # End Source File 119 | # Begin Source File 120 | SOURCE=./dynprog.h 121 | # End Source File 122 | # Begin Source File 123 | SOURCE=./cmdparser.h 124 | # End Source File 125 | # Begin Source File 126 | SOURCE=./pngio.h 127 | # End Source File 128 | # Begin Source File 129 | SOURCE=./obiwarp.h 130 | # End Source File 131 | # End Group 132 | # End Target 133 | # End Project 134 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/pngio.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "assert.h" 7 | #include "pngio.h" 8 | 9 | #define PNG_DEBUG 3 10 | 11 | #include "mat.h" 12 | 13 | using namespace VEC; 14 | 15 | PngIO::PngIO(bool bw): _bw(bw) { 16 | } 17 | 18 | void PngIO::write(char *file, MatI &mat) { 19 | // This could be made faster (faster printing of matrix [access pointer 20 | // directly] and input to matrix2png stdin 21 | char *_tmp = (char *)"tmp.tmp.tmp"; 22 | FILE *pOUT = fopen(_tmp, "w"); 23 | assert(pOUT); 24 | 25 | // Print data to file: 26 | fputs("CORNERLABEL", pOUT); 27 | for (int i = 0; i < mat.cols(); ++i) { 28 | fprintf(pOUT, "\t%d", i); 29 | } 30 | fputs("\n", pOUT); 31 | 32 | for (int m = 0; m < mat.rows(); ++m) { 33 | fprintf(pOUT, "%d", m); 34 | for (int n = 0; n < mat.cols(); ++n) { 35 | fprintf(pOUT, "\t%d", mat(m,n)); 36 | //printf("%d ", mat(m,n)); 37 | } 38 | //printf("\n"); 39 | fputs("\n", pOUT); 40 | } 41 | fclose(pOUT); 42 | 43 | // CREATE system call to matrix2png: 44 | char str1[1000]; 45 | strcpy (str1, "matrix2png -data "); 46 | strcat(str1, _tmp); 47 | strcat(str1, " "); 48 | if (_bw) { 49 | strcat(str1, "-mincolor white -maxcolor black"); 50 | } 51 | else { 52 | strcat(str1, "-mincolor green -maxcolor red"); 53 | } 54 | strcat(str1, " >"); 55 | strcat(str1, file); 56 | printf("*****************************************************\n"); 57 | printf("Calling: %s\n", str1); 58 | int ret = system(str1); 59 | printf("SYSTEM RETURNED %d\n", ret); 60 | printf("*****************************************************\n"); 61 | 62 | // CLEANUP: 63 | remove(_tmp); 64 | } 65 | 66 | void PngIO::write(char *file, MatF &mat) { 67 | // This could be made faster (faster printing of matrix [access pointer 68 | // directly] and input to matrix2png stdin 69 | char *_tmp = (char *)"tmp.tmp.tmp"; 70 | FILE *pOUT = fopen(_tmp, "w"); 71 | assert(pOUT); 72 | 73 | // Print data to file: 74 | fputs("CORNERLABEL", pOUT); 75 | for (int i = 0; i < mat.cols(); ++i) { 76 | fprintf(pOUT, "\t%d", i); 77 | } 78 | fputs("\n", pOUT); 79 | 80 | for (int m = 0; m < mat.rows(); ++m) { 81 | fprintf(pOUT, "%d", m); 82 | for (int n = 0; n < mat.cols(); ++n) { 83 | fprintf(pOUT, "\t%f", mat(m,n)); 84 | //printf("%d ", mat(m,n)); 85 | } 86 | //printf("\n"); 87 | fputs("\n", pOUT); 88 | } 89 | fclose(pOUT); 90 | 91 | // CREATE system call to matrix2png: 92 | char str1[1000]; 93 | strcpy (str1, (char *)"matrix2png -data "); 94 | strcat(str1, _tmp); 95 | strcat(str1, " "); 96 | if (_bw) { 97 | strcat(str1, "-mincolor white -maxcolor black"); 98 | } 99 | else { 100 | strcat(str1, "-mincolor green -maxcolor red"); 101 | } 102 | strcat(str1, " >"); 103 | strcat(str1, file); 104 | printf("*****************************************************\n"); 105 | printf("Calling: %s\n", str1); 106 | int ret = system(str1); 107 | printf("SYSTEM RETURNED %d\n", ret); 108 | printf("*****************************************************\n"); 109 | 110 | // CLEANUP: 111 | remove(_tmp); 112 | } 113 | 114 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/pngio.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _PNGIO_H 3 | #define _PNGIO_H 4 | 5 | #include "mat.h" 6 | 7 | using namespace VEC; 8 | 9 | class PngIO { 10 | private: 11 | int _bw; 12 | 13 | public: 14 | PngIO(bool bw=0); 15 | void write(char *file, MatI &mat); 16 | //bool write(char *file, VecI vec); 17 | void write(char *file, MatF &mat); 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_cmdparser.rb: -------------------------------------------------------------------------------- 1 | 2 | require 'test/unit' 3 | 4 | 5 | $WIN32 = false 6 | 7 | if ENV["OS"] =~ /Windows/ 8 | $WIN32 = true 9 | end 10 | 11 | OBIWARP_PATH = "../bin/obiwarp" + ($WIN32 ? ".exe" : "") 12 | 13 | TFILES = "tfiles/" 14 | LMAT1 = TFILES + 'tmp1.lmat' 15 | LMAT2 = TFILES + 'tmp1B.lmat' 16 | 17 | class MyTests < Test::Unit::TestCase 18 | def diagnostics(reply) 19 | #puts reply 20 | hash = {} 21 | looking = false 22 | reply.split("\n").each do |line| 23 | if line =~ /\*{10,}/ 24 | if looking 25 | looking = false 26 | else 27 | looking = true 28 | end 29 | elsif line =~ /(.*): (.*)/ && looking 30 | hash[$1.dup] = $2.dup 31 | end 32 | end 33 | #p hash 34 | hash 35 | end 36 | 37 | def ob 38 | OBIWARP_PATH 39 | end 40 | 41 | def test_min_input 42 | #puts OBIWARP_PATH 43 | assert( File.exist?(ob), "obiwarp executable is in #{OBIWARP_PATH}") 44 | reply = `"#{ob}"` 45 | assert_match( /USAGE:/, reply, "no values passed in" ) 46 | assert_match( /USAGE: #{File.basename(ob).gsub(/\.exe$/, '')}/, reply, "help progname matches executable") 47 | reply = `"#{ob}" only_1_file` 48 | assert_match( /USAGE:/, reply, "only one file passed in" ) 49 | end 50 | 51 | def test_bad_files_input 52 | reply = `#{ob} badfile1 badfile2` 53 | assert_match(/Cannot open/, reply) 54 | end 55 | 56 | def test_opts 57 | t_opt(["--format"], "format", "mat"); 58 | # What about no format give (should be same as LMAT1) 59 | t_opt_nil("format", "lmat") 60 | t_opt(%w(--outfile -o), "outfile", "myoutfilename"); 61 | t_opt(%w(--images), "images"); 62 | t_opt(%w(--timefile -t), "timefile", "mytimefile"); 63 | 64 | expect = %w(cor cov prd euc) 65 | expect.each do |arg| 66 | t_opt(%w(--score -s), "score", arg) 67 | end 68 | t_opt(%w(--local -l), "local"); 69 | t_opt(%w(--nostdnrm), "nostdnrm"); 70 | t_opt_split(%w(--factor -f), "factor_diag", "factor_gap", "3.2,2.2") 71 | t_opt_split(%w(--gap -g), "gap_init", "gap_extend", "3.2,2.2") 72 | t_opt(%w(--init -i), "init_penalty", 2.1) 73 | t_opt(%w(--response -r), "response", 2.3); 74 | end 75 | 76 | ########################################## 77 | # HELPER FUNCS: 78 | ########################################## 79 | 80 | # For testing args like this: '23.3,5.2' 81 | def t_opt_split(opt_list, varname1, varname2, val) 82 | opt_list.each do |opt| 83 | reply = `#{ob} #{opt} #{val} --diagnostics #{LMAT1} #{LMAT2}` 84 | hash = diagnostics(reply) 85 | val1, val2 = val.split(",") 86 | assert_equal("#{val1}", hash[varname1]) 87 | assert_equal("#{val2}", hash[varname2]) 88 | end 89 | end 90 | 91 | # for a variable we expect to see, even though no variables are passed in 92 | def t_opt_nil(varname, val) 93 | cmd = "#{ob} --diagnostics #{LMAT1} #{LMAT2}" 94 | reply = `#{cmd}` 95 | hash = diagnostics(reply) 96 | assert_equal("#{val}", hash[varname]) 97 | end 98 | 99 | # for testing normal options 100 | # opt_list is a list of equivalent options 101 | # varname is the name of the diagnostic hash variable name 102 | # val is the value of the option passed in and the value expected out 103 | # if val == nil then the option is a flag and the output should be == 1 104 | def t_opt(opt_list, varname, val=nil) 105 | opt_list.each do |opt| 106 | cmd = "#{ob} #{opt} #{val} --diagnostics #{LMAT1} #{LMAT2}" 107 | #puts cmd 108 | reply = `#{cmd}` 109 | #puts "REPLY" 110 | #puts reply 111 | #puts "END PREPFYUDF" 112 | hash = diagnostics(reply) 113 | #p hash 114 | if val == nil 115 | assert_equal("1", hash[varname]) 116 | else 117 | #puts "AHSHH" + "#{val}" 118 | #puts hash["score"] 119 | assert_equal("#{val}", hash[varname]) 120 | end 121 | end 122 | end 123 | 124 | end 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_lmat.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "lmat.h" 4 | #include "mat.h" 5 | #include "vec.h" 6 | 7 | 8 | using namespace VEC; 9 | class LMatTestSuite : public CxxTest::TestSuite 10 | { 11 | public: 12 | void test_creation( void ) { 13 | LMat testing; 14 | TS_ASSERT_EQUALS(testing._mz_vals, 0); 15 | TS_ASSERT_EQUALS(testing._tm_vals, 0); 16 | } 17 | 18 | void test_warp_tm( void ) { 19 | float self_arr[5] = {0,3,5,6,12}; 20 | float other_arr[5] = {1,3,7,9,20}; 21 | VecF selfy(5,self_arr,1); 22 | VecF other(5,other_arr,1); 23 | float time_arr[16] = {0,1,2,3,4,4.1,5,6,7,8,9,10,10.001,11,12.1, 13.1}; 24 | float answ_arr[16] = {1, 1.28863, 2.02171, 3, 4.75862, 4.98493, 7, 9, 10.9224, 12.8188, 14.6819, 16.5046, 16.5064, 18.2797, 20.1687, 21.8204}; 25 | VecF answ(16, answ_arr,1); 26 | LMat obj; 27 | obj._tm->set(16, time_arr); 28 | obj.warp_tm(selfy, other); 29 | for (int i = 0; i < 16; i++) { 30 | TS_ASSERT_DELTA(answ[1], obj.tm()->at(1), 0.001); 31 | } 32 | } 33 | 34 | void test_ascii_read_write( void ) { 35 | LMat readin; 36 | readin.set_from_ascii("tfiles/tmp1.lmata"); 37 | TS_ASSERT_EQUALS(readin.mzlen(), 30); 38 | TS_ASSERT_EQUALS(readin.tmlen(), 40); 39 | float _mz[30] = { 400,401,402,403,404,405,406,407,408,409, 40 | 410,411,412,413,414,415,416,417,418,419, 41 | 420,421,422,423,424,425,426,427,428,429 42 | }; 43 | VecF mzv(30,_mz,1); 44 | TS_ASSERT_EQUALS( mzv, *(readin.mz()) ); 45 | TS_ASSERT_DELTA( (*readin.mat())(0,0), 6139950.06794636, 0.1 ); 46 | TS_ASSERT_DELTA( (*readin.mat())(39,29), 2292810.65100822, 0.1 ); 47 | TS_ASSERT_DELTA((*readin.mat())(7,8), 1397963.17842461, 0.1 ); 48 | 49 | // ******************************************* 50 | // TEST COORDS 51 | // ******************************************* 52 | VecI obj1(4); 53 | obj1[0] = 1; 54 | obj1[1] = 3; 55 | obj1[2] = 4; 56 | obj1[3] = 8; 57 | VecF out; 58 | readin.mz_axis_vals(obj1,out); 59 | TS_ASSERT_DELTA(out[0], 401, 0.001); 60 | TS_ASSERT_DELTA(out[1], 403, 0.001); 61 | TS_ASSERT_DELTA(out[2], 404, 0.001); 62 | TS_ASSERT_DELTA(out[3], 408, 0.001); 63 | readin.tm_axis_vals(obj1,out); 64 | TS_ASSERT_DELTA(out[0], 1212.34, 0.001); 65 | TS_ASSERT_DELTA(out[1], 1236.34, 0.001); 66 | TS_ASSERT_DELTA(out[2], 1248.34, 0.001); 67 | TS_ASSERT_DELTA(out[3], 1296.34, 0.001); 68 | // ******************************************* 69 | 70 | char *tmpfile = (char *)"tmp.tmp.tmp"; 71 | readin.print(tmpfile); 72 | 73 | LMat readnew; 74 | readnew.set_from_ascii(tmpfile); 75 | TS_ASSERT_EQUALS(readnew.mzlen(), 30); 76 | TS_ASSERT_EQUALS(readnew.tmlen(), 40); 77 | TS_ASSERT_EQUALS(readnew.tmlen(), 40); 78 | TS_ASSERT_EQUALS( mzv, *(readnew.mz()) ); 79 | TS_ASSERT_DELTA( (*readnew.mat())(0,0), 6139950.06794636, 0.1 ); 80 | TS_ASSERT_DELTA( (*readnew.mat())(39,29), 2292810.65100822, 0.1 ); 81 | TS_ASSERT_DELTA((*readnew.mat())(7,8), 1397963.17842461, 0.1 ); 82 | remove(tmpfile); 83 | 84 | // Test printing to stdout 85 | //readin.set_from_ascii("tfiles/tmp1.lmata"); 86 | //readin.print(); 87 | } 88 | 89 | void test_creation_from_mat( void ) { 90 | LMat obj; 91 | obj.set_from_binary_mat("tfiles/file1.mat"); 92 | TS_ASSERT_EQUALS(obj.tmlen(), 4); 93 | TS_ASSERT_EQUALS(obj.mzlen(), 3); 94 | TS_ASSERT_EQUALS((*obj.mat())(0,0), 1.0); 95 | TS_ASSERT_EQUALS((*obj.mat())(3,2), 12.0); 96 | } 97 | 98 | void test_creation_from_mata( void ) { 99 | LMat obj; 100 | obj.set_from_ascii_mat("tfiles/file1.mata"); 101 | TS_ASSERT_EQUALS(obj.tmlen(), 4); 102 | TS_ASSERT_EQUALS(obj.mzlen(), 3); 103 | TS_ASSERT_EQUALS((*obj.mat())(0,0), 1.0); 104 | TS_ASSERT_EQUALS((*obj.mat())(3,2), 12.0); 105 | } 106 | 107 | void test_binary_read_write( void ) { 108 | int ch_mz_vals = 30; 109 | int ch_tm_vals = 40; 110 | LMat readin; 111 | readin.set_from_ascii("tfiles/tmp1.lmata"); 112 | TS_ASSERT_EQUALS(readin.mzlen(), ch_mz_vals); 113 | TS_ASSERT_EQUALS(readin.tmlen(), ch_tm_vals); 114 | float *mptr = (float*)(*readin.mat()); 115 | TS_ASSERT_DELTA(mptr[0], 6139950.06794636, 0.1 ); 116 | TS_ASSERT_DELTA(mptr[(ch_mz_vals*ch_tm_vals)-1], 2292810.65100822, 0.1 ); 117 | TS_ASSERT_DELTA((*readin.mat())(7,8), 1397963.17842461, 0.1 ); 118 | 119 | char *tmpfile = (char *)"tmp2.tmp.tmp"; 120 | readin.write(tmpfile); 121 | LMat readnew(tmpfile); 122 | 123 | float _mz[30] = { 400,401,402,403,404,405,406,407,408,409, 124 | 410,411,412,413,414,415,416,417,418,419, 125 | 420,421,422,423,424,425,426,427,428,429 126 | }; 127 | float _tm[40] = { 1200.34, 1212.34, 1224.34, 1236.34, 1248.34, 128 | 1260.34, 1272.34, 1284.34, 1296.34, 1308.34, 1320.34, 1332.34, 129 | 1344.34, 1356.34, 1368.34, 1380.34, 1392.34, 1404.34, 1416.34, 130 | 1428.34, 1440.34, 1452.34, 1464.34, 1476.34, 1488.34, 1500.34, 131 | 1512.34, 1524.34, 1536.34, 1548.34, 1560.34, 1572.34, 1584.34, 132 | 1596.34, 1608.34, 1620.34, 1632.34, 1644.34, 1656.34, 1668.34 133 | }; 134 | VecF mzv(ch_mz_vals,_mz,1); 135 | VecF tmv(ch_tm_vals,_tm,1); 136 | TS_ASSERT_EQUALS(readnew.mzlen(), ch_mz_vals); 137 | TS_ASSERT_EQUALS(readnew.tmlen(), ch_tm_vals); 138 | TS_ASSERT_EQUALS( mzv, *(readnew.mz()) ); 139 | TS_ASSERT_EQUALS( tmv, *(readnew.tm()) ); 140 | TS_ASSERT_DELTA( (*readnew.mat())(0,0), 6139950.06794636, 0.1 ); 141 | TS_ASSERT_DELTA( (*readnew.mat())(7,8),1397963.17842461, 0.1 ); 142 | TS_ASSERT_DELTA( (*readnew.mat())(39,29), 2292810.65100822, 0.1 ); 143 | remove(tmpfile); 144 | 145 | // Test writing binary file to stdout 146 | //readin.write(); 147 | } 148 | 149 | 150 | }; 151 | 152 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_lmat_converters.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "lmat.h" 4 | #include "mat.h" 5 | #include "vec.h" 6 | 7 | 8 | using namespace VEC; 9 | class LMatTestSuite : public CxxTest::TestSuite 10 | { 11 | public: 12 | // Assures that the same data is representated before and after 13 | // conversions 14 | void test_conversions( void ) { 15 | // Depends on three file conversions performed elsewhere: 16 | // sh "./lmata2lmat tfiles/tmp1.lmata" 17 | // File.copy('tfiles/tmp1.lmat', 'tfiles/tmp1B.lmat') 18 | // sh "./lmat2lmata tfiles/tmp1B.lmat" 19 | 20 | int ch_mz_vals = 30; 21 | int ch_tm_vals = 40; 22 | 23 | float _mz[30] = { 400,401,402,403,404,405,406,407,408,409, 24 | 410,411,412,413,414,415,416,417,418,419, 25 | 420,421,422,423,424,425,426,427,428,429 26 | }; 27 | float _tm[40] = { 1200.34, 1212.34, 1224.34, 1236.34, 1248.34, 28 | 1260.34, 1272.34, 1284.34, 1296.34, 1308.34, 1320.34, 1332.34, 29 | 1344.34, 1356.34, 1368.34, 1380.34, 1392.34, 1404.34, 1416.34, 30 | 1428.34, 1440.34, 1452.34, 1464.34, 1476.34, 1488.34, 1500.34, 31 | 1512.34, 1524.34, 1536.34, 1548.34, 1560.34, 1572.34, 1584.34, 32 | 1596.34, 1608.34, 1620.34, 1632.34, 1644.34, 1656.34, 1668.34 33 | }; 34 | VecF mzv(ch_mz_vals,_mz,1); 35 | VecF tmv(ch_tm_vals,_tm,1); 36 | 37 | // ************************************************** 38 | // Set from ascii 39 | // ************************************************** 40 | LMat fromascii; 41 | fromascii.set_from_ascii("tfiles/tmp1.lmata"); 42 | 43 | // Assert that this guy is like we expect 44 | TS_ASSERT_EQUALS(fromascii.mzlen(), ch_mz_vals); 45 | TS_ASSERT_EQUALS(fromascii.tmlen(), ch_tm_vals); 46 | TS_ASSERT_EQUALS( mzv, *(fromascii.mz()) ); 47 | TS_ASSERT_EQUALS( tmv, *(fromascii.tm()) ); 48 | TS_ASSERT_DELTA( (*fromascii.mat())(0,0), 6139950.06794636, 0.1 ); 49 | TS_ASSERT_DELTA( (*fromascii.mat())(7,8),1397963.17842461, 0.1 ); 50 | TS_ASSERT_DELTA( (*fromascii.mat())(39,29), 2292810.65100822, 0.1 ); 51 | 52 | // ************************************************** 53 | // Read from binary 54 | // ************************************************** 55 | LMat readnew("tfiles/tmp1.lmat"); 56 | 57 | // Assert that it is identical to 'fromascii' 58 | TS_ASSERT_EQUALS(fromascii.mzlen(), readnew.mzlen()); 59 | TS_ASSERT_EQUALS(fromascii.tmlen(), readnew.tmlen()); 60 | // Problems in WINDOWS HERE:: 61 | TS_ASSERT_SAME_DATA((float*)(*fromascii.mat()),(float*)(*readnew.mat()),ch_mz_vals*ch_tm_vals); 62 | TS_ASSERT_SAME_DATA((float*)(*fromascii.mz()),(float*)(*readnew.mz()),ch_mz_vals); 63 | TS_ASSERT_SAME_DATA((float*)(*fromascii.tm()),(float*)(*readnew.tm()),ch_tm_vals); 64 | 65 | // ************************************************** 66 | // read from ascii 67 | // ************************************************** 68 | LMat fromascii2; 69 | fromascii2.set_from_ascii("tfiles/tmp1B.lmata"); 70 | 71 | TS_ASSERT_EQUALS(fromascii.mzlen(), fromascii2.mzlen()); 72 | TS_ASSERT_EQUALS(fromascii.tmlen(), fromascii2.tmlen()); 73 | // TS_ASSERT_SAME_DATA((float*)(*fromascii.mat()),(float*)(*fromascii2.mat()),ch_mz_vals*ch_tm_vals); 74 | TS_ASSERT_SAME_DATA((float*)(*fromascii.mz()),(float*)(*fromascii2.mz()),ch_mz_vals); 75 | TS_ASSERT_SAME_DATA((float*)(*fromascii.tm()),(float*)(*fromascii2.tm()),ch_tm_vals); 76 | 77 | } 78 | 79 | 80 | }; 81 | 82 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_mat_converters.rb: -------------------------------------------------------------------------------- 1 | 2 | require 'test/unit' 3 | require 'fileutils' 4 | 5 | TFILES = "tfiles/" 6 | MAT1 = TFILES + 'file1.mata' 7 | BINDIR = "../bin" 8 | 9 | class MatConvertersTest < Test::Unit::TestCase 10 | def test_mata2mat 11 | tmpmata = TFILES + "trash.mata" 12 | tmpmat_out = TFILES + "trash.mat" 13 | FileUtils.cp MAT1, tmpmata 14 | pr = "mata2mat" 15 | system "#{BINDIR}/#{pr} #{tmpmata}" 16 | assert(File.exist?(tmpmat_out), "#{tmpmat_out} exists") 17 | arr = IO.read(tmpmat_out).unpack('iif*') 18 | exp = [4,3, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0] 19 | assert_equal(exp, arr) 20 | File.unlink(tmpmata) 21 | assert(!File.exist?(tmpmata), "#{tmpmata} does not exist") 22 | 23 | pr = "mat2mata" 24 | tmpmat = TFILES + 'trash.mat' 25 | tmpmat_out = TFILES + 'trash.mata' 26 | assert(File.exist?(tmpmat), "#{tmpmat} exists") 27 | system "#{BINDIR}/#{pr} #{tmpmat}" 28 | assert(File.exist?(tmpmat_out), "#{tmpmat_out} exists") 29 | arr = IO.readlines(tmpmat_out) 30 | assert_equal(%w(4 3), arr[0].split(" ")) 31 | assert_equal(%w(1 2 3), arr[1].split(" ")) 32 | assert_equal(%w(10 11 12), arr[4].split(" ")) 33 | 34 | File.unlink(tmpmat) 35 | File.unlink(tmpmat_out) 36 | end 37 | 38 | 39 | end 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_obiwarp.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mat.h" 4 | #include "vec.h" 5 | 6 | 7 | using namespace VEC; 8 | class AlignTestSuite : public CxxTest::TestSuite 9 | { 10 | public: 11 | // @TODO: either get these smat tests working or move them over to ruby 12 | // 13 | // Assures that the same data is representated before and after 14 | // conversions 15 | void test_smat_io( void ) { 16 | #ifdef WIN32 17 | //system("../bin/obiwarp.exe -a -s product -x smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata"); 18 | #else 19 | //system("../bin/obiwarp -a -s product -x smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata"); 20 | #endif 21 | TS_ASSERT_EQUALS(0, 0); 22 | // MatF smat_f; 23 | // char *tmpfile = "smat_product.tmp"; 24 | // 25 | // smat_f.set_from_binary(tmpfile); 26 | // TS_ASSERT_DELTA(smat_f(0,0), 1.63204e+14, 1.0e11); 27 | // TS_ASSERT_DELTA(smat_f(0,1), 1.46614e+14, 1.0e11); 28 | // TS_ASSERT_DELTA(smat_f(39,45), 2.02115e+14, 1.0e11); 29 | // TS_ASSERT_EQUALS(smat_f.rows(), 40); 30 | // TS_ASSERT_EQUALS(smat_f.cols(), 46); 31 | /* 32 | if (WIN32) { 33 | system("obiwarp -a --smat_in smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata"); 34 | } 35 | else { 36 | system("./obiwarp -a --smat_in smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata"); 37 | } 38 | */ 39 | // remove(tmpfile); 40 | } 41 | 42 | }; 43 | 44 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_obiwarp.rb: -------------------------------------------------------------------------------- 1 | 2 | require 'test/unit' 3 | require 'ostruct' 4 | 5 | $WIN32 = false; if ENV["OS"] == 'Windows_NT' then $WIN32 = true end 6 | 7 | # updat_values copied from ruby facets: 8 | # http://facets.rubyforge.org/doc/api/core/classes/Hash.html 9 | class Hash 10 | def update_values 11 | each{ |k,v| store( k, yield(v) ) } 12 | end 13 | end 14 | 15 | OBIWARP_PATH = "../bin/obiwarp" + ($WIN32 ? ".exe" : '') 16 | TFILES = "tfiles/" 17 | 18 | hash = { 19 | :lmata1 => 'tmp1.lmata', 20 | :lmata1B => 'tmp1B.lmata', 21 | :lmata2 => 'tmp2.lmata', 22 | 23 | :lmat1 => 'tmp1.lmat', 24 | :lmat1B => 'tmp1B.lmat', 25 | :lmat2 => 'tmp2.lmat', 26 | :lmat_warped_default_G => 'tmp1B.lmat.warped_default', 27 | :lmat_warped_default => 'tmp1B.lmat.warped', 28 | 29 | :mat1 => 'tmp1.mat', 30 | :mat2 => 'file1.mat', 31 | :mat3 => 'file3.mat', 32 | :mat4 => 'file4.mat', 33 | :mata1 => 'tmp1.mata', 34 | :mata2 => 'file1.mata', 35 | :mata3 => 'file3.mata', 36 | :mata4 => 'file4.mata', 37 | :mat1_no_header => 'tmp1_no_header.mat', 38 | :mat2_no_header_messy => 'tmp1_no_header_messy.mat', 39 | }.update_values {|v| TFILES + v } 40 | 41 | F = OpenStruct.new(hash) 42 | 43 | # Basic tests to ensure that files are being read, things are being warped 44 | # when they are supposed to, etc. Options are tested in cmdparser. 45 | class ObiWarpTest < Test::Unit::TestCase 46 | 47 | @@lmat1_times = "1200.34 1212.34 1224.34 1236.34 1248.34 1260.34 1272.34 1284.34 1296.34 1308.34 1320.34 1332.34 1344.34 1356.34 1368.34 1380.34 1392.34 1404.34 1416.34 1428.34 1440.34 1452.34 1464.34 1476.34 1488.34 1500.34 1512.34 1524.34 1536.34 1548.34 1560.34 1572.34 1584.34 1596.34 1608.34 1620.34 1632.34 1644.34 1656.34 1668.34\n" 48 | @@mat1_times = "0 1 2 3 4 5\n" 49 | @@mat4_times = "0 1 2 3 4 5 6 7 8\n" 50 | def ob; OBIWARP_PATH end 51 | 52 | def test_min_input 53 | assert( File.exist?(ob), "obiwarp executable is in #{OBIWARP_PATH}") 54 | reply = `#{ob}` 55 | assert_match( /USAGE:/, reply, "no values passed in" ) 56 | assert_match( /USAGE: #{File.basename(ob).gsub(/\.exe$/,'')}/, reply, "help progname matches executable") 57 | reply = `#{ob} only_1_file` 58 | assert_match( /USAGE:/, reply, "only one file passed in" ) 59 | end 60 | 61 | # asserts that the file exists and is the same as "against" and deletes "file" 62 | 63 | def test_self_vs_self 64 | { F.mat1 => @@mat1_times, 65 | F.mata1 => @@mat1_times, 66 | F.lmat1 => @@lmat1_times, 67 | F.lmata1 => @@lmat1_times, 68 | F.mat4 => @@mat4_times, 69 | }.each do |k,v| 70 | vs_self(k,v) 71 | end 72 | end 73 | 74 | def vs_self(file, expected) 75 | reply = `#{ob} #{file} #{file}` 76 | assert_equal(expected, reply) 77 | end 78 | 79 | def test_vs_other 80 | [ 81 | [F.mat3, F.mat4, @@mat4_times], 82 | [F.mata3, F.mata4, @@mat4_times], 83 | [F.lmat2, F.lmat1, @@lmat1_times], 84 | [F.lmata2, F.lmata1, @@lmat1_times], 85 | ].each do |pair| 86 | assert_new_times(*pair) 87 | end 88 | end 89 | 90 | # returns true if the reply != the given times 91 | def assert_new_times(file1, file2, file2_times) 92 | reply = `#{ob} #{file1} #{file2}` 93 | #puts reply 94 | assert_equal(file2_times.chomp.split(" ").size, reply.chomp.split(" ").size, "same number of values") 95 | assert_not_equal(file2_times, reply, "times should not be the same after warping") 96 | end 97 | 98 | ############################################## 99 | ## HELPERS: 100 | ############################################## 101 | 102 | end 103 | 104 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/test_pngio.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mat.h" 5 | #include "vec.h" 6 | #include "pngio.h" 7 | 8 | 9 | using namespace VEC; 10 | class PngIOTestSuite : public CxxTest::TestSuite 11 | { 12 | public: 13 | void test_simple( void ) { 14 | MatI silly(4,4); 15 | //silly = 8; 16 | for (int m = 0; m < silly.rows(); ++m) { 17 | for (int n = 0; n < silly.cols(); ++n) { 18 | silly(m,n) = 0; 19 | } 20 | } 21 | silly(0,2) = 1; 22 | silly(1,3) = 1; 23 | silly(2,4) = 1; 24 | 25 | PngIO ioguy(1); 26 | ioguy.write("trial.png", silly); 27 | 28 | remove("trial.png"); 29 | } 30 | }; 31 | 32 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file1.mat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file1.mata: -------------------------------------------------------------------------------- 1 | 4 3 2 | 1 2 3 3 | 4 5 6 4 | 7 8 9 5 | 10 11 12 6 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file3.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file3.mat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file3.mata: -------------------------------------------------------------------------------- 1 | 8 10 2 | 220 21 20 34 56 76 23 19 43 657 3 | 230 21 29 34 56 76 10 18 43 657 4 | 240 21 32 34 56 76 43 17 43 657 5 | 230 21 42 34 56 46 43 16 43 600 6 | 220 21 52 34 56 76 43 15 43 557 7 | 210 21 62 34 56 76 43 16 43 457 8 | 200 21 82 34 56 76 43 17 43 357 9 | 130 21 92 34 56 76 43 18 43 257 10 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file4.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file4.mat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/file4.mata: -------------------------------------------------------------------------------- 1 | 9 10 2 | 240 20 32 34 56 76 43 17 43 657 3 | 230 21 42 34 56 46 43 16 43 600 4 | 210 21 50 34 56 76 43 15 43 552 5 | 210 23 62 34 66 56 43 16 43 457 6 | 200 21 82 34 56 76 43 17 43 350 7 | 130 22 90 34 56 76 43 18 43 237 8 | 220 21 20 34 56 76 23 19 43 657 9 | 230 21 29 34 56 76 10 18 43 657 10 | 230 21 29 34 56 76 10 18 43 657 11 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1.lmat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1.lmat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1.lmat.pts: -------------------------------------------------------------------------------- 1 | 1259.0 404.2 1000 2 | 1269.0 428.5 1300 3 | 1590.4 404.4 1500 4 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1.mat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1.mata: -------------------------------------------------------------------------------- 1 | 6 10 2 | 230 21 22 34 56 76 43 12 43 657 3 | 230 21 22 34 56 76 43 12 43 657 4 | 230 21 22 34 56 76 43 12 43 657 5 | 230 21 22 34 56 76 43 12 43 657 6 | 230 21 22 34 56 76 43 12 43 657 7 | 230 21 22 34 56 76 43 12 43 657 8 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1B.lmat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1B.lmat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1B.lmat.warped_default: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1B.lmat.warped_default -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1_no_header.mata: -------------------------------------------------------------------------------- 1 | 230 21 22 34 56 76 43 12 43 657 2 | 230 21 22 34 56 76 43 12 43 657 3 | 230 21 22 34 56 76 43 12 43 657 4 | 230 21 22 34 56 76 43 12 43 657 5 | 230 21 22 34 56 76 43 12 43 657 6 | 230 21 22 34 56 76 43 12 43 657 7 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp1_no_header_messy.mata: -------------------------------------------------------------------------------- 1 | 230 21 22 34 56 76 43 12 43 657 2 | 230 21 22 34 56 76 43 12 43 657 3 | 230 21 22 34 56 76 43 12 43 657 4 | 230 21 22 34 56 76 43 12 43 657 5 | 230 21 22 34 56 76 43 12 43 657 6 | 230 21 22 34 56 76 43 12 43 657 7 | 8 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmp2.lmat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp2.lmat -------------------------------------------------------------------------------- /third_party/obiwarp/lib/tfiles/tmptimes.txt: -------------------------------------------------------------------------------- 1 | 1215.0 1208.0 2 | 1272.0 1284.0 3 | 1536.0 1464.0 4 | 1620.0 1600.0 5 | 1656.0 1700.0 6 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/variations/README.txt: -------------------------------------------------------------------------------- 1 | # cp or link these to the obiwarp file to get variant behavior 2 | # (these may or may not actually work since I don't test these) 3 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/variations/get_ssr_asr_aad.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include "string.h" 4 | 5 | // MINE 6 | #include "vec.h" 7 | #include "mat.h" 8 | 9 | #define DEBUG (0) 10 | 11 | using namespace VEC; 12 | 13 | int main (int argc, char **argv) { 14 | MatF tester; 15 | MatF tester_trans; 16 | VecF mpt; 17 | VecF npt; 18 | for (int c = 1; c < argc; ++c) { 19 | tester.set_from_ascii(argv[c], 1); // no headers on the files 20 | tester.transpose(tester_trans); 21 | mpt.set(tester_trans.cols(), tester_trans.pointer(0)); 22 | npt.set(tester_trans.cols(), tester_trans.pointer(1)); 23 | double ssr = VecF::sum_sq_res_yeqx(mpt, npt); 24 | double asr = VecF::avg_sq_res_yeqx(mpt, npt); 25 | double aad = VecF::avg_abs_diff(mpt, npt); 26 | printf("%s %f %f %f\n", argv[c], ssr, asr, aad); 27 | } 28 | return 1; 29 | } 30 | 31 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/variations/obiwarp_probs.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // MINE 8 | #include "vec.h" 9 | #include "mat.h" 10 | #include "lmat.h" 11 | #include "dynprog.h" 12 | #include "pngio.h" 13 | #include "cmdparser.h" 14 | 15 | #define DEBUG (0) 16 | 17 | int main (int argc, char **argv) { 18 | 19 | CmdParser opts(argc, argv); 20 | 21 | int NUM_INTERNAL_ANCHORS = 10000; // Max this out! 22 | char file1[1024]; 23 | char file2[1024]; 24 | strcpy(file1, opts.infiles[0]); 25 | strcpy(file2, opts.infiles[1]); 26 | char outfilename[1024 + 7]; 27 | 28 | int mi_bins = 5; 29 | //char toError[300]; 30 | 31 | if (DEBUG) { 32 | std::cerr << "**********************************************\n"; 33 | std::cerr << "opts.local: " << opts.local << "\n"; 34 | std::cerr << "opts.images: " << opts.images << "\n"; 35 | std::cerr << "opts.score: " << opts.score << "\n"; 36 | std::cerr << "opts.outfile: "<< opts.outfile << "\n"; 37 | std::cerr << "opts.timefile: " << opts.timefile << "\n"; 38 | std::cerr << "file1: " << file1 << "\n"; 39 | std::cerr << "file2: " << file2 << "\n"; 40 | std::cerr << "**********************************************\n"; 41 | } 42 | 43 | 44 | 45 | if (opts.outfile == NULL) { 46 | strcpy(outfilename, file2); 47 | strcat(outfilename, ".warped"); 48 | } 49 | else { 50 | strcpy(outfilename, opts.outfile); 51 | } 52 | 53 | // ************************************************************ 54 | // * READ IN FILES TO GET MAT 55 | // ************************************************************ 56 | LMat lmat1; 57 | LMat lmat2; 58 | MatF mat1; 59 | MatF mat2; 60 | MatF smat; 61 | DynProg dyn; 62 | 63 | // if (opts.axes) { 64 | // if (opts.binary) { 65 | // lmat1.set_from_binary(file1); 66 | // lmat2.set_from_binary(file2); 67 | // lmat1._mat->copy(mat1, 1); 68 | // lmat2._mat->copy(mat2, 1); 69 | // } 70 | // else { 71 | // lmat1.set_from_ascii(file1); 72 | // lmat2.set_from_ascii(file2); 73 | // lmat1._mat->copy(mat1, 1); 74 | // lmat2._mat->copy(mat2, 1); 75 | // } 76 | // } 77 | // else { 78 | // //mat1.set_from_ascii(file1); @TODO: write this guy 79 | // //mat2.set_from_ascii(file2); 80 | // } 81 | // 82 | // // ************************************************************ 83 | // // * SCORE THE MATRICES 84 | // // ************************************************************ 85 | // if (DEBUG) { 86 | // std::cerr << "Scoring the mats!\n"; 87 | // } 88 | // if (opts.smat_in != NULL) { 89 | // smat.set_from_binary(opts.smat_in); 90 | // dyn._smat = &smat; 91 | // } 92 | // else { 93 | // dyn.score(mat1, mat2, smat, opts.score, mi_bins); 94 | // // SETTING THE SMAT TO BE std normal 95 | // smat -= smat.avg(); 96 | // double mean, stdev; 97 | // smat._dat.sample_stats(mean, stdev); 98 | // smat /= stdev; 99 | // if (!strcmp(opts.score,"euclidean")) { 100 | // smat *= -1; // inverting euclidean 101 | // } 102 | // } 103 | // if (opts.smat_out != NULL) { 104 | // printf("Writing binary smat to '%s'\n", opts.smat_out); 105 | // smat.write(opts.smat_out); 106 | // //smat.print(smat_out_files[0]); 107 | // exit(0); 108 | // } 109 | // 110 | // // ************************************************************ 111 | // // * PREPARE GAP PENALTY ARRAY 112 | // // ************************************************************ 113 | // 114 | // MatF tester; 115 | // MatF tester_trans; 116 | // VecF mpt; 117 | // VecF npt; 118 | // VecF mOut_tm; 119 | // VecF nOut_tm; 120 | // 121 | // double average = smat.avg(); 122 | // int gp_length = smat.rows() + smat.cols(); 123 | // 124 | // VecF gp_array; 125 | // dyn.linear_less_before(opts.gap_extend,opts.gap_init,gp_length,gp_array); 126 | // 127 | // // ************************************************************ 128 | // // * DYNAMIC PROGRAM 129 | // // ************************************************************ 130 | // int minimize = 0; 131 | // if (DEBUG) { 132 | // std::cerr << "Dynamic Time Warping Score Matrix!\n"; 133 | // } 134 | // dyn.find_path(smat, gp_array, minimize, opts.factor_diag, opts.factor_gap, opts.local, opts.init_penalty); 135 | // 136 | // VecI mOut; 137 | // VecI nOut; 138 | // dyn.warp_map(mOut, nOut, minimize, NUM_INTERNAL_ANCHORS); 139 | // 140 | // if (opts.timefile != NULL) { 141 | // tester.set_from_ascii(opts.timefile, 1); // no headers on the files 142 | // tester.transpose(tester_trans); 143 | // mpt.set(tester_trans.cols(), tester_trans.pointer(0)); 144 | // npt.set(tester_trans.cols(), tester_trans.pointer(1)); 145 | // float ssr, asr, sad, aad; 146 | // dyn.path_accuracy((*lmat1._tm), (*lmat2._tm), mOut, nOut, mpt, npt, ssr, asr, sad, aad); 147 | // //printf("average residual^2 (sec): %f\n", asr); 148 | // //printf("average abs time diff (sec): %f\n", aad); 149 | // printf("%f %f %f %f\n", ssr, asr, sad, aad); 150 | // } 151 | // 152 | // // Warp the second lmat run! 153 | // if (opts.axes) { 154 | // VecF nOutF; 155 | // VecF mOutF; 156 | // lmat1.tm_axis_vals(mOut, mOutF); 157 | // lmat2.tm_axis_vals(nOut, nOutF); // 158 | // lmat2.warp_tm(nOutF, mOutF); 159 | // } 160 | // else { 161 | // // or warp the mat itself! 162 | // // @TODO: write the warping of mat itself! 163 | // } 164 | // 165 | // if (opts.binary) { 166 | // //lmat2.print(outfilename); 167 | // lmat2.write(outfilename); 168 | // } 169 | // else { 170 | // lmat2.print(outfilename); 171 | // } 172 | // 173 | // 174 | // 175 | // if (opts.images) { 176 | // PngIO wrt(1); 177 | // char base_fn[1024]; 178 | // strcpy(base_fn, "obi-warp_"); 179 | // char tb_fn[1024]; 180 | // strcpy(tb_fn, base_fn); 181 | // strcat(tb_fn, "tb.png"); 182 | // //char *tb_fn = "tb.png"; 183 | // wrt.write(tb_fn, dyn._tb); 184 | // char tbpath_fn[1024]; 185 | // strcpy(tbpath_fn, base_fn); 186 | // strcat(tbpath_fn, "tbpath.png"); 187 | // wrt.write(tbpath_fn, dyn._tbpath); 188 | // 189 | // char asmat_fn[1024]; 190 | // strcpy(asmat_fn, base_fn); 191 | // strcat(asmat_fn, "asmat.png"); 192 | // //wrt.write(asmat_fn, dyn._asmat); 193 | // 194 | // //strcpy(base_fn, "tb.png"); 195 | // //char *tbpath_fn = "tbpath.png"; 196 | // //char *tbscores_fn = "tbscores.png"; 197 | // //wrt.write(tbscores_fn, dyn._tbscores); 198 | // //char *asmat_fn = "asmat.png"; 199 | // //wrt.write(asmat_fn, dyn._asmat); 200 | // char *smat_fn = "smat.png"; 201 | // //wrt.write(smat_fn, *dyn._smat); 202 | // } 203 | // 204 | ///* 205 | // char silly[100]; 206 | // strcpy(silly, "png_"); 207 | // char tmpp[5]; 208 | // sprintf(tmpp, "%d", i); 209 | // strcat(silly, tmpp); 210 | // strcat(silly, ".png"); 211 | // 212 | // PngIO wrt(0); 213 | ////wrt.write(silly, dyn._tbpath); 214 | //wrt.write(silly, _scorepath); 215 | //*/ 216 | // 217 | 218 | return 0; 219 | } 220 | 221 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/variations/obiwarp_speed.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | // 3RD PARTY 8 | #include 9 | #include "tnt_stopwatch.h" 10 | 11 | 12 | // MINE 13 | #include "vec.h" 14 | #include "mat.h" 15 | #include "dynprog.h" 16 | #include "pngio.h" 17 | 18 | #define DEBUG (0) 19 | 20 | 21 | int mymain(const char **SCORE_ARR, int score_cnt); 22 | 23 | int main (int argc, char **argv) { 24 | // Create the argument structures: 25 | struct arg_str *score = arg_str0("s", "score", "", "similarity score to compare vectors"); 26 | struct arg_end *end = arg_end(20); 27 | 28 | void* argtable[] = {score,end}; 29 | const char* progname = "obi-warp"; 30 | int nerrors; 31 | int exitcode=0; 32 | 33 | /* set default values*/ 34 | double diag_factor = 2.f; 35 | double gap_factor = 1.f; 36 | 37 | /* verify the argtable[] entries were allocated sucessfully */ 38 | if (arg_nullcheck(argtable) != 0) 39 | { 40 | /* NULL entries were detected, some allocations must have failed */ 41 | printf("%s: insufficient memory\n",progname); 42 | exitcode=1; 43 | goto exit; 44 | } 45 | 46 | 47 | /* Parse the command line as defined by argtable[] */ 48 | nerrors = arg_parse(argc,argv,argtable); 49 | 50 | exitcode = mymain( 51 | score->sval, score->count 52 | ); 53 | 54 | exit: 55 | /* deallocate each non-null entry in argtable[] */ 56 | arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0])); 57 | 58 | return exitcode; 59 | } 60 | 61 | int mymain(const char **SCORE_ARR, int score_cnt) { 62 | 63 | char SCORE[1024]; 64 | if (score_cnt) { 65 | strcpy(SCORE, SCORE_ARR[0]); 66 | } 67 | else { 68 | strcpy(SCORE, "covariance"); 69 | } 70 | 71 | if (strlen(SCORE) <= 1) { 72 | strcpy(SCORE, "covariance"); 73 | } 74 | 75 | 76 | int num_scores = 10; 77 | int num_peaks = 1000; 78 | 79 | // Write the file for plotting each guy: 80 | char plotfilename[1024]; 81 | char plotfilename_toplot[1024]; 82 | strcpy(plotfilename, "speed_test_"); 83 | strcat(plotfilename, SCORE); 84 | strcpy(plotfilename_toplot, plotfilename); 85 | strcat(plotfilename_toplot, ".toplot"); 86 | std::ofstream fh(plotfilename_toplot); 87 | printf("WRITING TO: %s\n", plotfilename_toplot); 88 | fh << "XYData" << "\n"; 89 | fh << plotfilename << "\n"; 90 | fh << "Scoring Functions Speed Comparison (on draco) " << num_scores << " scores\n"; 91 | fh << "N scans (compared N X N times)\n"; 92 | fh << "sqrt( time to complete " << num_scores << " scores (sec) )\n"; 93 | MatF smat; 94 | DynProg dyn; 95 | 96 | int i; 97 | int num_its = 10; 98 | VecI xaxis(num_its); 99 | for (i = 0; i < num_its; ++i) { 100 | xaxis[i] = i * 10; 101 | } 102 | 103 | fh << SCORE << "numpeaks" << num_peaks << "\n"; 104 | std::cout << "SCORE " << SCORE << "\n"; 105 | 106 | VecF yresult(num_its); 107 | TNT::Stopwatch st; 108 | 109 | for (i = 0; i < xaxis.length(); ++i) { 110 | int num_scans = xaxis[i]; 111 | MatF mat1(num_scans,num_peaks, 20.f); 112 | MatF mat2(num_scans,num_peaks, 12.5f); 113 | MatF smat_slow(mat1.rows(), mat2.rows()); 114 | 115 | int cnt1, cnt2; 116 | VecF *row_vecs1 = new VecF[mat1.rows()]; 117 | VecF *row_vecs2 = new VecF[mat2.rows()]; 118 | mat1.row_vecs(cnt1, row_vecs1); 119 | mat2.row_vecs(cnt2, row_vecs2); 120 | if (!strcmp(SCORE, "covariance_slow")) { 121 | std::cout << "INSIDE" << SCORE << "\n"; 122 | st.start(); 123 | for (int j = 0; j < num_scores; ++j) { 124 | for (int m = 0; m < cnt1; ++m) { 125 | for (int n = 0; n < cnt2; ++n) { 126 | smat_slow(m,n) = VecF::covariance(row_vecs1[m], row_vecs2[n]); 127 | } 128 | } 129 | } 130 | float timed = st.read(); 131 | yresult[i] = timed; 132 | } 133 | else if (!strcmp(SCORE, "pearsonsr_slow")) { 134 | std::cout << "INSIDE" << SCORE << "\n"; 135 | st.start(); 136 | for (int j = 0; j < num_scores; ++j) { 137 | for (int m = 0; m < cnt1; ++m) { 138 | for (int n = 0; n < cnt2; ++n) { 139 | smat_slow(m,n) = VecF::pearsons_r(row_vecs1[m], row_vecs2[n]); 140 | } 141 | } 142 | } 143 | float timed = st.read(); 144 | yresult[i] = timed; 145 | } 146 | else if (!strcmp(SCORE, "product_slow")) { 147 | std::cout << "INSIDE" << SCORE << "\n"; 148 | st.start(); 149 | for (int j = 0; j < num_scores; ++j) { 150 | for (int m = 0; m < cnt1; ++m) { 151 | for (int n = 0; n < cnt2; ++n) { 152 | smat_slow(m,n) = VecF::dot_product(row_vecs1[m], row_vecs2[n]); 153 | } 154 | } 155 | } 156 | float timed = st.read(); 157 | yresult[i] = timed; 158 | } 159 | else if (!strcmp(SCORE, "euclidean_slow")) { 160 | std::cout << "INSIDE" << SCORE << "\n"; 161 | st.start(); 162 | for (int j = 0; j < num_scores; ++j) { 163 | for (int m = 0; m < cnt1; ++m) { 164 | for (int n = 0; n < cnt2; ++n) { 165 | smat_slow(m,n) = VecF::euclidean(row_vecs1[m], row_vecs2[n]); 166 | } 167 | } 168 | } 169 | float timed = st.read(); 170 | yresult[i] = timed; 171 | } 172 | else { 173 | std::cout << "INSIDE" << SCORE << "\n"; 174 | st.start(); 175 | for (int j = 0; j < num_scores; ++j) { 176 | dyn.score(mat1, mat2, smat, SCORE, 5); 177 | } 178 | float timed = st.read(); 179 | yresult[i] = timed; 180 | } 181 | } 182 | xaxis; 183 | yresult.square_root(); 184 | xaxis.print(fh,1); 185 | yresult.print(fh,1); 186 | fh.close(); 187 | 188 | return 0; 189 | } 190 | 191 | 192 | 193 | // Print to file to plot 194 | // title (gap penalty optimization etc.....) 195 | // filename (intercept_linear_gap_penalty_optimization ) 196 | // "slope of gap penalty array" 197 | // "avg of sq of residuals" 198 | // mvals ... 199 | // avgs ... 200 | // mvals ... 201 | // avgs ... 202 | // mvals ... 203 | // avgs ... 204 | 205 | -------------------------------------------------------------------------------- /third_party/obiwarp/lib/variations/smat_dist.cpp: -------------------------------------------------------------------------------- 1 | // STDLIB: 2 | #include 3 | #include 4 | #include "string.h" 5 | 6 | // MINE 7 | #include "vec.h" 8 | #include "mat.h" 9 | #include "lmat.h" 10 | #include "dynprog.h" 11 | #include "pngio.h" 12 | 13 | 14 | char file1[1024]; 15 | char file2[1024]; 16 | int mi_bins = 5; 17 | float init_penalty = 0.f; 18 | char toError[300]; 19 | 20 | int AXES = 0; 21 | int BINARY = 0; 22 | int LOCAL = 0; 23 | int IMAGES = 0; 24 | int LOGYESNO = 0; 25 | char SCORE[10]; 26 | 27 | 28 | int main (int argc, char *argv[]) { 29 | /************************************************************ 30 | * GET ARGUMENTS 31 | ************************************************************/ 32 | strcpy(SCORE, "covariance"); 33 | int file1_found_already = 0; 34 | if (argc == 1) { 35 | std::cerr << "usage: smat_dist [-a] [-b] [-l] [-g] [-s ] file1 file2\n" << 36 | "FORMAT: \n" << 37 | "Data should be in an m(rows)x n(cols) matrix (space delimited)\n" << 38 | "where each line contains one row of data. Should be same # cols.\n" << 39 | "Data will be aligned along the m axis.\n" << 40 | "ARGUMENTS (default marked by asterik*): \n" << 41 | "b|binary = file is binary [precision?, etc] rather than *ascii\n" << 42 | "a|axes = 1st line in file contains x coordinates, 2nd the y\n" << 43 | "s|score = scoring function: *covariance, product (dot product)\n" << 44 | " pearsons_r, pearsons_r2, mutual_info\n" << 45 | "l|local = local rather than *global alignment\n" << 46 | "i|images = creates png images of the alignment process\n" << 47 | "g|log = takes the log (base 2) of smat\n" << 48 | "[space between argument and value, please.]\n"; 49 | exit(1); 50 | } 51 | for (int i = 1; i < argc; i++) { 52 | if (!strcmp(argv[i],"-a")) { 53 | AXES = 1; 54 | } 55 | else if (!strcmp(argv[i],"-b")) { 56 | BINARY = 1; 57 | } 58 | else if (!strcmp(argv[i],"-g")) { 59 | LOGYESNO = 1; 60 | } 61 | else if (!strcmp(argv[i],"-i")) { 62 | IMAGES = 1; 63 | } 64 | else if (!strcmp(argv[i],"-l")) { 65 | LOCAL = 1; 66 | } 67 | else if (!strcmp(argv[i],"-s")) { 68 | i++; 69 | strcpy(SCORE, argv[i]); 70 | } 71 | // if it doesn't match an option then it is our file! 72 | else { 73 | if (file1_found_already) { 74 | strcpy(file2, argv[i]); 75 | } 76 | else { 77 | strcpy(file1, argv[i]); 78 | file1_found_already = 1; 79 | } 80 | } 81 | } 82 | std::cerr << "**********************************************\n"; 83 | std::cerr << "SCORE: " << SCORE << "\n"; 84 | std::cerr << "file1: " << file1 << "\n"; 85 | std::cerr << "file2: " << file2 << "\n"; 86 | std::cerr << "LOCAL: " << LOCAL << "\n"; 87 | std::cerr << "BINARY: " << BINARY << "\n"; 88 | std::cerr << "IMAGES: " << IMAGES << "\n"; 89 | std::cerr << "AXES: " << AXES << "\n"; 90 | std::cerr << "LOG: " << LOGYESNO << "\n"; 91 | std::cerr << "**********************************************\n"; 92 | 93 | /************************************************************ 94 | * READ IN FILES TO GET MAT 95 | ************************************************************/ 96 | LMat lmat1; 97 | LMat lmat2; 98 | MatF mat1; 99 | MatF mat2; 100 | MatF smat; 101 | DynProg dyn; 102 | 103 | if (AXES) { 104 | lmat1.set_from_ascii(file1); 105 | lmat2.set_from_ascii(file2); 106 | lmat1._mat->copy(mat1, 1); 107 | lmat2._mat->copy(mat2, 1); 108 | } 109 | else { 110 | //mat1.set_from_ascii(file1); @TODO: write this guy 111 | //mat2.set_from_ascii(file2); 112 | } 113 | 114 | /************************************************************ 115 | * SCORE THE MATRICES 116 | ************************************************************/ 117 | std::cerr << "Scoring the mats!\n"; 118 | dyn.score(mat1, mat2, smat, SCORE, mi_bins); 119 | 120 | /************************************************************ 121 | * PREPARE GAP PENALTY ARRAY 122 | ************************************************************/ 123 | VecF gp_array; // use default for now 124 | 125 | /************************************************************ 126 | * DYNAMIC PROGRAM 127 | ************************************************************/ 128 | 129 | int minimize = 0; 130 | std::cerr << "Dynamic Time Warping Score Matrix!\n"; 131 | dyn.find_path_with_gaps(smat, gp_array, minimize, LOCAL, init_penalty); 132 | printf("DYNPROG SCORE: %f\n", dyn._bestScore); 133 | 134 | // Run through various distances: 135 | int reply; 136 | int steps; 137 | char steps_st[3]; 138 | char basefilename[255]; 139 | 140 | // strip the lmata: 141 | char *pointer; 142 | pointer = strstr(file1, ".lmata"); 143 | *pointer = '\0'; 144 | pointer = strstr(file2, ".lmata"); 145 | *pointer = '\0'; 146 | 147 | strcpy(basefilename, file1); 148 | strcat(basefilename, "_"); 149 | strcat(basefilename, file2); 150 | strcat(basefilename, "_"); 151 | strcat(basefilename, SCORE); 152 | strcat(basefilename, "_"); 153 | strcat(basefilename, "steps"); 154 | strcat(basefilename, "_"); 155 | char finalfn[255]; 156 | if (LOGYESNO) { 157 | strcat(basefilename, "logbase2"); 158 | strcat(basefilename, "_"); 159 | smat.logarithm(2); 160 | } 161 | 162 | for (int steps = 0; steps < 50; steps += steps + 1) { 163 | MatI tbpathe; 164 | dyn._tbpath.expand(tbpathe,1,steps,steps,steps,steps,0,0,0,0); 165 | 166 | strcpy(finalfn,basefilename); 167 | sprintf(steps_st, "%d", steps); 168 | strcat(finalfn,steps_st); 169 | 170 | VecF result; 171 | smat.mask_as_vec(1, tbpathe, result); 172 | 173 | VecD _bins; 174 | VecI _freqs; 175 | result.hist(100, _bins, _freqs); 176 | char *hist_fn = "hist.txt"; 177 | std::ofstream fh(hist_fn); 178 | // print filename and title: 179 | fh << finalfn << "\n"; // filename 180 | fh << finalfn << "\n"; // title 181 | // print the x and y axis labels: 182 | fh << "score" << "\n"; 183 | fh << "frequency" << "\n"; 184 | // print the data: 185 | _bins.print(fh); 186 | _freqs.print(fh); 187 | fh.close(); 188 | reply = system("plot_xy.rb hist.txt -b"); 189 | if (reply == -1) { puts("Error!"); } 190 | else { puts("success"); } 191 | } 192 | 193 | if (IMAGES) { 194 | PngIO wrt(1); 195 | //char tb_fn[100]; 196 | //strcpy(tb_fn, "tb.png"); 197 | char *tb_fn = "tb.png"; 198 | wrt.write(tb_fn, dyn._tb); 199 | char *tbpath_fn = "tbpath.png"; 200 | wrt.write(tbpath_fn, dyn._tbpath); 201 | char *asmat_fn = "asmat.png"; 202 | wrt.write(asmat_fn, dyn._asmat); 203 | char *smat_fn = "smat.png"; 204 | wrt.write(smat_fn, *dyn._smat); 205 | } 206 | 207 | 208 | /* 209 | char silly[100]; 210 | strcpy(silly, "png_"); 211 | char tmpp[5]; 212 | sprintf(tmpp, "%d", i); 213 | strcat(silly, tmpp); 214 | strcat(silly, ".png"); 215 | 216 | PngIO wrt(0); 217 | //wrt.write(silly, dyn._tbpath); 218 | wrt.write(silly, _scorepath); 219 | */ 220 | } 221 | 222 | -------------------------------------------------------------------------------- /third_party/py_obiwarp.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "string.h" 6 | 7 | #include 8 | #include 9 | #include "obiwarp/lib/vec.h" 10 | #include "obiwarp/lib/mat.h" 11 | #include "obiwarp/lib/lmat.h" 12 | #include "obiwarp/lib/dynprog.h" 13 | 14 | 15 | #define DEBUG (0) 16 | namespace py = pybind11; 17 | 18 | LMat* create_lmat_from_memory(int len_rt, double *rt, int len_mz, double *mz, double *intensity) 19 | { 20 | LMat *lmat = new LMat(); 21 | delete lmat->_mz; 22 | delete lmat->_tm; 23 | delete lmat->_mat; 24 | 25 | // Get the time values: 26 | lmat->_tm_vals = len_rt; 27 | float *tm_tmp = new float[len_rt]; 28 | for(int i=0; i < len_rt; i++) { 29 | tm_tmp[i] = rt[i]; 30 | } 31 | lmat->_tm = new VecF(len_rt, tm_tmp); 32 | 33 | // Get the mz values: 34 | lmat->_mz_vals = len_mz; 35 | float *mz_tmp = new float[len_mz]; 36 | for(int i=0; i < len_mz; i++) { 37 | mz_tmp[i] = mz[i]; 38 | } 39 | lmat->_mz = new VecF(len_mz, mz_tmp); 40 | 41 | // Read the matrix: 42 | int rows_by_cols = len_rt * len_mz; 43 | float *mat_tmp = new float[rows_by_cols]; 44 | for(int i=0; i < rows_by_cols; i++) { 45 | mat_tmp[i] = intensity[i]; 46 | } 47 | lmat->_mat = new MatF(len_rt, len_mz, mat_tmp); 48 | 49 | return lmat; 50 | } 51 | 52 | 53 | py::array_t obiwarp(py::array_t py_rt, py::array_t py_mz, py::array_t py_intensity, 54 | py::array_t py_rt2, py::array_t py_mz2, py::array_t py_intensity2, 55 | float percent_anchors, const char *score, 56 | float gap_init, float gap_extend, 57 | float factor_diag, float factor_gap, 58 | int local_alignment, float init_penalty) 59 | { 60 | // ************************************************************ 61 | // * CONVERT ARRAY TO MAT 62 | // ************************************************************ 63 | int len_rt = py_rt.request().size; 64 | int len_mz = py_mz.request().size; 65 | int len_rt2 = py_rt2.request().size; 66 | int len_mz2 = py_mz2.request().size; 67 | double *rt = (double *)py_rt.request().ptr; 68 | double *mz = (double *)py_mz.request().ptr; 69 | double *intensity = (double *)py_intensity.request().ptr; 70 | double *rt2 = (double *)py_rt2.request().ptr; 71 | double *mz2 = (double *)py_mz2.request().ptr; 72 | double *intensity2 = (double *)py_intensity2.request().ptr; 73 | LMat* lmat1 = create_lmat_from_memory(len_rt, rt, len_mz, mz, intensity); 74 | LMat* lmat2 = create_lmat_from_memory(len_rt2, rt2, len_mz2, mz2, intensity2); 75 | 76 | // ************************************************************ 77 | // * SCORE THE MATRICES 78 | // ************************************************************ 79 | if (DEBUG) { 80 | std::cerr << "Input parameter confirmed!\n"; 81 | std::cerr << " - rt_len = " << lmat1->_tm_vals << "\n"; 82 | std::cerr << " - mz_len = " << lmat1->_mz_vals << "\n"; 83 | } 84 | 85 | MatF smat; 86 | DynProg dyn; 87 | dyn.score(*(lmat1->mat()), *(lmat2->mat()), smat, score); 88 | 89 | if (DEBUG) { 90 | std::cerr << "Matrix scored!\n"; 91 | } 92 | 93 | if (!strcmp(score,"euc")) { 94 | smat *= -1; // inverting euclidean 95 | } 96 | 97 | 98 | // ************************************************************ 99 | // * PREPARE GAP PENALTY ARRAY 100 | // ************************************************************ 101 | 102 | MatF time_tester; 103 | MatF time_tester_trans; 104 | VecF mpt; 105 | VecF npt; 106 | VecF mOut_tm; 107 | VecF nOut_tm; 108 | 109 | int gp_length = smat.rows() + smat.cols(); 110 | 111 | VecF gp_array; 112 | dyn.linear_less_before(gap_extend, gap_init, gp_length, gp_array); 113 | 114 | // ************************************************************ 115 | // * DYNAMIC PROGRAM 116 | // ************************************************************ 117 | int minimize = 0; 118 | dyn.find_path(smat, gp_array, minimize, factor_diag, factor_gap, local_alignment, init_penalty); 119 | if (DEBUG) { 120 | std::cerr << "Dynamic Time Warping path found!\n"; 121 | } 122 | 123 | VecI mOut; 124 | VecI nOut; 125 | dyn.warp_map(mOut, nOut, percent_anchors, minimize); 126 | if (DEBUG) { 127 | std::cerr << "Warping anchors decided!\n"; 128 | } 129 | 130 | VecF nOutF; 131 | VecF mOutF; 132 | lmat1->tm_axis_vals(mOut, mOutF); 133 | lmat2->tm_axis_vals(nOut, nOutF); 134 | lmat2->warp_tm(nOutF, mOutF); 135 | if (DEBUG) { 136 | std::cerr << "Piecewise cubic hermite interpolation finished!\n"; 137 | } 138 | 139 | py::array_t warped_rt2 = py::array_t(len_rt2); 140 | py::buffer_info buffer = warped_rt2.request(); 141 | 142 | float* result = (float *)buffer.ptr; 143 | for(int i=0; i < len_rt2;i++){ 144 | result[i] = lmat2->tm()->pointer()[i]; 145 | } 146 | 147 | delete lmat1; 148 | delete lmat2; 149 | 150 | return warped_rt2; 151 | } 152 | 153 | PYBIND11_MODULE(py_obiwarp, m) { 154 | m.doc() = "Python Bindings for Obiwarp library."; 155 | m.attr("__version__") = "0.9.4"; 156 | 157 | m.def("obiwarp", &obiwarp, "Perform obiwarp function."); 158 | } 159 | 160 | -------------------------------------------------------------------------------- /third_party/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import glob 4 | from setuptools import setup, Extension, find_packages 5 | from setuptools.command.build_ext import build_ext 6 | import setuptools 7 | 8 | __version__ = '0.9.4' 9 | requirements_list = ['pybind11>=2.4', 'numpy'] 10 | 11 | maindir = os.path.join(".", "obiwarp") 12 | libdir = os.path.join(maindir, "lib") 13 | source_files = ['py_obiwarp.cc'] 14 | file_types = ['*.c', '*.cc', '*.cpp'] 15 | exclude_patterns = ['TEMPLATE', '2'] 16 | libraries = [] 17 | extra_objects = [] 18 | 19 | for file_type in file_types: 20 | for f in glob.glob(os.path.join(libdir, file_type)): 21 | exclude = False 22 | for pattern in exclude_patterns: 23 | if pattern in f: 24 | exclude = True 25 | break 26 | if not exclude: 27 | source_files += [f] 28 | 29 | ext_modules = [ 30 | Extension( 31 | 'py_obiwarp', 32 | source_files, 33 | ), 34 | ] 35 | 36 | # As of Python 3.6, CCompiler has a `has_flag` method. 37 | # cf http://bugs.python.org/issue26689 38 | def has_flag(compiler, flagname): 39 | """Return a boolean indicating whether a flag name is supported on 40 | the specified compiler. 41 | """ 42 | import tempfile 43 | with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f: 44 | f.write('int main (int argc, char **argv) { return 0; }') 45 | try: 46 | compiler.compile([f.name], extra_postargs=[flagname]) 47 | except setuptools.distutils.errors.CompileError: 48 | return False 49 | return True 50 | 51 | 52 | def cpp_flag(compiler): 53 | """Return the -std=c++[11/14] compiler flag. 54 | 55 | #The c++14 is prefered over c++11 (when it is available). 56 | # This somehow can fail on a Mac with clang 57 | #""" 58 | if has_flag(compiler, '-std=c++11'): 59 | return '-std=c++11' 60 | else: 61 | raise RuntimeError('Unsupported compiler -- at least C++11 support ' 62 | 'is needed!') 63 | 64 | 65 | class BuildExt(build_ext): 66 | """A custom build extension for adding compiler-specific options.""" 67 | c_opts = { 68 | 'msvc': ['/EHsc', '/openmp', '/O2'], 69 | 'unix': ['-O3', '-march=native', '-std=c99'], 70 | #'unix': ['-O0', '-march=native', '-g'], 71 | } 72 | link_opts = { 73 | 'unix': [], 74 | 'msvc': [], 75 | } 76 | 77 | if sys.platform == 'darwin': 78 | c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] 79 | link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7'] 80 | else: 81 | c_opts['unix'].append("-fopenmp") 82 | link_opts['unix'].extend(['-fopenmp', '-pthread']) 83 | 84 | def build_extensions(self): 85 | ct = self.compiler.compiler_type 86 | opts = self.c_opts.get(ct, []) 87 | if ct == 'unix': 88 | opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version()) 89 | opts.append(cpp_flag(self.compiler)) 90 | if has_flag(self.compiler, '-fvisibility=hidden'): 91 | opts.append('-fvisibility=hidden') 92 | elif ct == 'msvc': 93 | opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()) 94 | 95 | # extend include dirs here (don't assume numpy/pybind11 are installed when first run, since 96 | # pip could have installed them as part of executing this script 97 | import pybind11 98 | import numpy as np 99 | for ext in self.extensions: 100 | ext.extra_compile_args.extend(opts) 101 | ext.extra_link_args.extend(self.link_opts.get(ct, [])) 102 | ext.include_dirs.extend([ 103 | # Path to pybind11 headers 104 | pybind11.get_include(), 105 | pybind11.get_include(True), 106 | 107 | # Path to numpy headers 108 | np.get_include() 109 | ]) 110 | 111 | build_ext.build_extensions(self) 112 | 113 | 114 | setup( 115 | name='py_obiwarp', 116 | version='0.9.4', 117 | author="CSi-Studio", 118 | author_email="csi@csibio.net", 119 | maintainer="Ruimin Wang", 120 | license="The MIT License", 121 | description='Python bindings for OBI-Warp', 122 | long_description='Python bindings for OBI-Warp algorithm, which was also used in XCMS for mass spectrometry data alignment.', 123 | keywords="ObiWarp, Alignment, Warping", 124 | packages=find_packages(), 125 | ext_modules=ext_modules, 126 | install_requires=requirements_list, 127 | setup_requires=requirements_list, 128 | cmdclass={'build_ext': BuildExt}, 129 | test_suite="tests", 130 | zip_safe=False, 131 | python_requires=">=3.6" 132 | ) 133 | --------------------------------------------------------------------------------