├── .gitignore
├── README.md
├── data
    ├── evaluate_metapro_galigner.py
    ├── evaluate_metapro_mzmine2.java
    ├── evaluate_metapro_openms.py
    ├── evaluate_metapro_xcms.Rmd
    ├── evaluate_mzmine2_galigner.py
    ├── evaluate_openms_galigner.py
    ├── evaluate_xcms_galigner.py
    ├── metapro_result_comparison.py
    ├── openms_result_converter.py
    └── software_result_comparison.py
├── requirements.txt
├── src
    ├── coarse_registration.py
    ├── fine_alignment.py
    ├── main_galigner.py
    ├── map_solver
    │   ├── base_solver.py
    │   ├── greedy_solver.py
    │   ├── gurobi_solver.py
    │   ├── local_bipartite_solver.py
    │   ├── local_nearest_solver.py
    │   ├── ortools_solver.py
    │   └── vlsns_solver.py
    ├── params.py
    ├── raw_file_reader.py
    ├── result_file_reader.py
    └── tools
    │   ├── graph_viewer.py
    │   ├── param_loader.py
    │   └── trace_recorder.py
└── third_party
    ├── obiwarp
        ├── History
        ├── LICENSE
        ├── README.html
        ├── README.md
        ├── Rakefile
        ├── VERSION
        └── lib
        │   ├── Rakefile
        │   ├── cmdparser.cpp
        │   ├── cmdparser.h
        │   ├── doxygen.conf
        │   ├── dynprog.cpp
        │   ├── dynprog.h
        │   ├── lmat.cpp
        │   ├── lmat.h
        │   ├── lmat2chrms.cpp
        │   ├── lmat2lmata.cpp
        │   ├── lmat2png.cpp
        │   ├── lmata2lmat.cpp
        │   ├── mat.cpp
        │   ├── mat.h
        │   ├── mat2mata.cpp
        │   ├── mat_TEMPLATE.cpp
        │   ├── mat_TEMPLATE.h
        │   ├── mata2mat.cpp
        │   ├── not_using
        │       ├── README.txt
        │       ├── outliers.cpp
        │       └── test_outliers.h
        │   ├── obiwarp.cpp
        │   ├── obiwarp.dsp
        │   ├── pngio.cpp
        │   ├── pngio.h
        │   ├── test_cmdparser.rb
        │   ├── test_dynprog.h
        │   ├── test_lmat.h
        │   ├── test_lmat_converters.h
        │   ├── test_mat.h
        │   ├── test_mat_TEMPLATE.h
        │   ├── test_mat_converters.rb
        │   ├── test_obiwarp.h
        │   ├── test_obiwarp.rb
        │   ├── test_pngio.h
        │   ├── test_vec.h
        │   ├── test_vec_TEMPLATE.h
        │   ├── tfiles
        │       ├── file1.mat
        │       ├── file1.mata
        │       ├── file3.mat
        │       ├── file3.mata
        │       ├── file4.mat
        │       ├── file4.mata
        │       ├── tmp1.lmat
        │       ├── tmp1.lmat.pts
        │       ├── tmp1.lmata
        │       ├── tmp1.mat
        │       ├── tmp1.mata
        │       ├── tmp1B.lmat
        │       ├── tmp1B.lmat.warped_default
        │       ├── tmp1B.lmata
        │       ├── tmp1_no_header.mata
        │       ├── tmp1_no_header_messy.mata
        │       ├── tmp2.lmat
        │       ├── tmp2.lmata
        │       └── tmptimes.txt
        │   ├── variations
        │       ├── README.txt
        │       ├── get_ssr_asr_aad.cpp
        │       ├── obiwarp_anchor_opt.cpp
        │       ├── obiwarp_chams.cpp
        │       ├── obiwarp_doing_xy_plots_nicely.cpp
        │       ├── obiwarp_factor_opt.cpp
        │       ├── obiwarp_gp_opt.cpp
        │       ├── obiwarp_probs.cpp
        │       ├── obiwarp_score.cpp
        │       ├── obiwarp_speed.cpp
        │       └── smat_dist.cpp
        │   ├── vec.cpp
        │   ├── vec.h
        │   ├── vec_TEMPLATE.cpp
        │   └── vec_TEMPLATE.h
    ├── py_obiwarp.cc
    └── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | /.idea/
 2 | /third_party/.eggs/
 3 | /third_party/build/
 4 | /third_party/dist/
 5 | /third_party/py_obiwarp.egg-info/
 6 | /third_party/var/
 7 | 
 8 | /data/TripleTOF_6600/
 9 | /data/TripleTOF_6600_results_mzmine2/
10 | /data/TripleTOF_6600_results_xcms/
11 | /data/TripleTOF_6600_results_openms/
12 | /data/TripleTOF_6600_results_metapro/
13 | 
14 | /data/QE_HF/
15 | /data/QE_HF_results_mzmine2/
16 | /data/QE_HF_results_xcms/
17 | /data/QE_HF_results_openms/
18 | /data/QE_HF_results_metapro/
19 | 
20 | /experiments/
21 | /data/MTBLS562/
22 | /data/MTBLS562_results_openms/
23 | /data/MTBLS562_results_mzmine2/
24 | /data/MTBLS562_results_metapro/
25 | /data/MTBLS562_results_xcms/
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # G-Aligner: a graph-based feature alignment method for untargeted LC-MS-based metabolomics
  2 | 
  3 | 
  4 | ## Highlights
  5 | - **Novelty:** G-Aligner enables comprehensive analysis of all potential correspondences among features across all runs for the first time. G-Aligner treats features and potential correspondences as nodes and edges of a multipartite graph, converts the feature matching problem into a multidimensional assignment problem (MAP), and proposes three combinatorial optimization methods to solve the MAP.
  6 | - **Accuracy:** G-Aligner achieved the best performance in comparison with popular feature alignment methods in MZmine2, OpenMS and XCMS on two public metabolomics benchmark datasets.
  7 | - **Reliability:** G-Aligner achieved the best performance on manually annotated feature lists and untargeted extracted features of MZmine2, OpenMS and XCMS, and helped all compared software obtain more accurate results by integrating G-Aligner into their workflow.
  8 | - **Open source:** We open-sourced G-Aligner under a permissive license to promote the accuracy of MS data analysis more broadly.
  9 | - **Dataset:** We manually annotated a feature dataset for three public benchmark datasets, which contains m/z, RT, area information of library analytes and can be used in evaluations of feature detection, quantification and alignment accuracy.
 10 | 
 11 | ## Datasets
 12 | Raw MS files of the metabolomics datasets can be downloaded at [Google Drive](https://drive.google.com/drive/folders/1PRDIvihGFgkmErp2fWe41UR2Qs2VY_5G).
 13 | 
 14 | The mzML files of the metabolomics datasets can be downloaded at [Zenodo](https://doi.org/10.5281/zenodo.8313034).
 15 | 
 16 | Targeted annotation results, evaluation results and evaluation methods can be downloaded at [Zenodo](https://doi.org/10.5281/zenodo.8313034).
 17 | 
 18 | 
 19 | ## Setup
 20 | 1. Prepare the python environment based on your system and hardware.
 21 |    
 22 | 2. Install the dependencies. Here we use ROOT_PATH to represent the root path of G-Aligner.
 23 |     
 24 |     ```cd ROOT_PATH\third_party\```
 25 | 
 26 |     ```python setup.py install```
 27 |    
 28 |     ```cd ROOT_PATH```
 29 |    
 30 |     ```pip install -r requirements.txt```
 31 | 
 32 | 
 33 | 
 34 | ## Run G-Aligner
 35 | 
 36 | ### Supported formats
 37 | Feature extraction results in CSV format, containing m/z, RT and area columns.
 38 | 
 39 | ### Demos
 40 | Our demos can help you reproduce the evaluation results.
 41 | 
 42 | Place the data downloaded from the Zenodo repository as follows.
 43 | ```
 44 | G-Aligner-master
 45 | ├── data
 46 | │   ├── MTBLS562
 47 | │   ├── MTBLS562_results_metapro
 48 | │   ├── MTBLS562_results_mzmine2
 49 | │   ├── MTBLS562_results_openms
 50 | │   ├── MTBLS562_results_xcms
 51 | │   ├── QE_HF
 52 | │   ├── QE_HF_results_metapro
 53 | │   ├── QE_HF_results_mzmine2
 54 | │   ├── QE_HF_results_openms
 55 | │   ├── QE_HF_results_xcms
 56 | │   ├── TripleTOF_6600
 57 | │   ├── TripleTOF_6600_results_metapro
 58 | │   ├── TripleTOF_6600_results_mzmine2
 59 | │   ├── TripleTOF_6600_results_openms
 60 | │   ├── TripleTOF_6600_results_xcms
 61 | │   ├── evaluate_metapro_galigner.py
 62 | │   ├── evaluate_metapro_mzmine2.java
 63 | │   ├── evaluate_metapro_openms.py
 64 | │   ├── evaluate_metapro_xcms.Rmd
 65 | │   ├── evaluate_mzmine2_galigner.py
 66 | │   ├── evaluate_openms_galigner.py
 67 | │   ├── evaluate_xcms_galigner.py
 68 | │   ├── metapro_result_comparison.py
 69 | │   ├── software_result_comparison.py
 70 | ```
 71 | 
 72 | - To run the benchmark scripts:
 73 | 
 74 | ```cd ROOT_PATH```
 75 | 
 76 | ```python data/metapro_result_comparison.py```
 77 | 
 78 | ```python data/software_result_comparison.py```
 79 | 
 80 | - To analyze with G-Aligner:
 81 | 
 82 | ```cd ROOT_PATH```
 83 | 
 84 | Change the parameters in data/evaluate_metapro_galigner.py
 85 | 
 86 | ```python data/evaluate_metapro_galigner.py```
 87 | 
 88 | Feature alignment results are saved in the ```experiment``` folder.
 89 | 
 90 | ## Citation
 91 | 
 92 | Cite our paper at:
 93 | ```
 94 | @article{wang2023,
 95 |   title={G-Aligner: a graph-based feature alignment method for untargeted LC--MS-based metabolomics},
 96 |   author={Wang, Ruimin and Lu, Miaoshan and An, Shaowei and Wang, Jinyin and Yu, Changbin},
 97 |   journal={BMC bioinformatics},
 98 |   volume={24},
 99 |   number={1},
100 |   pages={431},
101 |   year={2023},
102 |   publisher={Springer},
103 |   doi={10.1186/s12859-023-05525-4}
104 | }
105 | ```
106 | 
107 | ## License
108 | 
109 | G-Aligner is an open-source tool, using [***Mulan Permissive Software License, Version 2 (Mulan PSL v2)***](http://license.coscl.org.cn/MulanPSL2)
110 | 
111 | 


--------------------------------------------------------------------------------
/data/evaluate_metapro_galigner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams
 3 | from src.main_galigner import GAligner
 4 | 
 5 | 
 6 | def eval_wiff(folder_name, solver, vlsns_init_mode):
 7 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro')
 8 | 
 9 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0,
10 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
11 |     raw_file_reading_params = RawFileReadingParams()
12 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
13 |                                                           rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1)
14 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
15 |                                                   area_factor=1, use_ppm=False, solver=solver)
16 |     if vlsns_init_mode is not None:
17 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
18 | 
19 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
20 |                          fine_assignment_params)
21 |     g_aligner.do_align()
22 | 
23 | 
24 | def eval_raw(folder_name, solver, vlsns_init_mode):
25 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro')
26 | 
27 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0,
28 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
29 |     raw_file_reading_params = RawFileReadingParams()
30 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0,
31 |                                                           rt_tolerance=0.3, rt_residual_threshold=0.02, degree=1)
32 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1,
33 |                                                   area_factor=1, use_ppm=False, solver=solver)
34 |     if vlsns_init_mode is not None:
35 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
36 | 
37 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
38 |                          fine_assignment_params)
39 |     g_aligner.do_align()
40 | 
41 | def eval_mtbls(folder_name, solver, vlsns_init_mode):
42 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'metapro')
43 | 
44 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0,
45 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
46 |     raw_file_reading_params = RawFileReadingParams()
47 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
48 |                                                           rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1)
49 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
50 |                                                   area_factor=1, use_ppm=False, solver=solver)
51 |     if vlsns_init_mode is not None:
52 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
53 | 
54 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
55 |                          fine_assignment_params)
56 |     g_aligner.do_align()
57 | 
58 | 
# The TripleTOF_6600 and QE_HF runs are disabled; uncomment the calls below to reproduce them.
# # TripleTOF_6600
# eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr')
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg')
#
#
# # QE_HF
# eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr')
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg')
#
# MTBLS562: run every solver configuration, one after another, in this order.
for _solver, _init_mode in (
        ('local_bipartite', None),
        ('greedy', None),
        ('gurobi', None),
        ('vlsns', 'msg'),
        ('vlsns', 'msr'),
):
    eval_mtbls('MTBLS562', solver=_solver, vlsns_init_mode=_init_mode)
80 | 


--------------------------------------------------------------------------------
/data/evaluate_metapro_openms.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import os
  3 | 
  4 | import numpy as np
  5 | from pyopenms import *
  6 | from src.params import ResultFileReadingParams
  7 | from src.result_file_reader import ResultFileReader
  8 | 
  9 | 
 10 | def eval(folder_name, align_mz_tolerance, align_rt_tolerance, match_mz_tolerance, match_rt_tolerance):
 11 |     tmp_path = os.getcwd()
 12 |     result_file_path = os.path.join(tmp_path, folder_name, "metapro")
 13 | 
 14 |     result_file_reader = ResultFileReader(
 15 |         ResultFileReadingParams(result_file_path, skip_line=0, rt_col_num=2, mz_col_num=1, area_col_num=3))
 16 |     result_file_paths, result_file_count = result_file_reader.load_result_paths()
 17 | 
 18 |     feature_maps = []
 19 |     for path in result_file_paths:
 20 |         results = result_file_reader.load_result(path)
 21 |         feature_map = FeatureMap()
 22 |         for row in results:
 23 |             feature = Feature()
 24 |             feature.setMZ(row[0])
 25 |             feature.setRT(row[1])
 26 |             feature.setIntensity(row[2])
 27 |             feature_map.push_back(feature)
 28 |         feature_maps.append(feature_map)
 29 | 
 30 |     # set ref_index to feature map index with the largest number of features
 31 |     ref_index = [
 32 |         i[0]
 33 |         for i in sorted(
 34 |             enumerate([fm.size() for fm in feature_maps]), key=lambda x: x[1]
 35 |         )
 36 |     ][-1]
 37 | 
 38 |     aligner = MapAlignmentAlgorithmPoseClustering()
 39 |     aligner_params = MapAlignmentAlgorithmPoseClustering().getDefaults()
 40 | 
 41 |     aligner_params[b'superimposer:max_shift'] = align_rt_tolerance
 42 |     aligner_params[b'superimposer:shift_bucket_size'] = 0.005
 43 |     aligner_params[b'superimposer:mz_pair_max_distance'] = align_mz_tolerance
 44 |     aligner_params[b'superimposer:rt_pair_distance_fraction'] = 0.005
 45 |     aligner_params[b'pairfinder:ignore_charge'] = 'true'
 46 |     aligner_params[b'pairfinder:distance_RT:max_difference'] = align_rt_tolerance
 47 |     aligner_params[b'pairfinder:distance_MZ:max_difference'] = align_mz_tolerance
 48 |     aligner.setReference(feature_maps[ref_index])
 49 |     aligner.setParameters(aligner_params)
 50 | 
 51 |     # perform alignment and transformation of feature maps to the reference map (exclude reference map)
 52 |     for feature_map in feature_maps[:ref_index] + feature_maps[ref_index + 1:]:
 53 |         trafo = TransformationDescription()
 54 |         aligner.align(feature_map, trafo)
 55 |         transformer = MapAlignmentTransformer()
 56 |         transformer.transformRetentionTimes(feature_map, trafo, True)  # store original RT as meta value
 57 | 
 58 |     feature_grouper = FeatureGroupingAlgorithmQT()
 59 |     feature_grouper_params = feature_grouper.getDefaults()
 60 |     feature_grouper_params[b'ignore_charge'] = 'true'
 61 |     feature_grouper_params[b'distance_RT:max_difference'] = match_rt_tolerance
 62 |     feature_grouper_params[b'distance_MZ:max_difference'] = match_mz_tolerance
 63 |     feature_grouper_params[b'distance_MZ:exponent'] = 1.0
 64 |     feature_grouper_params[b'distance_intensity:weight'] = 1.0
 65 |     feature_grouper.setParameters(feature_grouper_params)
 66 |     consensus_map = ConsensusMap()
 67 |     file_descriptions = consensus_map.getColumnHeaders()
 68 | 
 69 |     # collect information about input maps
 70 |     for i, feature_map in enumerate(feature_maps):
 71 |         file_description = file_descriptions.get(i, ColumnHeader())
 72 |         file_description.filename = str(i)
 73 |         file_description.size = feature_map.size()
 74 |         file_description.unique_id = i
 75 |         file_descriptions[i] = file_description
 76 | 
 77 |     consensus_map.setColumnHeaders(file_descriptions)
 78 |     feature_grouper.group(feature_maps, consensus_map)
 79 | 
 80 |     first_line = ['mz', 'rt', 'area', '#']
 81 |     for i in range(len(result_file_paths)):
 82 |         file_name = os.path.basename(result_file_paths[i]).split('.')[0]
 83 |         first_line += [file_name + '_mz', file_name + '_rt', file_name + '_area']
 84 | 
 85 |     result_data = np.zeros((consensus_map.size(), 4 + 3 * result_file_count))
 86 |     for i in range(consensus_map.size()):
 87 |         consensus_feature = consensus_map[i]
 88 |         result_data[i, 0] = consensus_feature.getMZ()
 89 |         result_data[i, 1] = consensus_feature.getRT()
 90 |         result_data[i, 2] = consensus_feature.getIntensity()
 91 |         feature_list = consensus_feature.getFeatureList()
 92 |         for feature_handle in feature_list:
 93 |             idx = feature_handle.getMapIndex()
 94 |             mz = feature_handle.getMZ()
 95 |             rt = feature_handle.getRT()
 96 |             intensity = feature_handle.getIntensity()
 97 |             result_data[i, 4 + 3 * idx] = mz
 98 |             result_data[i, 5 + 3 * idx] = rt
 99 |             result_data[i, 6 + 3 * idx] = intensity
100 |     # ConsensusXMLFile().store('D:\workspace\GAligner\metapro\preview.consensusXML', consensus_map)
101 | 
102 |     file = open('D:\workspace\GAligner\data\\' + folder_name + '_results_metapro\\' + folder_name + '_aligned_openms.csv', 'w')
103 |     writer = csv.writer(file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='')
104 |     writer.writerow(first_line)
105 |     writer.writerows(result_data)
106 |     file.close()
107 | 
108 | 
# Script entry: one call per dataset with its tolerance preset.
# Only the uncommented call runs; uncomment the others to reproduce those datasets.
# eval('TripleTOF_6600', align_mz_tolerance=0.01, align_rt_tolerance=0.5, match_mz_tolerance=0.01, match_rt_tolerance=0.5)
# eval('QE_HF', align_mz_tolerance=0.005, align_rt_tolerance=0.3, match_mz_tolerance=0.005, match_rt_tolerance=0.3)
eval('MTBLS562', align_mz_tolerance=0.015, align_rt_tolerance=0.3, match_mz_tolerance=0.015, match_rt_tolerance=0.3)
112 | 


--------------------------------------------------------------------------------
/data/evaluate_metapro_xcms.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "R Notebook"
  3 | output: html_notebook
  4 | ---
  5 | 
  6 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 
  7 | 
  8 | Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 
  9 | 
 10 | 
 11 | Load XCMS library
 12 | ```{r}
 13 | library(xcms)
 14 | library(SummarizedExperiment)
 15 | library(BiocParallel)
 16 | register(SerialParam())
 17 | ```
 18 | 
 19 | 
 20 | Define functions
 21 | ```{r}
 22 | load_mzml <- function(mzml_path) {
 23 |   mzmls <- list.files(path = mzml_path, pattern = ".mzML",recursive = TRUE,full.names = TRUE)
 24 |   pd <- data.frame(sample_name = sub(basename(mzmls), pattern = ".mzML", replacement = "", fixed = TRUE), stringsAsFactors = FALSE)
 25 |   xdata <- readMSData(files = mzmls, pdata = new("NAnnotatedDataFrame",pd),mode = "onDisk")
 26 |   return(xdata)
 27 | }
 28 | 
 29 | 
 30 | 
 31 | load_peak_ranges <- function(result_path) {
 32 |   result_data <- read.table(result_path, header=FALSE, sep=",")
 33 |   range_data <- result_data[, c(4:7)]
 34 |   range_data[, 3] <- range_data[, 3] * 60
 35 |   range_data[, 4] <- range_data[, 4] * 60
 36 |   
 37 |   anno_ranges <- c()
 38 |   for (i in 1:nrow(range_data)) {
 39 |     anno_ranges <- c(anno_ranges, c(t(range_data[i,])))
 40 |   }
 41 |   colnames = c("mzmin", "mzmax", "rtmin", "rtmax")
 42 |   peak_ranges <- matrix(anno_ranges, nrow = nrow(result_data), byrow = TRUE, dimnames = list(NULL, colnames))
 43 |   return(peak_ranges)
 44 | }
 45 | 
 46 | 
 47 | load_peak_data <- function(result_path) {
 48 |   result_data <- read.table(result_path, header=FALSE, sep=",")
 49 |   peak_data <- result_data[, c(1:3)]
 50 |   peak_data[, 2] <- peak_data[, 2] * 60
 51 |   return(peak_data)
 52 | }
 53 | 
 54 | 
 55 | extract_peaks <- function(xdata, result_path, samples) {
 56 |   start_idx <- 0
 57 |   for (i in 1:length(samples)) {
 58 |     result_full_path <- paste0(result_path, samples[i], '.csv')
 59 |     peak_ranges <- load_peak_ranges(result_full_path)
 60 |     peak_data <- load_peak_data(result_full_path)
 61 |     xdata <- manualChromPeaks(xdata, peak_ranges, i)
 62 |     chromPeaks(xdata)[(start_idx + 1):(start_idx + nrow(peak_ranges)), c('mz', 'rt', 'into')] <- unlist(peak_data)
 63 |     start_idx <- start_idx + nrow(peak_ranges)
 64 |   }
 65 |   return(xdata)
 66 | }
 67 | 
 68 | 
 69 | group_align <- function(xdata, bw, binsize) {
 70 |   pdp <- PeakDensityParam(minFraction = 0.9, bw = 30, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata))))
 71 |   xdata <- groupChromPeaks(xdata, param = pdp)
 72 |   pgp <- PeakGroupsParam(minFraction = 0.9)
 73 |   xdata <- adjustRtime(xdata, param = pgp)
 74 |   
 75 |   pdp <- PeakDensityParam(minFraction = 0.5, bw = bw, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata))))
 76 |   grouped <- groupChromPeaks(xdata, param = pdp)
 77 |   return(grouped)
 78 | }
 79 | 
 80 | 
 81 | obiwarp_align <- function(xdata, profStep, bw, binsize) {
 82 |   
 83 |   obi <- ObiwarpParam(binSize = profStep)
 84 |   xdata <- adjustRtime(xdata,param = obi)
 85 |   
 86 |   pdp <- PeakDensityParam(minFraction = 0.5, bw = bw, binSize = binsize, sampleGroups = c(1:length(fileNames(xdata))))
 87 |   grouped <- groupChromPeaks(xdata, param = pdp)
 88 | }
 89 | 
 90 | 
 91 | write_results <- function(grouped, csv_path) {
 92 |   mzs <- assay(quantify(grouped, value="mz"))
 93 |   rts <- assay(quantify(grouped, value="rt")) / 60
 94 |   areas <- assay(quantify(grouped, value="into"))
 95 | 
 96 |   result <- data.frame(mz = rowMedians(mzs, na.rm = TRUE), rt = rowMedians(rts, na.rm = TRUE), area = rowMedians(areas, na.rm = TRUE), need_assign = 0)
 97 |   for (name in colnames(mzs)) {
 98 |     result[paste0(name, '_mz')] <- mzs[, name]
 99 |     result[paste0(name, '_rt')] <- rts[, name]
100 |     result[paste0(name, '_area')] <- areas[, name]
101 |   }
102 |   result[is.na(result)] <- 0
103 |   write.csv(result, csv_path, row.names = FALSE)
104 | }
105 | 
106 | ```
107 | 
108 | 
109 | 
110 | 
111 | Result paths
112 | ```{r}
113 | 
# Directories holding one `<sample>.csv` result file per sample.
# The trailing slash is required: extract_peaks() builds file names with paste0().
# NOTE: these are absolute paths from the author's machine; adjust them before running.
wiff_result_path <- 'D:/workspace/GAligner/data/TripleTOF_6600/metapro/'
raw_result_path <- 'D:/workspace/GAligner/data/QE_HF/metapro/'
mtbls_result_path <- 'D:/workspace/GAligner/data/MTBLS562/metapro/'

# Sample names (CSV base names) per dataset. extract_peaks() applies the i-th name
# to the i-th loaded mzML file, so the order here matters.
wiff_samples <- c('SampleA_1', 'SampleA_2', 'SampleA_3', 'SampleA_4', 'SampleB_1', 'SampleB_2', 'SampleB_3', 'SampleB_4')
raw_samples <- c('SA1', 'SA2', 'SA3', 'SA4', 'SA5', 'SB1', 'SB2', 'SB3', 'SB4', 'SB5')
mtbls_samples <- c('12W-1', '12W-2', '12W-3', '12W-4', '12W-5', '12W-6', '12W-7', '12W-8',
                   '24W-1', '24W-2', '24W-3', '24W-4', '24W-5', '24W-6', '24W-7', '24W-8',
                   '32W-1', '32W-2', '32W-3', '32W-4', '32W-5', '32W-6', '32W-7', '32W-8',
                   '4W-1', '4W-2', '4W-3', '4W-4', '4W-5', '4W-6', '4W-7', '4W-8',
                   '52W-1', '52W-2', '52W-3', '52W-4', '52W-5', '52W-6', '52W-7', '52W-8')
125 | ```
126 | 
127 | 
128 | 
129 | 
130 | ```{r}
# TripleTOF_6600 group
# Load the mzML files, inject the result-file peaks, align with the peak-groups method
# and write the aligned table. Paths are absolute; adjust them to the local checkout.
wiff_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/TripleTOF_6600/mzml")
wiff_xdata <- extract_peaks(wiff_xdata, wiff_result_path, wiff_samples)
wiff_grouped <- group_align(wiff_xdata, bw=10, binsize=0.02)
write_results(wiff_grouped, "D:/workspace/GAligner/data/TripleTOF_6600_results_metapro/TripleTOF_6600_group_aligned_xcms.csv")
136 | ```
137 | 
138 | 
139 | ```{r}
# TripleTOF_6600 obiwarp
# Same pipeline as the group chunk, but retention times are adjusted with Obiwarp.
# The data are reloaded so this chunk does not depend on the previous one.
wiff_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/TripleTOF_6600/mzml")
wiff_xdata <- extract_peaks(wiff_xdata, wiff_result_path, wiff_samples)
wiff_grouped <- obiwarp_align(wiff_xdata, profStep = 1, bw = 10, binsize = 0.02)
write_results(wiff_grouped, "D:/workspace/GAligner/data/TripleTOF_6600_results_metapro/TripleTOF_6600_obiwarp_aligned_xcms.csv")
145 | ```
146 | 
147 | 
148 | ```{r}
# QE_HF group
# Peak-groups alignment for QE_HF; uses a narrower bw and binsize than the TripleTOF_6600 chunks.
raw_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/QE_HF/mzml")
raw_xdata <- extract_peaks(raw_xdata, raw_result_path, raw_samples)
raw_grouped <- group_align(raw_xdata, bw = 6, binsize = 0.01)
write_results(raw_grouped, "D:/workspace/GAligner/data/QE_HF_results_metapro/QE_HF_group_aligned_xcms.csv")
154 | ```
155 | 
156 | 
157 | ```{r}
# QE_HF obiwarp
# Obiwarp alignment for QE_HF, with the same grouping parameters as the QE_HF group chunk.
raw_xdata <- load_mzml(mzml_path = "D:/workspace/GAligner/data/QE_HF/mzml")
raw_xdata <- extract_peaks(raw_xdata, raw_result_path, raw_samples)
raw_grouped <- obiwarp_align(raw_xdata, profStep = 1, bw = 6, binsize = 0.01)
write_results(raw_grouped, "D:/workspace/GAligner/data/QE_HF_results_metapro/QE_HF_obiwarp_aligned_xcms.csv")
163 | 
164 | ```
165 | 
166 | 
167 | ```{r}
# MTBLS group
# NOTE(review): the mzML files are read from D:/data/..., unlike the other datasets, which
# live under D:/workspace/GAligner/data/. Confirm this is intended.
mtbls_xdata <- load_mzml(mzml_path = "D:/data/MTBLS562/mzml")
mtbls_xdata <- extract_peaks(mtbls_xdata, mtbls_result_path, mtbls_samples)
# The timer starts after loading, so the reported time covers alignment and writing the CSV.
align_time <- Sys.time()
mtbls_grouped <- group_align(mtbls_xdata, bw = 10, binsize = 0.02)
write_results(mtbls_grouped, "D:/workspace/GAligner/data/MTBLS562_results_metapro/MTBLS562_group_aligned_xcms.csv")
print("Align time taken: ")
print(Sys.time() - align_time)
176 | ```
177 | 
178 | 
179 | ```{r}
# MTBLS obiwarp
# NOTE(review): the mzML files are read from D:/data/..., unlike the other datasets, which
# live under D:/workspace/GAligner/data/. Confirm this is intended.
mtbls_xdata <- load_mzml(mzml_path = "D:/data/MTBLS562/mzml")
mtbls_xdata <- extract_peaks(mtbls_xdata, mtbls_result_path, mtbls_samples)
# The timer starts after loading, so the reported time covers alignment and writing the CSV.
align_time <- Sys.time()
mtbls_grouped <- obiwarp_align(mtbls_xdata, profStep = 1, bw = 10, binsize = 0.02)
write_results(mtbls_grouped, "D:/workspace/GAligner/data/MTBLS562_results_metapro/MTBLS562_obiwarp_aligned_xcms.csv")
print("Align time taken: ")
print(Sys.time() - align_time)
188 | 
189 | ```
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------
/data/evaluate_mzmine2_galigner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams
 3 | from src.main_galigner import GAligner
 4 | 
 5 | 
 6 | def eval_wiff(folder_name, solver, vlsns_init_mode):
 7 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'mzmine2')
 8 | 
 9 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1,
10 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
11 |     raw_file_reading_params = RawFileReadingParams()
12 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
13 |                                                           rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1)
14 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
15 |                                                   area_factor=1, use_ppm=False, solver=solver)
16 |     if vlsns_init_mode is not None:
17 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
18 | 
19 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
20 |                          fine_assignment_params)
21 |     g_aligner.do_align()
22 | 
23 | 
24 | def eval_raw(folder_name, solver, vlsns_init_mode):
25 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'mzmine2')
26 | 
27 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1,
28 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
29 |     raw_file_reading_params = RawFileReadingParams()
30 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0,
31 |                                                           rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1)
32 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1,
33 |                                                   area_factor=1, use_ppm=False, solver=solver)
34 |     if vlsns_init_mode is not None:
35 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
36 | 
37 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
38 |                          fine_assignment_params)
39 |     g_aligner.do_align()
40 | 
41 | 
def eval_mtbls(folder_name, solver, vlsns_init_mode):
    """Run GAligner on the result files in ``<cwd>/<folder_name>/mzmine2``.

    :param folder_name: data set directory, resolved against the current
        working directory.
    :param solver: solver name forwarded to ``FineAssignmentParams``.
    :param vlsns_init_mode: when not ``None``, stored on the fine assignment
        parameters as ``vlsns_solution_init_mode``.
    """
    reading_params = ResultFileReadingParams(
        os.path.join(os.getcwd(), folder_name, 'mzmine2'),
        skip_line=1, rt_col_num=2, mz_col_num=1, area_col_num=3,
    )
    raw_params = RawFileReadingParams()

    coarse_params = CoarseRegistrationParams(
        solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
        rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1,
    )
    fine_params = FineAssignmentParams(
        rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
        area_factor=1, use_ppm=False, solver=solver,
    )
    # The init mode is only overridden when the caller asks for one.
    if vlsns_init_mode is not None:
        fine_params.vlsns_solution_init_mode = vlsns_init_mode

    GAligner(reading_params, raw_params, coarse_params, fine_params).do_align()
59 | 
60 | 
# Evaluation runs. Every uncommented call below executes as soon as this
# module is run or imported (there is no ``__main__`` guard). The commented-out
# calls are alternative data set / solver configurations kept for reference.

# TripleTOF_6600
# eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr')
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg')

# QE_HF
# eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr')
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg')

# MTBLS562
# eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg')
eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr')
81 | 
82 | 


--------------------------------------------------------------------------------
/data/evaluate_openms_galigner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams
 3 | from src.main_galigner import GAligner
 4 | 
 5 | 
 6 | def eval_wiff(folder_name, solver, vlsns_init_mode):
 7 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'openms')
 8 | 
 9 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=0,
10 |                                                          rt_col_num=2, mz_col_num=1, area_col_num=3)
11 |     raw_file_reading_params = RawFileReadingParams()
12 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
13 |                                                           rt_tolerance=0.5, rt_residual_threshold=0.04, degree=1)
14 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
15 |                                                   area_factor=1, use_ppm=False, solver=solver)
16 |     if vlsns_init_mode is not None:
17 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
18 | 
19 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
20 |                          fine_assignment_params)
21 |     g_aligner.do_align()
22 | 
23 | 
def eval_raw(folder_name, solver, vlsns_init_mode):
    """Run GAligner on the result files in ``<cwd>/<folder_name>/openms``.

    :param folder_name: data set directory, resolved against the current
        working directory.
    :param solver: solver name forwarded to ``FineAssignmentParams``.
    :param vlsns_init_mode: when not ``None``, stored on the fine assignment
        parameters as ``vlsns_solution_init_mode``.
    """
    reading_params = ResultFileReadingParams(
        os.path.join(os.getcwd(), folder_name, 'openms'),
        skip_line=0, rt_col_num=2, mz_col_num=1, area_col_num=3,
    )
    raw_params = RawFileReadingParams()
    coarse_params = CoarseRegistrationParams(
        solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0,
        rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1,
    )
    fine_params = FineAssignmentParams(
        rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1,
        area_factor=1, use_ppm=False, solver=solver,
    )
    # The init mode is only overridden when the caller asks for one.
    if vlsns_init_mode is not None:
        fine_params.vlsns_solution_init_mode = vlsns_init_mode

    GAligner(reading_params, raw_params, coarse_params, fine_params).do_align()
40 | 
41 | 
def eval_mtbls(folder_name, solver, vlsns_init_mode):
    """Run GAligner on the result files in ``<cwd>/<folder_name>/openms4``.

    :param folder_name: data set directory, resolved against the current
        working directory.
    :param solver: solver name forwarded to ``FineAssignmentParams``.
    :param vlsns_init_mode: when not ``None``, stored on the fine assignment
        parameters as ``vlsns_solution_init_mode``.
    """
    # NOTE(review): the sibling functions in this file read from 'openms';
    # this one reads from 'openms4' -- confirm the folder name is intended.
    reading_params = ResultFileReadingParams(
        os.path.join(os.getcwd(), folder_name, 'openms4'),
        skip_line=0, rt_col_num=2, mz_col_num=1, area_col_num=3,
    )
    raw_params = RawFileReadingParams()
    coarse_params = CoarseRegistrationParams(
        solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
        rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1,
    )
    fine_params = FineAssignmentParams(
        rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
        area_factor=1, use_ppm=False, solver=solver,
    )
    # The init mode is only overridden when the caller asks for one.
    if vlsns_init_mode is not None:
        fine_params.vlsns_solution_init_mode = vlsns_init_mode

    GAligner(reading_params, raw_params, coarse_params, fine_params).do_align()
58 | 
59 | 
# Evaluation runs. Every uncommented call below executes as soon as this
# module is run or imported (there is no ``__main__`` guard). The commented-out
# calls are alternative data set / solver configurations kept for reference.

# # TripleTOF_6600
# eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr')
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg')
#
# # QE_HF
# eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr')
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg')

# MTBLS562
# eval_mtbls('MTBLS562', solver='local_nearest', vlsns_init_mode=None)
eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None)
eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg')
eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr')


--------------------------------------------------------------------------------
/data/evaluate_xcms_galigner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams
 3 | from src.main_galigner import GAligner
 4 | 
 5 | 
 6 | def eval_wiff(folder_name, solver, vlsns_init_mode):
 7 |     result_file_path = os.path.join(os.getcwd(), folder_name, 'xcms')
 8 | 
 9 |     result_file_reading_params = ResultFileReadingParams(result_file_path, skip_line=1,
10 |                                                          rt_col_num=5, mz_col_num=2, area_col_num=8)
11 |     raw_file_reading_params = RawFileReadingParams()
12 |     coarse_registration_params = CoarseRegistrationParams(solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
13 |                                                           rt_tolerance=0.5, rt_residual_threshold=0.05, degree=1)
14 |     fine_assignment_params = FineAssignmentParams(rt_tolerance=0.3, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
15 |                                                   area_factor=1, use_ppm=False, solver=solver)
16 |     if vlsns_init_mode is not None:
17 |         fine_assignment_params.vlsns_solution_init_mode = vlsns_init_mode
18 | 
19 |     g_aligner = GAligner(result_file_reading_params, raw_file_reading_params, coarse_registration_params,
20 |                          fine_assignment_params)
21 |     g_aligner.do_align()
22 | 
23 | 
def eval_raw(folder_name, solver, vlsns_init_mode):
    """Run GAligner on the result files in ``<cwd>/<folder_name>/xcms``.

    :param folder_name: data set directory, resolved against the current
        working directory.
    :param solver: solver name forwarded to ``FineAssignmentParams``.
    :param vlsns_init_mode: when not ``None``, stored on the fine assignment
        parameters as ``vlsns_solution_init_mode``.
    """
    reading_params = ResultFileReadingParams(
        os.path.join(os.getcwd(), folder_name, 'xcms'),
        skip_line=1, rt_col_num=5, mz_col_num=2, area_col_num=8,
    )
    raw_params = RawFileReadingParams()
    coarse_params = CoarseRegistrationParams(
        solver='ransac', mz_tolerance=0.005, use_ppm=False, centric_idx=0,
        rt_tolerance=0.3, rt_residual_threshold=0.03, degree=1,
    )
    fine_params = FineAssignmentParams(
        rt_tolerance=0.15, mz_tolerance=0.003, mz_factor=1, rt_factor=1,
        area_factor=1, use_ppm=False, solver=solver,
    )
    # The init mode is only overridden when the caller asks for one.
    if vlsns_init_mode is not None:
        fine_params.vlsns_solution_init_mode = vlsns_init_mode

    GAligner(reading_params, raw_params, coarse_params, fine_params).do_align()
40 | 
def eval_mtbls(folder_name, solver, vlsns_init_mode):
    """Run GAligner on the result files in ``<cwd>/<folder_name>/xcms``.

    Unlike the other two functions in this file, the result columns are read
    with ``rt_col_num=4, mz_col_num=1, area_col_num=7``.

    :param folder_name: data set directory, resolved against the current
        working directory.
    :param solver: solver name forwarded to ``FineAssignmentParams``.
    :param vlsns_init_mode: when not ``None``, stored on the fine assignment
        parameters as ``vlsns_solution_init_mode``.
    """
    reading_params = ResultFileReadingParams(
        os.path.join(os.getcwd(), folder_name, 'xcms'),
        skip_line=1, rt_col_num=4, mz_col_num=1, area_col_num=7,
    )
    raw_params = RawFileReadingParams()
    coarse_params = CoarseRegistrationParams(
        solver='ransac', mz_tolerance=0.01, use_ppm=False, centric_idx=0,
        rt_tolerance=0.3, rt_residual_threshold=0.05, degree=1,
    )
    fine_params = FineAssignmentParams(
        rt_tolerance=0.1, mz_tolerance=0.01, mz_factor=1, rt_factor=1,
        area_factor=1, use_ppm=False, solver=solver,
    )
    # The init mode is only overridden when the caller asks for one.
    if vlsns_init_mode is not None:
        fine_params.vlsns_solution_init_mode = vlsns_init_mode

    GAligner(reading_params, raw_params, coarse_params, fine_params).do_align()
57 | 
58 | 
# Evaluation runs. Every uncommented call below executes as soon as this
# module is run or imported (there is no ``__main__`` guard). The commented-out
# calls are alternative data set / solver configurations kept for reference.

# # TripleTOF_6600
# eval_wiff('TripleTOF_6600', solver='local_bipartite', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='greedy', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='gurobi', vlsns_init_mode=None)
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msr')
# eval_wiff('TripleTOF_6600', solver='vlsns', vlsns_init_mode='msg')
#
# # QE_HF
# eval_raw('QE_HF', solver='local_bipartite', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='greedy', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='gurobi', vlsns_init_mode=None)
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msr')
# eval_raw('QE_HF', solver='vlsns', vlsns_init_mode='msg')

# MTBLS562
eval_mtbls('MTBLS562', solver='local_bipartite', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='greedy', vlsns_init_mode=None)
# eval_mtbls('MTBLS562', solver='gurobi', vlsns_init_mode=None)
eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msg')
# eval_mtbls('MTBLS562', solver='vlsns', vlsns_init_mode='msr')
79 | 


--------------------------------------------------------------------------------
/data/openms_result_converter.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | 
 3 | from pyopenms import ConsensusXMLFile, ConsensusMap, FeatureXMLFile, FeatureMap
 4 | import numpy as np
 5 | import csv
 6 | 
 7 | 
 8 | def convert_consensusxml_to_csv(xml_path, csv_path, file_names):
 9 |     consensusMap = ConsensusMap()
10 |     ConsensusXMLFile().load(xml_path, consensusMap)
11 | 
12 |     csv_file = open(csv_path, 'w')
13 |     writer = csv.writer(csv_file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar='')
14 | 
15 |     first_row = ['mz', 'rt', 'area', 'need_assign']
16 |     for file_name in file_names:
17 |         first_row += [file_name + "_mz", file_name + "_rt", file_name + "_area"]
18 |     writer.writerow(first_row)
19 | 
20 |     for i in range(consensusMap.size()):
21 |         consensusFeature = consensusMap[i]
22 |         avg_mz = consensusFeature.getMZ()
23 |         avg_rt = consensusFeature.getRT() / 60
24 |         avg_area = consensusFeature.getIntensity()
25 |         features = np.zeros((len(file_names), 3))
26 |         for feature in consensusFeature.getFeatureList():
27 |             mapIndex = feature.getMapIndex()
28 |             features[mapIndex][0] = feature.getMZ()
29 |             features[mapIndex][1] = feature.getRT() / 60
30 |             features[mapIndex][2] = feature.getIntensity()
31 |         features = features.ravel().tolist()
32 |         row = [avg_mz, avg_rt, avg_area, 0] + features
33 |         writer.writerow(row)
34 |     csv_file.close()
35 |     print(xml_path + " convertion finished.")
36 | 
37 | 
def convert_featurexml_to_csv(xml_path, csv_path):
    """Write every feature of an OpenMS featureXML file as an ``mz, rt, area`` CSV row.

    No header row is written.

    :param xml_path: path of the featureXML file to load.
    :param csv_path: path of the CSV file to write (overwritten).
    """
    feature_map = FeatureMap()
    FeatureXMLFile().load(xml_path, feature_map)

    # newline='' lets the csv module control line endings (the 'unix' dialect
    # terminates rows with '\n'); the context manager closes the file even if
    # a row fails to serialise.
    with open(csv_path, 'w', newline='') as csv_file:
        # quotechar=None instead of '': an empty quotechar is rejected with a
        # TypeError from Python 3.11 on, and no quote char is needed with
        # QUOTE_NONE.
        writer = csv.writer(csv_file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar=None)
        for feature in feature_map:
            # RT is divided by 60 -- presumably seconds to minutes; confirm
            # against the units the downstream reader expects.
            writer.writerow([feature.getMZ(), feature.getRT() / 60, feature.getIntensity()])

    print(xml_path + " convertion finished.")
49 | 
50 | 
# Script entry point: converts the MTBLS562 OpenMS outputs to CSV.
# All paths are hard-coded absolute Windows paths; adjust them before running
# on another machine. The commented-out sections are the equivalent
# conversions for the TripleTOF_6600 and QE_HF data sets.
if __name__ == '__main__':

    # Column labels, one per input map, used for the consensus CSV header.
    # Only mtbls_sample_names is used by the active code below.
    wiff_sample_names = ['SampleA_1', 'SampleA_2', 'SampleA_3', 'SampleA_4',
                         'SampleB_1', 'SampleB_2', 'SampleB_3', 'SampleB_4']
    raw_sample_names = ['SA1', 'SA2', 'SA3', 'SA4', 'SA5', 'SB1', 'SB2', 'SB3', 'SB4', 'SB5']

    mtbls_sample_names = ['12W-1', '12W-2', '12W-3', '12W-4', '12W-5', '12W-6', '12W-7', '12W-8',
                          '24W-1', '24W-2', '24W-3', '24W-4', '24W-5', '24W-6', '24W-7', '24W-8',
                          '32W-1', '32W-2', '32W-3', '32W-4', '32W-5', '32W-6', '32W-7', '32W-8',
                          '4W-1', '4W-2', '4W-3', '4W-4', '4W-5', '4W-6', '4W-7', '4W-8',
                          '52W-1', '52W-2', '52W-3', '52W-4', '52W-5', '52W-6', '52W-7', '52W-8']

    # wiff_featurexml_files = glob.glob("D:/workspace/GAligner/data/TripleTOF_6600/openms/*.featureXML")
    # for file in wiff_featurexml_files:
    #     convert_featurexml_to_csv(file, file.replace("featureXML", "csv"))
    #
    # raw_featurexml_files = glob.glob("D:/workspace/GAligner/data/QE_HF/openms/*.featureXML")
    # for file in raw_featurexml_files:
    #     convert_featurexml_to_csv(file, file.replace("featureXML", "csv"))

    # Per-sample feature lists: each *.featureXML gets a sibling CSV.
    # Note that str.replace swaps every "featureXML" occurrence in the path,
    # not only the file extension.
    mtbls_featurexml_files = glob.glob("D:/workspace/GAligner/data/MTBLS562/openms/*.featureXML")
    for file in mtbls_featurexml_files:
        convert_featurexml_to_csv(file, file.replace("featureXML", "csv"))

    # wiff_consensusxml_file = "D:/workspace/GAligner/data/TripleTOF_6600_results_openms/wiff_aligned.consensusXML"
    # wiff_csv_file = "D:/workspace/GAligner/data/TripleTOF_6600_results_openms/openms_aligned.csv"
    # convert_consensusxml_to_csv(wiff_consensusxml_file, wiff_csv_file, wiff_sample_names)
    #
    # raw_consensusxml_file = "D:/workspace/GAligner/data/QE_HF_results_openms/raw_aligned.consensusXML"
    # raw_csv_file = "D:/workspace/GAligner/data/QE_HF_results_openms/openms_aligned.csv"
    # convert_consensusxml_to_csv(raw_consensusxml_file, raw_csv_file, raw_sample_names)

    # Aligned (consensus) result of the MTBLS562 data set.
    mtbls_consensusxml_file = "D:/workspace/GAligner/data/MTBLS562_results_openms/openms_wiff.consensusXML"
    mtbls_csv_file = "D:/workspace/GAligner/data/MTBLS562_results_openms/openms_wiff.csv"
    convert_consensusxml_to_csv(mtbls_consensusxml_file, mtbls_csv_file, mtbls_sample_names)
86 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.19.2
 2 | scipy>=1.9.3
 3 | pytest>=7.0.1
 4 | pyteomics>=4.4.2
 5 | lxml>=4.9.1
 6 | matplotlib>=3.3.4
 7 | networkx>=3.0
 8 | gurobipy>=9.1.2
 9 | pandas>=1.1.5
10 | ortools>=9.5
11 | scikit_learn>=1.2.0
12 | xlrd>=2.0.1
13 | openpyxl>=3.0.10
14 | 


--------------------------------------------------------------------------------
/src/fine_alignment.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | import networkx as nx
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | 
  8 | class Graph:
  9 |     def __init__(self, graph_params):
 10 |         self.mz_tolerance = graph_params.mz_tolerance
 11 |         self.rt_tolerance = graph_params.rt_tolerance
 12 |         self.use_ppm = graph_params.use_ppm
 13 |         self.mz_factor = graph_params.mz_factor
 14 |         self.rt_factor = graph_params.rt_factor
 15 |         self.area_factor = graph_params.area_factor
 16 | 
 17 |     def do_build(self, data_list):
 18 |         g = nx.Graph()
 19 |         data_lens = []
 20 |         start_idxes = [0]
 21 |         node_idx = 0
 22 | 
 23 |         print('\tConverting result to graph nodes...')
 24 |         start_time = time.time()
 25 |         print('\r\t[{}] 0/ 1 Time cost:{:.1f}s'.format('-' * 50, time.time() - start_time), end='')
 26 |         for i, data in enumerate(data_list):
 27 |             data_lens += [len(data)]
 28 |             start_idxes += [start_idxes[-1] + len(data)]
 29 |             for point in data:
 30 |                 g.add_node(node_idx, data_idx=i, mz=point[0], rt=point[1], area=point[2])
 31 |                 node_idx += 1
 32 |         print('\r\t[{}] 1/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time))
 33 | 
 34 |         print('\tAnalyzing node relations and build edges...')
 35 |         start_time = time.time()
 36 |         comparison_idx = 0
 37 |         total_comparisons = ((len(data_list) - 1) // 2) * len(data_list) + ((len(data_list) - 1) % 2) * (
 38 |                 len(data_list) // 2)
 39 | 
 40 |         def build_edges(g, left_data, right_data, left_start_idx, right_start_idx, mz_tolerance, use_ppm, rt_tolerance,
 41 |                         mz_factor, rt_factor, area_factor, window_factor):
 42 |             edges = []
 43 |             right_idx = 0
 44 |             for left_idx in range(len(left_data)):
 45 |                 adj_nodes = np.array(list(nx.edges(g, left_start_idx + left_idx)))
 46 |                 if len(adj_nodes) > 0 and np.sum((adj_nodes[:, 1] >= right_start_idx) *
 47 |                                                  (adj_nodes[:, 1] < right_start_idx + len(right_data))) > 0:
 48 |                     continue
 49 |                 tmp_mz_tolerance = mz_tolerance
 50 |                 if use_ppm:
 51 |                     tmp_mz_tolerance = left_data[left_idx][0] * mz_tolerance * 1e-6
 52 |                 mz_start = left_data[left_idx][0] - tmp_mz_tolerance * window_factor
 53 |                 mz_end = left_data[left_idx][0] + tmp_mz_tolerance * window_factor
 54 |                 if right_data[right_idx][0] > mz_end:
 55 |                     continue
 56 |                 while right_idx < len(right_data) and right_data[right_idx][0] < mz_start:
 57 |                     right_idx += 1
 58 |                 if right_idx >= len(right_data):
 59 |                     break
 60 | 
 61 |                 rt_start = left_data[left_idx][1] - rt_tolerance * window_factor
 62 |                 rt_end = left_data[left_idx][1] + rt_tolerance * window_factor
 63 |                 for right_iter_idx in range(right_idx, len(right_data)):
 64 |                     if right_data[right_iter_idx][0] > mz_end:
 65 |                         break
 66 |                     if (right_data[right_iter_idx][1] < rt_start) \
 67 |                             or (right_data[right_iter_idx][1] > rt_end):
 68 |                         continue
 69 |                     dist = mz_factor * abs(left_data[left_idx][0] - right_data[right_iter_idx][0]) / tmp_mz_tolerance + \
 70 |                         rt_factor * abs(left_data[left_idx][1] - right_data[right_iter_idx][1]) / rt_tolerance
 71 |                     dist /= mz_factor + rt_factor
 72 |                     edges.append([left_start_idx + left_idx, right_start_idx + right_iter_idx, dist])
 73 |             edges = np.array(edges)
 74 | 
 75 |             # for edge in edges[filtered_edges]:
 76 |             for edge in edges:
 77 |                 g.add_edge(int(edge[0]), int(edge[1]), weight=edge[2])
 78 | 
 79 |         for i in range(len(data_list)):
 80 |             for j in range(i + 1, len(data_list)):
 81 |                 left_data = data_list[i]
 82 |                 right_data = data_list[j]
 83 |                 # build_edges(g, left_data, right_data, start_idxes[i], start_idxes[j], self.mz_tolerance, self.use_ppm,
 84 |                 #             self.rt_tolerance / 2, self.mz_factor, self.rt_factor, self.area_factor, pow(2, -1))
 85 |                 build_edges(g, left_data, right_data, start_idxes[i], start_idxes[j], self.mz_tolerance, self.use_ppm,
 86 |                             self.rt_tolerance, self.mz_factor, self.rt_factor, self.area_factor, 1)
 87 |                 comparison_idx += 1
 88 |                 done_progress = int((comparison_idx / total_comparisons) * 50)
 89 |                 print('\r\t[{}{}]{:2d}/{:2d} Time cost:{:.1f}s'.format('▓' * done_progress, '-' * (50 - done_progress),
 90 |                                                                        comparison_idx, total_comparisons,
 91 |                                                                        time.time() - start_time), end='')
 92 |         print()
 93 |         print('\tSplitting graph into sub-graphs...')
 94 |         start_time = time.time()
 95 |         print('\r\t[{}] 0/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time), end='')
 96 |         sub_graphs = [g.subgraph(c) for c in nx.connected_components(g)]
 97 |         max_nodes = 0
 98 |         max_edges = 0
 99 |         for s in sub_graphs:
100 |             if max_nodes < len(s.nodes):
101 |                 max_nodes = len(s.nodes)
102 |             if max_edges < len(s.edges):
103 |                 max_edges = len(s.edges)
104 |         print('\r\t[{}] 1/ 1 Time cost:{:.1f}s'.format('▓' * 50, time.time() - start_time))
105 |         print('\tGraph built. Total %d sub-graphs. Max %d nodes, %d edges.' % (len(sub_graphs), max_nodes, max_edges))
106 | 
107 |         for sub_graph in sub_graphs:
108 |             max_area_map = {}
109 |             nodes = sub_graph.nodes(data=True)
110 |             for node in nodes:
111 |                 if (node[1]['data_idx'] not in max_area_map.keys()) or (max_area_map[node[1]['data_idx']] < node[1]['area']):
112 |                     max_area_map[node[1]['data_idx']] = node[1]['area']
113 |             for edge in sub_graph.edges:
114 |                 dist = sub_graph.edges[edge]['weight'] * (self.mz_factor + self.rt_factor)
115 |                 area_0 = nodes[edge[0]]['area'] / (max_area_map[nodes[edge[0]]['data_idx']] + 1e-6)
116 |                 area_1 = nodes[edge[1]]['area'] / (max_area_map[nodes[edge[1]]['data_idx']] + 1e-6)
117 |                 dist += self.area_factor * (1 - min(area_0, area_1) / (max(area_0, area_1) + 1e-6))
118 |                 dist /= self.mz_factor + self.rt_factor + self.area_factor
119 |                 sub_graph.edges[edge]['weight'] = dist
120 |         return sub_graphs, max_nodes
121 | 


--------------------------------------------------------------------------------
/src/map_solver/greedy_solver.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import networkx as nx
 4 | import numpy as np
 5 | 
 6 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx, \
 7 |     assigned_flatten_idx_to_edges
 8 | 
 9 | 
def greedy_assigner(group_node_list, group_len_list, cost_list):
    """Repeatedly pick the cheapest remaining combination until none is left.

    ``cost_list`` is modified in place: its last entry is set to infinity up
    front, and after every pick all combinations that share a real (non-dumb)
    node with the picked one are set to infinity as well.

    :param group_node_list: per group, the real nodes of that group; an index
        at or beyond ``len(group_node_list[j])`` denotes a dumb node.
    :param group_len_list: shape of the cost tensor, one length per group.
    :param cost_list: flattened cost array; it is indexed with a boolean mask,
        so it is expected to be a numpy array.
    :return: ``(assigned_idx_list, assign_time)`` -- the picked flattened
        indices in pick order and the elapsed wall-clock seconds.
    """
    started_at = time.time()
    picked = []
    cost_list[-1] = np.inf
    while np.min(cost_list) != np.inf:
        cheapest = np.argmin(cost_list)
        picked.append(cheapest)

        coords = flatten_to_matrix_idx(cheapest, group_len_list)
        blocked = np.zeros(group_len_list)
        for axis, coord in enumerate(coords):
            if coord >= len(group_node_list[axis]):  # not settle dumb nodes
                continue
            # Fix this axis at the picked node and span every other axis.
            selector = tuple(
                coord if other == axis else slice(group_len_list[other])
                for other in range(len(coords))
            )
            blocked[selector] = 1
        cost_list[blocked.ravel() == 1] = float('inf')

    return picked, time.time() - started_at
37 | 
38 | 
def greedy_solve(sub_graph, graph_params, debug=False):
    """Assign the nodes of ``sub_graph`` with the greedy strategy.

    :param sub_graph: sub-graph to solve.
    :param graph_params: parameters forwarded to ``calc_cost_list``.
    :param debug: when true, print size and timing figures for sub-graphs with
        more than 20 nodes.
    :return: the assignment node views; a single-element list holding all
        nodes when preprocessing reports that nothing needs assigning.
    """
    preprocessed = graph_preprocessing(sub_graph)
    no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = preprocessed
    if no_need_to_assign:
        return [sub_graph.nodes(data=True)]

    # Cost of every combination, then the greedy pick over those costs.
    cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list,
                                          combination_size, greedy=True)
    assigned_idx_list, assign_time = greedy_assigner(group_node_list, group_len_list, cost_list)

    # Translate the picked flattened indices back into node groups.
    assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph,
                                                                  group_node_list, group_len_list)
    if debug and len(sub_graph) > 20:
        print(len(sub_graph), combination_size, pre_time, cost_time, assign_time, refine_time)
    return assignment_nodes
56 | 


--------------------------------------------------------------------------------
/src/map_solver/gurobi_solver.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import networkx as nx
 4 | import gurobipy as gp
 5 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx,\
 6 |     flatten_idx_digit_match, assigned_flatten_idx_to_edges
 7 | 
 8 | 
 9 | def gurobi_solve(sub_graph, graph_params, debug=False):
10 |     # Preprocessing
11 |     no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph)
12 |     if no_need_to_assign:
13 |         return [sub_graph.nodes(data=True)]
14 |     # Prepare cost list
15 |     cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size, greedy=True)
16 | 
17 |     # Set Gurobi optimizer
18 |     assign_time = time.time()
19 |     m = gp.Model("Multidimensional Assignment Problem")
20 |     m.setParam('OutputFlag', 0)
21 | 
22 |     x = []
23 |     for i in range(combination_size):
24 |         idx_tuple = flatten_to_matrix_idx(i, group_len_list)
25 |         x.append(m.addVar(vtype=gp.GRB.BINARY, name="x" + "%d" * len(group_len_list) % idx_tuple))
26 | 
27 |     # i: digit from right to left
28 |     for i in range(len(group_len_list)):
29 |         non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i])
30 |         for j in range(non_dumb_node_num):
31 |             m.addConstr(gp.quicksum([x[k] for k in range(combination_size)
32 |                                     if flatten_idx_digit_match(k, group_len_list, i, j)]) == 1,
33 |                         name='Constraint%d%d' % (i, j))
34 |     m.addConstr(x[-1] == 0, name='ConstraintDumb')
35 | 
36 |     m.setObjective(gp.quicksum(x[i] * cost_list[i] for i in range(combination_size)), gp.GRB.MINIMIZE)
37 |     m.optimize()
38 |     assigned_idx_list = []
39 |     for i in range(len(x)):
40 |         if x[i].x > 1e-6:
41 |             assigned_idx_list.append(i)
42 |     assign_time = time.time() - assign_time
43 | 
44 |     # Confirm assignment result
45 |     assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph, group_node_list, group_len_list)
46 |     if debug:
47 |         print(len(sub_graph), pre_time, cost_time, assign_time, refine_time)
48 |     return assignment_nodes
49 | 


--------------------------------------------------------------------------------
/src/map_solver/local_bipartite_solver.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import networkx as nx
 4 | import numpy as np
 5 | from scipy.optimize import linear_sum_assignment
 6 | from src.map_solver.base_solver import graph_preprocessing
 7 | 
 8 | 
 9 | def bipartite_assigner(sub_graph, group_node_list):
10 |     assign_time = time.time()
11 | 
12 |     node_length_list = [len(nodes) for nodes in group_node_list]
13 |     descend_idxes = np.argsort(-1 * np.array(node_length_list))
14 | 
15 |     assigned_nodes = group_node_list[descend_idxes[0]].copy()
16 |     assignments = [[node] for node in assigned_nodes]
17 |     for i in range(1, len(descend_idxes)):
18 |         connected_nodes = []
19 |         unconnected_nodes = []
20 |         connected_assignments_idxes = []
21 |         for node in group_node_list[descend_idxes[i]]:
22 |             tmp_assignment_idxes = []
23 |             for j, assignment in enumerate(assignments):
24 |                 match_idx = -1
25 |                 tmp_min_weight = float('inf')
26 |                 for k, assigned_node in enumerate(assignment):
27 |                     if sub_graph.has_edge(node, assigned_node):
28 |                         tmp_weight = sub_graph.get_edge_data(node, assigned_node)['weight']
29 |                         if tmp_weight < tmp_min_weight:
30 |                             tmp_min_weight = tmp_weight
31 |                             match_idx = k
32 |                 if match_idx != -1:
33 |                     tmp_assignment_idxes.append([j, match_idx])
34 |             if len(tmp_assignment_idxes) > 0:
35 |                 connected_nodes.append(node)
36 |                 connected_assignments_idxes.append(tmp_assignment_idxes)
37 |             else:
38 |                 unconnected_nodes.append(node)
39 | 
40 |         if len(connected_nodes) > 0:
41 |             cost_matrix = np.ones([len(connected_nodes), len(assignments)]) * 100
42 |             for m in range(len(connected_nodes)):
43 |                 for match_idxes in connected_assignments_idxes[m]:
44 |                     assigned_node = assignments[match_idxes[0]][match_idxes[1]]
45 |                     tmp_weight = sub_graph.get_edge_data(connected_nodes[m], assigned_node)['weight']
46 |                     cost_matrix[m][match_idxes[0]] = tmp_weight
47 |             assigned_idx = linear_sum_assignment(cost_matrix)
48 | 
49 |             for j in range(len(connected_nodes)):
50 |                 if j not in assigned_idx[0]:
51 |                     unconnected_nodes.append(connected_nodes[j])
52 | 
53 |             for j in range(len(assigned_idx[0])):
54 |                 if cost_matrix[assigned_idx[0][j]][assigned_idx[1][j]] == 100:
55 |                     unconnected_nodes.append(connected_nodes[assigned_idx[0][j]])
56 |                     continue
57 |                 from_node = connected_nodes[assigned_idx[0][j]]
58 |                 assignments[assigned_idx[1][j]].append(from_node)
59 | 
60 |         for node in unconnected_nodes:
61 |             assignments.append([node])
62 | 
63 |         assigned_nodes += group_node_list[descend_idxes[i]]
64 | 
65 |     assign_time = time.time() - assign_time
66 |     return assignments, assign_time
67 | 
68 | 
def local_bipartite_solve(sub_graph, graph_params, debug=False):
    """Partition ``sub_graph`` into node groups with the bipartite assigner.

    Args:
        sub_graph: graph to partition; must support ``nodes(data=True)``,
            ``subgraph`` and ``len``.
        graph_params: accepted for signature parity with the other solvers;
            not read by this function.
        debug: when true, timing figures are printed for graphs with more
            than 20 nodes.

    Returns:
        A list of node views (``nodes(data=True)``), one per assigned group.
        When preprocessing reports that no assignment is needed, the list
        holds a single view over the whole graph.
    """
    preprocessed = graph_preprocessing(sub_graph)
    skip_assignment, node_groups, _group_lens, combo_size, prep_elapsed = preprocessed
    if skip_assignment:
        return [sub_graph.nodes(data=True)]

    # Group the nodes, then expose every group as a data view of its subgraph.
    grouped_nodes, assign_elapsed = bipartite_assigner(sub_graph, node_groups)

    refine_began = time.time()
    node_views = [sub_graph.subgraph(members).nodes(data=True) for members in grouped_nodes]
    refine_elapsed = time.time() - refine_began

    if debug and len(sub_graph) > 20:
        print(len(sub_graph), combo_size, prep_elapsed, assign_elapsed, refine_elapsed)
    return node_views
90 | 


--------------------------------------------------------------------------------
/src/map_solver/local_nearest_solver.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import networkx as nx
 4 | import numpy as np
 5 | 
 6 | from src.map_solver.base_solver import graph_preprocessing
 7 | 
 8 | 
 9 | def nearest_assigner(sub_graph, group_node_list):
10 |     assign_time = time.time()
11 | 
12 |     node_length_list = [len(nodes) for nodes in group_node_list]
13 |     descend_idxes = np.argsort(-1 * np.array(node_length_list))
14 | 
15 |     assignments = [[node] for node in group_node_list[descend_idxes[0]]]
16 |     for i in range(1, len(descend_idxes)):
17 |         # build a cost map
18 |         cost_matrix = np.ones([len(group_node_list[descend_idxes[i]]), len(assignments)])
19 |         for j in range(len(group_node_list[descend_idxes[i]])):
20 |             node = group_node_list[descend_idxes[i]][j]
21 |             for k, assignment in enumerate(assignments):
22 |                 tmp_min_weight = 100
23 |                 for assigned_node in assignment:
24 |                     if sub_graph.has_edge(node, assigned_node):
25 |                         tmp_weight = sub_graph.get_edge_data(node, assigned_node)['weight']
26 |                         if tmp_weight < tmp_min_weight:
27 |                             tmp_min_weight = tmp_weight
28 |                 cost_matrix[j][k] = tmp_min_weight
29 | 
30 |         for j in range(cost_matrix.shape[0]):
31 |             min_idx = np.argmin(cost_matrix)
32 |             min_x_idx = min_idx // cost_matrix.shape[1]
33 |             min_y_idx = min_idx - min_x_idx * cost_matrix.shape[1]
34 |             tmp_node = group_node_list[descend_idxes[i]][min_x_idx]
35 |             tmp_cost = cost_matrix[min_x_idx, min_y_idx]
36 |             if tmp_cost != 100:
37 |                 assignments[min_y_idx].append(tmp_node)
38 |             else:
39 |                 assignments.append([tmp_node])
40 |             cost_matrix[min_x_idx, :] = float('inf')
41 |             cost_matrix[:, min_y_idx] = float('inf')
42 | 
43 |     assign_time = time.time() - assign_time
44 |     return assignments, assign_time
45 | 
46 | 
def local_nearest_solve(sub_graph, graph_params, debug=False):
    """Partition ``sub_graph`` into node groups with the nearest assigner.

    Args:
        sub_graph: graph to partition; must support ``nodes(data=True)``,
            ``subgraph`` and ``len``.
        graph_params: accepted for signature parity with the other solvers;
            not read by this function.
        debug: when true, timing figures are printed for graphs with more
            than 20 nodes.

    Returns:
        A list of node views (``nodes(data=True)``), one per assigned group.
        When preprocessing reports that no assignment is needed, the list
        holds a single view over the whole graph.
    """
    preprocessed = graph_preprocessing(sub_graph)
    skip_assignment, node_groups, _group_lens, combo_size, prep_elapsed = preprocessed
    if skip_assignment:
        return [sub_graph.nodes(data=True)]

    # Group the nodes, then expose every group as a data view of its subgraph.
    grouped_nodes, assign_elapsed = nearest_assigner(sub_graph, node_groups)

    refine_began = time.time()
    node_views = [sub_graph.subgraph(members).nodes(data=True) for members in grouped_nodes]
    refine_elapsed = time.time() - refine_began

    if debug and len(sub_graph) > 20:
        print(len(sub_graph), combo_size, prep_elapsed, assign_elapsed, refine_elapsed)
    return node_views
68 | 


--------------------------------------------------------------------------------
/src/map_solver/ortools_solver.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | import networkx as nx
  4 | from ortools.linear_solver import pywraplp
  5 | from ortools.sat.python import cp_model
  6 | 
  7 | from src.map_solver.base_solver import graph_preprocessing, calc_cost_list, flatten_to_matrix_idx, \
  8 |     flatten_idx_digit_match, assigned_flatten_idx_to_edges
  9 | 
 10 | 
 11 | def ortools_mip_solve(sub_graph, graph_params, debug=False):
 12 |     # Preprocessing
 13 |     no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph)
 14 |     if no_need_to_assign:
 15 |         return [sub_graph.nodes(data=True)]
 16 |     # Prepare cost list
 17 |     cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size)
 18 | 
 19 |     assign_time = time.time()
 20 | 
 21 |     # Solver
 22 |     # Create the mip solver with the SCIP backend.
 23 |     solver = pywraplp.Solver.CreateSolver('PDLP')
 24 | 
 25 |     # Variables
 26 |     x = []
 27 |     for i in range(combination_size):
 28 |         idx_tuple = flatten_to_matrix_idx(i, group_len_list)
 29 |         x.append(solver.IntVar(0, 1, name="x" + "%d" * len(group_len_list) % idx_tuple))
 30 | 
 31 |     # Constraints
 32 |     for i in range(len(group_len_list)):
 33 |         non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i])
 34 |         for j in range(non_dumb_node_num):
 35 |             solver.Add(solver.Sum([x[k] for k in range(combination_size)
 36 |                                    if flatten_idx_digit_match(k, group_len_list, i, j)]) == 1,
 37 |                        name='Constraint%d%d' % (i, j))
 38 | 
 39 |     # Objective
 40 |     objective_terms = []
 41 |     for i in range(combination_size):
 42 |         objective_terms.append(x[i] * cost_list[i])
 43 |     solver.Minimize(solver.Sum(objective_terms))
 44 | 
 45 |     # Solve
 46 |     status = solver.Solve()
 47 | 
 48 |     assigned_idx_list = []
 49 |     for i in range(len(x)):
 50 |         if x[i].solution_value() > 0.5:
 51 |             assigned_idx_list.append(i)
 52 |     assign_time = time.time() - assign_time
 53 | 
 54 |     # Confirm assignment result
 55 |     assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph,
 56 |                                                                                     group_node_list, group_len_list)
 57 |     if debug:
 58 |         print(len(sub_graph), pre_time, cost_time, assign_time, refine_time)
 59 |     return assignment_nodes
 60 | 
 61 | 
 62 | def ortools_cp_solve(sub_graph, graph_params, debug=False):
 63 |     # Preprocessing
 64 |     no_need_to_assign, group_node_list, group_len_list, combination_size, pre_time = graph_preprocessing(sub_graph)
 65 |     if no_need_to_assign:
 66 |         return [sub_graph.nodes(data=True)]
 67 |     # Prepare cost list
 68 |     cost_list, cost_time = calc_cost_list(sub_graph, graph_params, group_node_list, group_len_list, combination_size)
 69 | 
 70 |     assign_time = time.time()
 71 | 
 72 |     # Model
 73 |     model = cp_model.CpModel()
 74 | 
 75 |     # Variables
 76 |     x = []
 77 |     for i in range(combination_size):
 78 |         idx_tuple = flatten_to_matrix_idx(i, group_len_list)
 79 |         x.append(model.NewBoolVar('x' + '%d' * len(group_len_list) % idx_tuple))
 80 | 
 81 |     # Constraints
 82 |     for i in range(len(group_len_list)):
 83 |         non_dumb_node_num = len(group_node_list[len(group_len_list) - 1 - i])
 84 |         for j in range(non_dumb_node_num):
 85 |             model.AddExactlyOne([x[k] for k in range(combination_size)
 86 |                                  if flatten_idx_digit_match(k, group_len_list, i, j)])
 87 | 
 88 |     # Objective
 89 |     objective_terms = []
 90 |     for i in range(combination_size):
 91 |         objective_terms.append(x[i] * cost_list[i])
 92 |     model.Minimize(sum(objective_terms))
 93 | 
 94 |     # Solve
 95 |     solver = cp_model.CpSolver()
 96 |     status = solver.Solve(model)
 97 | 
 98 |     assigned_idx_list = []
 99 |     for i in range(len(x)):
100 |         if solver.BooleanValue(x[i]):
101 |             assigned_idx_list.append(i)
102 |     assign_time = time.time() - assign_time
103 | 
104 |     # Confirm assignment result
105 |     assignment_nodes, refine_time = assigned_flatten_idx_to_edges(assigned_idx_list, sub_graph,
106 |                                                                                     group_node_list, group_len_list)
107 |     if debug:
108 |         print(len(sub_graph), pre_time, cost_time, assign_time, refine_time)
109 |     return assignment_nodes
110 | 
111 | 


--------------------------------------------------------------------------------
/src/raw_file_reader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import time
 4 | 
 5 | from pyteomics import mzml
 6 | 
 7 | 
 8 | class Spectrum:
 9 |     def __init__(self, rt, mzs, intensities, base_peak_intensity, base_peak_mz, total_ion_current):
10 |         self.rt = rt
11 |         self.mzs = mzs
12 |         self.intensities = intensities
13 |         self.base_peak_intensity = base_peak_intensity
14 |         self.base_peak_mz = base_peak_mz
15 |         self.total_ion_current = total_ion_current
16 | 
17 | 
class RawFileReader:
    """Loads MS1 spectra from the mzML file that accompanies a result file."""

    def __init__(self, raw_params):
        """Store the intensity threshold; negative thresholds are clamped to 0."""
        self.min_intensity = max(0, raw_params.min_intensity)

    @staticmethod
    def _find_raw_path(result_path):
        """Return the single mzML file that shares ``result_path``'s base name.

        The base name is the file name up to its first dot. Only the file
        name is cut, so dots in directory names (``./data``, ``run.v1/``) no
        longer truncate the search prefix, and glob metacharacters in the
        path are matched literally.

        Raises:
            AssertionError: if the number of matching mzML files is not 1.
        """
        folder, file_name = os.path.split(result_path)
        prefix = os.path.join(folder, file_name.split('.')[0])
        glob_paths = glob.glob(glob.escape(prefix) + '*.[mM][zZ][mM][lL]')
        if len(glob_paths) != 1:
            # Raised explicitly so the check also runs under ``python -O``.
            raise AssertionError('File Error! Result and RAW file name and number should be the same. ')
        return glob_paths[0]

    def load_ms1_spectra(self, result_path):
        """Read every MS level 1 spectrum of the raw file matching ``result_path``.

        Peaks whose intensity is not strictly above ``self.min_intensity``
        are dropped from each spectrum.

        Args:
            result_path: path of a result file; the raw file is looked up
                next to it by base name.

        Returns:
            A list of ``Spectrum`` objects in file order.

        Raises:
            AssertionError: if the raw file cannot be identified uniquely.
        """
        raw_path = self._find_raw_path(result_path)

        ms1_spectra = []
        # The context manager closes the underlying file when reading ends.
        with mzml.read(raw_path) as ms_file:
            for spectrum in ms_file:
                if spectrum.get('ms level') != 1:
                    continue
                mzs = spectrum.get('m/z array')
                intensities = spectrum.get('intensity array')
                rt = spectrum.get('scanList').get('scan')[0].get('scan start time')
                keep = intensities > self.min_intensity
                ms1_spectra.append(Spectrum(rt, mzs[keep], intensities[keep],
                                            spectrum.get('base peak intensity'),
                                            spectrum.get('base peak m/z'),
                                            spectrum.get('total ion current')))
        return ms1_spectra
43 | 


--------------------------------------------------------------------------------
/src/result_file_reader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import time
 4 | 
 5 | import numpy as np
 6 | 
 7 | 
 8 | class ResultFileReader:
 9 |     def __init__(self, result_params):
10 | 
11 |         if os.path.isfile(result_params.result_folder_path):
12 |             exit('ERROR: result_folder_path must be a folder.')
13 | 
14 |         self.skip_line = result_params.skip_line
15 |         self.mz_col_idx = result_params.mz_col_idx
16 |         self.rt_col_idx = result_params.rt_col_idx
17 |         self.area_col_idx = result_params.area_col_idx
18 |         self.result_folder_path = result_params.result_folder_path
19 | 
20 |     def load_result(self, result_path):
21 |         if result_path.endswith('.tsv') or result_path.endswith('.txt') or result_path.endswith('.hills.csv'):
22 |             separator = '\t'
23 |         else:
24 |             separator = ','
25 | 
26 |         result_file = open(result_path, 'r')
27 |         for j in range(self.skip_line):
28 |             header = result_file.readline().split(separator)
29 |         result_data = np.array([line.strip().replace('"', '').split(separator) for line in result_file])
30 |         results = result_data[:, (self.mz_col_idx, self.rt_col_idx, self.area_col_idx)].astype(np.float32)
31 |         return results
32 | 
33 |     def _load_result_paths(self, folder_path):
34 |         file_paths = []
35 |         file_count = 0
36 |         files = glob.glob(os.path.join(folder_path, '*'))
37 |         if len(files) == 0:
38 |             return file_paths
39 |         for file in files:
40 |             if os.path.isfile(file) and (file.endswith('.csv') or file.endswith('.tsv') or file.endswith('.txt')):
41 |                 file_paths.append(file)
42 |                 file_count += 1
43 |             if os.path.isdir(file):
44 |                 sub_file_paths, sub_file_count = self._load_result_paths(file)
45 |                 if len(sub_file_paths) > 0:
46 |                     file_paths.append(sub_file_paths)
47 |                     file_count += sub_file_count
48 |         return file_paths, file_count
49 | 
50 |     def load_result_paths(self):
51 |         file_paths, file_count = self._load_result_paths(self.result_folder_path)
52 |         return file_paths, file_count
53 | 


--------------------------------------------------------------------------------
/src/tools/graph_viewer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | 
  4 | import networkx as nx
  5 | from matplotlib import cm
  6 | import matplotlib.pyplot as plt
  7 | import numpy as np
  8 | 
# Ancestor directory obtained by stripping three trailing components from this
# file's absolute path; the plotting helpers below write under
# <root_dir>/experiments.
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 10 | 
 11 | 
 12 | def plt_coarse_registration_results(coarse_registration_data, save=True, save_folder=''):
 13 |     plt.rcParams['savefig.dpi'] = 1200
 14 |     plt.figure(figsize=(8, 4))
 15 |     path = os.path.join(root_dir, 'experiments', save_folder, 'coarse_alignment_figs')
 16 |     if not os.path.exists(path):
 17 |         os.mkdir(path)
 18 |     # for i in range(len(coarse_registration_data)):
 19 |     for i in range(len(coarse_registration_data)):
 20 |         ori_rts = coarse_registration_data[i][0]
 21 |         warped_rts = coarse_registration_data[i][1]
 22 |         plt.plot(warped_rts, warped_rts - ori_rts, label=i+1, linewidth=1)
 23 |     plt.xlabel('Warped RT')
 24 |     plt.ylabel('Warped RT - Raw RT')
 25 |     plt.legend()
 26 |     plt.savefig(os.path.join(path, time.strftime('%H%M%S', time.localtime()) + '_coarse_registration_residual.png')) if save else plt.show()
 27 |     plt.clf()
 28 | 
 29 |     plt.rcParams['savefig.dpi'] = 1200
 30 |     fig = plt.figure(figsize=(8, 6))
 31 |     ax1 = fig.add_subplot(2, 1, 1)
 32 |     ax2 = fig.add_subplot(2, 1, 2)
 33 |     for i in range(len(coarse_registration_data)):
 34 |         ori_rts = coarse_registration_data[i][0]
 35 |         warped_rts = coarse_registration_data[i][1]
 36 |         ints = coarse_registration_data[i][2]
 37 |         ax1.plot(ori_rts, ints, label=i+1, linewidth=1)
 38 |         ax2.plot(warped_rts, ints, label=i+1, linewidth=1)
 39 |     ax1.set_xlabel('Raw RT')
 40 |     ax2.set_xlabel('Warped RT')
 41 |     ax1.set_ylabel('Intensity')
 42 |     ax2.set_ylabel('Intensity')
 43 |     plt.legend()
 44 |     plt.tight_layout()
 45 |     plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.45)
 46 |     plt.savefig(os.path.join(path, time.strftime('%H%M%S', time.localtime()) + '_coarse_registration_tic.png')) if save else plt.show()
 47 |     plt.clf()
 48 | 
 49 | 
 50 | def plt_scatter(g, save=False, save_folder='', save_name=''):
 51 |     plt.rcParams['savefig.dpi'] = 1200
 52 |     mzs = list(nx.get_node_attributes(g, 'mz').values())
 53 |     rts = list(nx.get_node_attributes(g, 'rt').values())
 54 |     c = list(nx.get_node_attributes(g, 'data_idx'))
 55 |     plt.scatter(rts, mzs, c=c, cmap='tab10')
 56 |     path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs')
 57 |     if not os.path.exists(path):
 58 |         os.mkdir(path)
 59 |     plt.savefig(os.path.join(path, save_name + '_scatter.png')) if save else plt.show()
 60 |     plt.clf()
 61 | 
 62 | 
 63 | def plt_assignment(g, node_assignment_group, save=False, save_folder='', save_name='', show_weight=True):
 64 |     plt.rcParams['savefig.dpi'] = 1200
 65 |     plt.rcParams['figure.dpi'] = 400
 66 |     mzs = list(nx.get_node_attributes(g, 'mz').values())
 67 |     rts = list(nx.get_node_attributes(g, 'rt').values())
 68 |     c = list(nx.get_node_attributes(g, 'data_idx'))
 69 |     path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs')
 70 |     if not os.path.exists(path):
 71 |         os.mkdir(path)
 72 |     plt.scatter(rts, mzs, c=c, s=15, cmap='tab10')
 73 |     for assignment_nodes in node_assignment_group:
 74 |         color = np.random.rand(3,)
 75 |         nodes = []
 76 |         for node in assignment_nodes:
 77 |             nodes.append(node[0])
 78 |         for node_set in nx.connected_components(g.subgraph(nodes)):
 79 |             sub_graph = g.subgraph(node_set)
 80 |             assignment_edges = nx.minimum_spanning_tree(sub_graph).edges
 81 | 
 82 |             for edge in assignment_edges:
 83 |                 mzs = []
 84 |                 rts = []
 85 |                 mzs.append(g.nodes[edge[0]]['mz'])
 86 |                 mzs.append(g.nodes[edge[1]]['mz'])
 87 |                 rts.append(g.nodes[edge[0]]['rt'])
 88 |                 rts.append(g.nodes[edge[1]]['rt'])
 89 |                 weight = '%.2f' % g.get_edge_data(edge[0], edge[1])['weight']
 90 |                 plt.plot(rts, mzs, alpha=0.4, c=color)
 91 |                 if show_weight:
 92 |                     plt.text((rts[0] + rts[1]) / 2, (mzs[0] + mzs[1]) / 2, weight)
 93 |     plt.savefig(os.path.join(path, save_name + '_assigned.png')) if save else plt.show()
 94 |     plt.clf()
 95 | 
 96 | 
 97 | def plt_all_edges(g, save=False, save_folder='', save_name='', show_weight=True):
 98 |     plt.rcParams['savefig.dpi'] = 400
 99 |     plt.rcParams['figure.dpi'] = 400
100 |     mzs = list(nx.get_node_attributes(g, 'mz').values())
101 |     rts = list(nx.get_node_attributes(g, 'rt').values())
102 |     c = list(nx.get_node_attributes(g, 'data_idx'))
103 |     path = os.path.join(root_dir, 'experiments', save_folder, 'fine_assignment_figs')
104 |     if not os.path.exists(path):
105 |         os.mkdir(path)
106 |     plt.scatter(rts, mzs, c=c, s=15, cmap='tab10')
107 | 
108 |     assignment_edges = g.edges
109 |     for edge in assignment_edges:
110 |         mzs = []
111 |         rts = []
112 |         mzs.append(g.nodes[edge[0]]['mz'])
113 |         mzs.append(g.nodes[edge[1]]['mz'])
114 |         rts.append(g.nodes[edge[0]]['rt'])
115 |         rts.append(g.nodes[edge[1]]['rt'])
116 |         weight = '%.2f' % g.get_edge_data(edge[0], edge[1])['weight']
117 |         plt.plot(rts, mzs, alpha=0.4)
118 |         if show_weight:
119 |             plt.text((rts[0] + rts[1]) / 2, (mzs[0] + mzs[1]) / 2, weight)
120 |     plt.savefig(os.path.join(path, save_name + '_all_edges.png')) if save else plt.show()
121 |     plt.clf()
122 | 
123 | 
def show(g):
    """Draw ``g`` with nodes placed by (RT, m/z) offsets and coloured by ``data_idx``."""
    pos, labels = _get_layout(g)
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap is no longer
    # available in recent Matplotlib releases.
    cmap = plt.get_cmap('tab10')
    nx.draw(g, pos=pos, node_size=10, cmap=cmap, node_color=list(labels.values()), font_weight='bold')
    plt.show()
129 | 
130 | 
def show_multipartite(g):
    """Draw ``g`` in a multipartite layout keyed and coloured by ``data_idx``."""
    # Only the labels are needed here; positions come from the layout below.
    _, labels = _get_layout(g)
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap is no longer
    # available in recent Matplotlib releases.
    cmap = plt.get_cmap('tab10')
    pos = nx.multipartite_layout(g, subset_key='data_idx')
    nx.draw(g, pos=pos, node_size=60, cmap=cmap, node_color=list(labels.values()), font_weight='bold')
    plt.show()
137 | 
138 | 
def show_assignment(g, assignment_group):
    """Draw the nodes of ``g`` connected only by the edges in ``assignment_group``.

    Args:
        g: graph providing the nodes (with their attributes).
        assignment_group: iterable of edge collections, each accepted by
            ``Graph.add_edges_from``.
    """
    overlay = nx.Graph()
    # Same nodes and attributes as ``g``, but none of its edges.
    overlay.add_nodes_from(g.nodes(data=True))
    for edge_batch in assignment_group:
        overlay.add_edges_from(edge_batch)
    show(overlay)
145 | 
146 | 
def _get_layout(g):
    """Build drawing positions and colour labels for the nodes of ``g``.

    Returns:
        ``(pos, labels)``: ``pos`` maps each node to a ``float32`` array
        ``[rt - min_rt, mz - min_mz]`` and ``labels`` maps each node to its
        ``data_idx`` attribute. Both are empty for a graph without nodes.

    Raises:
        KeyError: if a node lacks the ``mz``, ``rt`` or ``data_idx`` attribute.
    """
    mzs = np.array(list(nx.get_node_attributes(g, 'mz').values()))
    rts = np.array(list(nx.get_node_attributes(g, 'rt').values()))
    # The minima are the same for every node, so compute them once; the
    # guards keep graphs without these attributes from failing here.
    min_mz = np.min(mzs) if mzs.size else 0.0
    min_rt = np.min(rts) if rts.size else 0.0

    pos = {}
    labels = {}
    for node_id, attrs in g.nodes(data=True):
        pos_mz = attrs['mz'] - min_mz
        pos_rt = attrs['rt'] - min_rt
        pos[node_id] = np.array([pos_rt, pos_mz]).astype(np.float32)
        labels[node_id] = attrs['data_idx']
    return pos, labels
160 | 


--------------------------------------------------------------------------------
/src/tools/param_loader.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import argparse
 3 | 
 4 | from src.params import ResultFileReadingParams, RawFileReadingParams, CoarseRegistrationParams, FineAssignmentParams
 5 | 
 6 | 
 7 | def load_params(param_path):
 8 |     param_file = open(param_path, 'r')
 9 |     lines = param_file.read().split('\n')
10 |     param_map = {}
11 |     for line in lines:
12 |         splits = line.strip().split(':')
13 |         if len(splits) < 2:
14 |             continue
15 |         elif len(splits) == 2:
16 |             param_map[splits[0]] = splits[1]
17 |         elif len(splits) == 3:  # windows path
18 |             param_map[splits[0]] = splits[1] + ':' + splits[2]
19 |         else:
20 |             print('Wrong parameters.')
21 |             return None, None, None, None
22 | 
23 |     # Result File Reading Params
24 |     result_file_path = param_map['result_file_path']
25 |     skip_line = param_map['skip_line']
26 |     rt_col_num = param_map['rt_col_num']
27 |     mz_col_num = param_map['mz_col_num']
28 |     area_col_num = param_map['area_col_num']
29 | 
30 |     assert result_file_path is not None
31 |     assert skip_line is not None
32 |     assert rt_col_num is not None
33 |     assert mz_col_num is not None
34 |     assert area_col_num is not None
35 | 
36 |     result_file_reading_params = ResultFileReadingParams(result_folder_path=result_file_path,
37 |                                                          skip_line=skip_line, rt_col_num=rt_col_num,
38 |                                                          mz_col_num=mz_col_num, area_col_num=area_col_num)
39 | 
40 |     # Raw File Reading Params
41 |     raw_file_path = param_map['raw_file_path']
42 |     min_intensity = float(param_map['min_intensity'])
43 | 
44 |     raw_file_reading_params = RawFileReadingParams(raw_file_path=raw_file_path, min_intensity=min_intensity)
45 | 
46 |     # Coarse Registration Params
47 |     bin_size = float(param_map['bin_size'])
48 |     percent_anchors = float(param_map['percent_anchors'])
49 |     score_type = param_map['score_type']
50 |     gap_init = float(param_map['gap_init'])
51 |     gap_extend = float(param_map['gap_extend'])
52 |     factor_diag = float(param_map['factor_diag'])
53 |     factor_gap = float(param_map['factor_gap'])
54 |     local_alignment = int(param_map['local_alignment'])
55 |     init_penalty = float(param_map['init_penalty'])
56 | 
57 |     coarse_registration_params = CoarseRegistrationParams(bin_size=bin_size, percent_anchors=percent_anchors,
58 |                                                           score_type=score_type,
59 |                                                           gap_init=gap_init, gap_extend=gap_extend,
60 |                                                           factor_diag=factor_diag, factor_gap=factor_gap,
61 |                                                           local_alignment=local_alignment, init_penalty=init_penalty)
62 | 
63 |     # Fine Assignment Params
64 |     rt_tolerance = float(param_map['rt_tolerance'])
65 |     mz_tolerance = float(param_map['mz_tolerance'])
66 |     use_ppm = bool(param_map['use_ppm'])
67 |     mz_factor = float(param_map['mz_factor'])
68 |     rt_factor = float(param_map['rt_factor'])
69 |     area_factor = float(param_map['area_factor'])
70 |     solver = param_map['solver']
71 |     vlsns_init_mode = param_map['vlsns_solution_init_mode']
72 |     vlsns_init_num = int(param_map['vlsns_solution_init_number'])
73 |     vlsns_update_mode = param_map['vlsns_solution_update_mode']
74 | 
75 |     fine_assignment_params = FineAssignmentParams(mz_tolerance=mz_tolerance, rt_tolerance=rt_tolerance,
76 |                                                   use_ppm=use_ppm, mz_factor=mz_factor, rt_factor=rt_factor,
77 |                                                   area_factor=area_factor,
78 |                                                   solver=solver, vlsns_init_mode=vlsns_init_mode,
79 |                                                   vlsns_init_num=vlsns_init_num, vlsns_update_mode=vlsns_update_mode)
80 | 
81 |     return result_file_reading_params, raw_file_reading_params, coarse_registration_params, fine_assignment_params
82 | 
83 | 
if __name__ == '__main__':
    # Command-line entry point: parse the parameter file given by --param_path.
    parser = argparse.ArgumentParser(description='GAligner parameter loader')
    parser.add_argument('--param_path', type=str, help='Path to parameter file', required=True)
    args = parser.parse_args()

    # The leftover configparser lines were removed: calling
    # ConfigParser.get() without arguments raised TypeError on every run.
    load_params(args.param_path)


--------------------------------------------------------------------------------
/src/tools/trace_recorder.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import csv
  3 | import numpy as np
  4 | 
# Ancestor directory obtained by stripping three trailing components from this
# file's absolute path; the writers below store output under
# <root_dir>/experiments.
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  6 | 
  7 | 
  8 | def save_params(save_folder, result_file_params, raw_file_params, coarse_registration_params, fine_assignment_params):
  9 |     result_root_path = os.path.join(root_dir, 'experiments')
 10 |     if not os.path.exists(result_root_path):
 11 |         os.mkdir(result_root_path)
 12 |     path = os.path.join(result_root_path, save_folder)
 13 |     if not os.path.exists(path):
 14 |         os.mkdir(path)
 15 |     file = open(os.path.join(path, 'params.txt'), 'w')
 16 | 
 17 |     # ResultFileReadingParams
 18 |     file.write('# Result File Reading Params' + '\n')
 19 |     file.write('\tresult_file_path: ' + result_file_params.result_folder_path + '\n')
 20 |     file.write('\tskip_line: ' + str(result_file_params.skip_line) + '\n')
 21 |     file.write('\trt_col_num: ' + str(result_file_params.rt_col_idx + 1) + '\n')
 22 |     file.write('\tmz_col_num: ' + str(result_file_params.mz_col_idx + 1) + '\n')
 23 |     file.write('\tarea_col_num: ' + str(result_file_params.area_col_idx + 1) + '\n')
 24 |     file.write(os.linesep)
 25 | 
 26 |     # RawFileReadingParams
 27 |     file.write('# Raw File Reading Params' + '\n')
 28 |     file.write('\tmin_intensity: ' + str(raw_file_params.min_intensity) + '\n')
 29 |     file.write(os.linesep)
 30 | 
 31 |     # CoarseRegistrationParams
 32 |     file.write('# Coarse Registration Params' + '\n')
 33 |     file.write('\tbin_size: ' + str(coarse_registration_params.bin_size) + '\n')
 34 |     file.write('\tpercent_anchors: ' + str(coarse_registration_params.percent_anchors) + '\n')
 35 |     file.write('\tscore_type: ' + str(coarse_registration_params.score_type) + '\n')
 36 |     file.write('\tgap_init: ' + str(coarse_registration_params.gap_init) + '\n')
 37 |     file.write('\tgap_extend: ' + str(coarse_registration_params.gap_extend) + '\n')
 38 |     file.write('\tfactor_diag: ' + str(coarse_registration_params.factor_diag) + '\n')
 39 |     file.write('\tfactor_gap: ' + str(coarse_registration_params.factor_gap) + '\n')
 40 |     file.write('\tlocal_alignment: ' + str(coarse_registration_params.local_alignment) + '\n')
 41 |     file.write('\tinit_penalty: ' + str(coarse_registration_params.init_penalty) + '\n')
 42 |     file.write(os.linesep)
 43 | 
 44 |     # FineAssignmentParams
 45 |     file.write('# Fine Assignment Params' + '\n')
 46 |     file.write('\trt_tolerance: ' + str(fine_assignment_params.rt_tolerance) + '\n')
 47 |     file.write('\tmz_tolerance: ' + str(fine_assignment_params.mz_tolerance) + '\n')
 48 |     file.write('\tuse_ppm: ' + str(fine_assignment_params.use_ppm) + '\n')
 49 |     file.write('\tmz_factor: ' + str(fine_assignment_params.mz_factor) + '\n')
 50 |     file.write('\trt_factor: ' + str(fine_assignment_params.rt_factor) + '\n')
 51 |     file.write('\tarea_factor: ' + str(fine_assignment_params.area_factor) + '\n')
 52 |     file.write('\tsolver: ' + str(fine_assignment_params.solver) + '\n')
 53 |     file.write('\tvlsns_solution_init_mode: ' + str(fine_assignment_params.vlsns_solution_init_mode) + '\n')
 54 |     file.write('\tvlsns_solution_init_number: ' + str(fine_assignment_params.vlsns_solution_init_number) + '\n')
 55 |     file.write('\tvlsns_solution_update_mode: ' + str(fine_assignment_params.vlsns_solution_update_mode) + '\n')
 56 |     file.write(os.linesep)
 57 | 
 58 |     file.close()
 59 | 
 60 | 
 61 | def prepare_result_rows(result_data_list, assignment_nodes_list, need_assign_list):
 62 |     node_start_idxes = [0]
 63 |     for i, data in enumerate(result_data_list):
 64 |         node_start_idxes += [node_start_idxes[-1] + len(data)]
 65 | 
 66 |     row_length = 4
 67 |     row_start_idxes = []
 68 |     for result_data in result_data_list:
 69 |         row_start_idxes.append(row_length)
 70 |         tmp_row_length = len(result_data[0])
 71 |         if tmp_row_length == 3:
 72 |             row_length += 3
 73 |         else:
 74 |             row_length += tmp_row_length - 4
 75 | 
 76 |     rows = []
 77 |     for i, nodes in enumerate(assignment_nodes_list):
 78 |         assembled_row = np.zeros(row_length)
 79 |         tmp_mzs = []
 80 |         tmp_rts = []
 81 |         tmp_areas = []
 82 |         for node in nodes:
 83 |             data_idx = node[1]['data_idx']
 84 |             row_start_idx = row_start_idxes[data_idx]
 85 |             result_data_idx = node[0] - node_start_idxes[data_idx]
 86 |             result_data_row = result_data_list[data_idx][result_data_idx]
 87 |             if len(result_data_row) == 3:
 88 |                 assembled_row[row_start_idx: row_start_idx + 3] = np.array(result_data_row)
 89 |             else:
 90 |                 tmp_payload = result_data_row[4:]
 91 |                 assembled_row[row_start_idx: row_start_idx + len(tmp_payload)] = np.array(tmp_payload)
 92 |                 need_assign_list[i] = need_assign_list[i] or result_data_row[3]
 93 |             tmp_mzs.append(node[1]['mz'])
 94 |             tmp_rts.append(node[1]['rt'])
 95 |             tmp_areas.append(node[1]['area'])
 96 |         tmp_mzs.sort()
 97 |         tmp_rts.sort()
 98 |         tmp_areas.sort()
 99 |         assembled_row[0] = tmp_mzs[len(tmp_mzs) // 2]
100 |         assembled_row[1] = tmp_rts[len(tmp_rts) // 2]
101 |         assembled_row[2] = tmp_areas[len(tmp_areas) // 2]
102 |         assembled_row[3] = need_assign_list[i]
103 |         rows.append(assembled_row)
104 |     return np.array(rows)
105 | 
106 | 
def save_alignment_results(result_data_list, file_names, save_folder, min_sample=None):
    """Write the filtered alignment table to ``aligned_result.csv``.

    The file is created at ``<root_dir>/experiments/<save_folder>/``; missing
    directories are created.  The header row is ``mz, rt, area, need_assign``
    followed by ``<name>_mz, <name>_rt, <name>_area`` for each file name.

    Args:
        result_data_list: 2-D array of assembled rows.  Column ``4 + 3 * i``
            is read as the mz value of the i-th file; a value > 0 counts as
            "detected in that file".  An empty array produces a header-only
            file.
        file_names: Sample names, in the same order as the column blocks.
        save_folder: Sub-folder of ``experiments`` to write into.
        min_sample: Detection-count threshold.  Defaults to
            ``(len(file_names) + 1) // 2``.

    Returns:
        Path of the written CSV file.
    """
    if min_sample is None:
        min_sample = (len(file_names) + 1) // 2

    table = np.asarray(result_data_list)
    if table.ndim == 2 and table.shape[0] > 0:
        mz_columns = [4 + 3 * i for i in range(len(file_names))]
        detected_counts = np.sum(table[:, mz_columns] > 0, axis=-1)
        # NOTE(review): the comparison is strict, so a row detected in exactly
        # ``min_sample`` files is dropped -- confirm this is intended.
        kept_rows = table[detected_counts > min_sample]
    else:
        # No rows at all (e.g. ``np.array([])``): column indexing would fail,
        # so only the header is written.
        kept_rows = []

    path = os.path.join(root_dir, 'experiments', save_folder)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, 'aligned_result.csv')

    first_row = ['mz', 'rt', 'area', 'need_assign']
    for file_name in file_names:
        first_row += [file_name + "_mz", file_name + "_rt", file_name + "_area"]

    # newline='' lets the csv module control line endings, as its
    # documentation requires; the context manager closes the file even if a
    # write fails.
    with open(file_path, 'w', newline='') as file:
        # quotechar=None (not '') is the valid way to disable the quote
        # character; an empty string is rejected on Python 3.11+.
        writer = csv.writer(file, dialect='unix', quoting=csv.QUOTE_NONE, quotechar=None)
        writer.writerow(first_row)
        writer.writerows(kept_rows)
    return file_path
129 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/History:
--------------------------------------------------------------------------------
1 | 0.9.4
2 | 
3 | * silenced warnings
4 | * moved build to Rakefile (rewrote the build system to be much cleaner)
5 | 
6 | 0.9.3
7 | 
8 | * added some include statements so that it would compile on more recent g++ compilers.  No change in code (or binaries) from 0.9.2
9 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2006, The University of Texas at Austin("U.T. Austin"). All rights reserved.  
 2 | 
 3 | Software by John T. Prince under the direction of Edward M. Marcotte.
 4 | 
 5 | By using this software the USER indicates that he or she has read, understood and will comply with the following:
 6 | 
 7 | U. T. Austin hereby grants USER permission to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of this software and its documentation for any purpose and without fee, provided that a full copy of this notice is included with the software and its documentation.
 8 | 
 9 | Title to copyright to this software and its associated documentation shall at all times remain with U. T. Austin. No right is granted to use in advertising, publicity or otherwise any trademark, service mark, or the name of U. T. Austin.
10 | 
11 | This software and any associated documentation are provided "as is," and U. T. AUSTIN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESSED OR IMPLIED, INCLUDING THOSE OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT USE OF THE SOFTWARE, MODIFICATIONS, OR ASSOCIATED DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER INTELLECTUAL PROPERTY RIGHTS OF A THIRD PARTY. U. T. Austin, The University of Texas System, its Regents, officers, and employees shall not be liable under any circumstances for any direct, indirect, special, incidental, or consequential damages with respect to any claim by USER or any third party on account of or arising from the use, or inability to use, this software or its associated documentation, even if U. T. Austin has been advised of the possibility of those damages.
12 | 
13 | Submit software operation questions to: Edward M. Marcotte, Department of Chemistry and Biochemistry, U. T. Austin, Austin, Texas 78712.
14 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/README.html:
--------------------------------------------------------------------------------
  1 | 
  2 | <html>
  3 |   <head><title>README.md</title></head>
  4 |   <body>
  5 | <h1>OBI-Warp</h1>
  6 | 
  7 | <p>Ordered Bijective Interpolated Warping (OBI-Warp) aligns matrices along a
  8 | single axis using Dynamic Time Warping (DTW) and a one-to-one (bijective)
  9 | interpolated warp function.  OBI-Warp harnesses the non-linear, comprehensive
 10 | alignment power of DTW and builds on the discrete, non-bijective output of DTW
 11 | to give natural interpolants that can be used across multiple datasets.</p>
 12 | 
 13 | <p>OBI-Warp was developed specifically for the chromatographic alignment of
 14 | complex mass spectrometry (MS) proteomics data.  Using high confidence MS/MS
 15 | identifications as time standards, OBI-Warp default parameters have been
 16 | optimized to give accurate alignments under a variety of real-world conditions
 17 | including datasets with little overlapping signal.  Command-line options to
 18 | override defaults are available (e.g., gap penalty, local weights and number
 19 | of bijective anchors).  Though developed for MS proteomics data, OBI-Warp is
 20 | suited to a wide variety of alignment problems.</p>
 21 | 
 22 | <p>Pearson's correlation coefficient, covariance, dot product, and Euclidean
 23 | distance have been implemented as the available vector similarity functions.
 24 | Redundant calculations for correlation coefficient and covariance are cached
 25 | in the n x m comparisons to give the algorithmic equivalent of calculating the
 26 | dot product.</p>
 27 | 
 28 | <p>The dynamic programming algorithm is written to allow any arbitrary gap
 29 | penalty function, or users may use a linear initiation and elongation penalty.
 30 | Local weighting schemes may also be controlled.</p>
 31 | 
 32 | <h3>Links</h3>
 33 | 
 34 | <ul>
 35 | <li><a href="http://sourceforge.net/projects/obi-warp/">Project Summary Page</a></li>
 36 | <li><a href="http://sourceforge.net/project/showfiles.php?group_id=161548">Download OBI-Warp</a></li>
 37 | <li><a href="http://dx.doi.org/10.1021/ac0605344">Analytical Chemistry Publication</a></li>
 38 | <li><a href="http://bioinformatics.icmb.utexas.edu/obi-warp/">Supplementary Material for Publication</a> (<em>this server is currently down</em>)</li>
 39 | </ul>
 40 | 
 41 | 
 42 | <h2>Building</h2>
 43 | 
 44 | <p>Building is tested rigorously on Ubuntu and should work fine on any POSIX
 45 | system.  Windows compilation under VC++ should be possible and compilation
 46 | with cygwin, or msys/mingw should work without any problems.</p>
 47 | 
 48 | <h3>Prerequisites</h3>
 49 | 
 50 | <p><a href="http://www.ruby-lang.org">ruby</a> and <em>rake</em>. <em>rake</em> comes standard with the
 51 | newer ruby (1.9.X), but it can also be installed using
 52 | <a href="http://rubygems.org/pages/download">rubygems</a> (for ruby versions below 1.9):</p>
 53 | 
 54 | <pre><code>gem install rake
 55 | </code></pre>
 56 | 
 57 | <p>Optional: <em>valgrind</em> can be used for memory testing and
 58 | <a href="http://www.bioinformatics.ubc.ca/matrix2png/download.html">matrix2png</a> can be
 59 | used to create images of the various matrices.</p>
 60 | 
 61 | <h3>Compiling</h3>
 62 | 
 63 | <pre><code>rake
 64 | # creates bin/obiwarp   (notice it is in the *bin* directory)
 65 | </code></pre>
 66 | 
 67 | <p>This will compile all the code and link it into the obiwarp executable.
 68 | <strong>NOTE</strong>: All executables will be created in the <strong>bin</strong> directory (testing
 69 | executables stay in the lib dir).</p>
 70 | 
 71 | <p>If you want to explore other options:</p>
 72 | 
 73 | <pre><code>rake -T
 74 | </code></pre>
 75 | 
 76 | <p>From within the <em>lib</em> directory, the generation of any file, intermediate, or
 77 | test can be invoked by name.  For example:</p>
 78 | 
 79 | <pre><code># build the lmat2lmata binary:
 80 | rake lmat2lmata    # note, it will be created in the bin directory
 81 | # this also works
 82 | rake ../bin/lmat2lmata
 83 | </code></pre>
 84 | 
 85 | <p>From the top directory, all the tasks (not necessarily filetasks) are available:</p>
 86 | 
 87 | <pre><code># from the top level directory
 88 | rake memtest
 89 | # from the lib directory
 90 | rake memtest
 91 | </code></pre>
 92 | 
 93 | <h3>Installation</h3>
 94 | 
 95 | <p>Binaries are compiled and deposited in the <strong>bin</strong> folder. System
 96 | installation is left to the user, but it can be as simple as:</p>
 97 | 
 98 | <pre><code>rake      # make sure it is compiled and linked
 99 | sudo cp bin/obiwarp /usr/local/bin/
100 | </code></pre>
101 | 
102 | <h3>Testing</h3>
103 | 
104 | <pre><code># run all tests
105 | rake test
106 | # if you have valgrind installed:
107 | rake memtest
108 | rake test_cmdparser.rb  # for tests written in ruby
109 | rake run_test_dynprog   # for tests written in cxxtest 'run_test_&lt;whatever&gt;'
110 | </code></pre>
111 | 
112 | <p>tasks found in the lib Rakefile are also available from the top level Rakefile using
113 | the 'lib:' prefix:</p>
114 | 
115 | <pre><code>rake clean      # from inside the lib directory
116 | rake lib:clean  # from the top dir
117 | </code></pre>
118 |   </body>
119 | </html>
120 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/README.md:
--------------------------------------------------------------------------------
  1 | # OBI-Warp
  2 | 
  3 | Ordered Bijective Interpolated Warping (OBI-Warp) aligns matrices along a
  4 | single axis using Dynamic Time Warping (DTW) and a one-to-one (bijective)
  5 | interpolated warp function.  OBI-Warp harnesses the non-linear, comprehensive
  6 | alignment power of DTW and builds on the discrete, non-bijective output of DTW
  7 | to give natural interpolants that can be used across multiple datasets.
  8 | 
  9 | OBI-Warp was developed specifically for the chromatographic alignment of
 10 | complex mass spectrometry (MS) proteomics data.  Using high confidence MS/MS
 11 | identifications as time standards, OBI-Warp default parameters have been
 12 | optimized to give accurate alignments under a variety of real-world conditions
 13 | including datasets with little overlapping signal.  Command-line options to
 14 | override defaults are available (e.g., gap penalty, local weights and number
 15 | of bijective anchors).  Though developed for MS proteomics data, OBI-Warp is
 16 | suited to a wide variety of alignment problems.
 17 | 
 18 | Pearson's correlation coefficient, covariance, dot product, and Euclidean
 19 | distance have been implemented as the available vector similarity functions.
 20 | Redundant calculations for correlation coefficient and covariance are cached
 21 | in the n x m comparisons to give the algorithmic equivalent of calculating the
 22 | dot product.
 23 | 
 24 | The dynamic programming algorithm is written to allow any arbitrary gap
 25 | penalty function, or users may use a linear initiation and elongation penalty.
 26 | Local weighting schemes may also be controlled.
 27 | 
 28 | ### Links
 29 | 
 30 | * [Project Summary Page](http://sourceforge.net/projects/obi-warp/)
 31 | * [Download OBI-Warp](http://sourceforge.net/project/showfiles.php?group_id=161548)
 32 | * [Analytical Chemistry Publication](http://dx.doi.org/10.1021/ac0605344)
 33 | * [Supplementary Material for Publication](http://bioinformatics.icmb.utexas.edu/obi-warp/) (*this server is currently down*)
 34 | 
 35 | ## Building
 36 | 
 37 | Building is tested rigorously on Ubuntu and should work fine on any POSIX
 38 | system.  Windows compilation under VC++ should be possible and compilation
 39 | with cygwin, or msys/mingw should work without any problems.
 40 | 
 41 | ### Prerequisites
 42 | 
 43 | [ruby](http://www.ruby-lang.org) and *rake*. *rake* comes standard with the
 44 | newer ruby (1.9.X), but it can also be installed using
 45 | [rubygems](http://rubygems.org/pages/download) (for ruby versions below 1.9):
 46 | 
 47 |     gem install rake
 48 | 
 49 | Optional: *valgrind* can be used for memory testing and
 50 | [matrix2png](http://www.bioinformatics.ubc.ca/matrix2png/download.html) can be
 51 | used to create images of the various matrices.
 52 | 
 53 | ### Compiling
 54 | 
 55 |     rake
 56 |     # creates bin/obiwarp   (notice it is in the *bin* directory)
 57 | 
 58 | This will compile all the code and link it into the obiwarp executable.
 59 | **NOTE**: All executables will be created in the **bin** directory (testing
 60 | executables stay in the lib dir).
 61 | 
 62 | If you want to explore other options:
 63 | 
 64 |     rake -T
 65 | 
 66 | From within the *lib* directory, the generation of any file, intermediate, or
 67 | test can be invoked by name.  For example:
 68 | 
 69 |     # build the lmat2lmata binary:
 70 |     rake lmat2lmata    # note, it will be created in the bin directory
 71 |     # this also works
 72 |     rake ../bin/lmat2lmata
 73 | 
 74 | From the top directory, all the tasks (not necessarily filetasks) are available:
 75 | 
 76 |     # from the top level directory
 77 |     rake memtest
 78 |     # from the lib directory
 79 |     rake memtest
 80 | 
 81 | ### Installation
 82 | 
 83 | Binaries are compiled and deposited in the **bin** folder. System
 84 | installation is left to the user, but it can be as simple as:
 85 | 
 86 |     rake      # make sure it is compiled and linked
 87 |     sudo cp bin/obiwarp /usr/local/bin/
 88 | 
 89 | ### Testing
 90 | 
 91 |     # run all tests
 92 |     rake test
 93 |     # if you have valgrind installed:
 94 |     rake memtest
 95 |     rake test_cmdparser.rb  # for tests written in ruby
 96 |     rake run_test_dynprog   # for tests written in cxxtest 'run_test_<whatever>'
 97 | 
 98 | 
 99 | tasks found in the lib Rakefile are also available from the top level Rakefile using
100 | the 'lib:' prefix:
101 | 
102 |     rake clean      # from inside the lib directory
103 |     rake lib:clean  # from the top dir
104 | 
105 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/Rakefile:
--------------------------------------------------------------------------------
  1 | require 'rake/clean'
  2 | require 'rake/packagetask'
  3 | 
  4 | $INCLUDED_FILES = nil
  5 | 
  6 | CLEAN.add('pkg')
  7 | CLEAN.existing!
  8 | CLOBBER.add(*FileList["bin/*"])
  9 | 
 10 | def unix?
 11 |   RUBY_PLATFORM =~ /(aix|darwin|linux|(net|free|open)bsd|cygwin|solaris|irix|hpux)/i
 12 | end
 13 | 
# Lists the tasks defined by another Rakefile (flagged as "lame" by the
# original author).
#
# Spawns a separate `ruby -e` process that loads +rakefile+, locates the
# Rake::Application instance through ObjectSpace and prints, with `p`, an
# array of [task name, task comment] pairs.  That printed text is eval'ed
# here to rebuild the array, which is the return value.
#
# NOTE(review): the output of a shell command is fed to eval, and +rakefile+
# is interpolated into the shell string unescaped -- only use with trusted
# Rakefile paths.
def get_tasks(rakefile)
  # The local variable below shadows this method's own name within the body.
  get_tasks = %Q{ruby -e '
    load "#{rakefile}" 
    tm=nil
    ObjectSpace.each_object {|v| tm=v if v.is_a?(Rake::Application) }
    p tm.tasks.select {|v| v.is_a?(Rake::Task) }.map {|v| [v.name, v.comment] }
    '
  }
  eval `#{get_tasks}`
end
 25 | 
# Builds a `ruby -e` command string that would load +rakefile+ and returns
# that string; the command itself is never executed here.
# NOTE(review): nothing in this Rakefile calls this method, and the local is
# named get_tasks -- looks like an unfinished copy of get_tasks; confirm
# before relying on it.
def get_real_lib_files(rakefile)
  get_tasks = %Q{ruby -e '
    load "#{rakefile}" 
    '
  }
end
 32 | 
 33 | # position can be :major, :minor, or :patch
 34 | def bump_version(position=:patch)
 35 |   index = {:major => 0, :minor => 1, :patch => 2}
 36 |   versions = VERSION.split('.').map(&:to_i)
 37 |   versions[ index[position] ] += 1
 38 |   version_string = versions.join('.')
 39 |   File.open(VERSION_FILE,'w') {|out| out.puts version_string }
 40 |   # update code from VERSION
 41 |   replaced = IO.readlines(CODE_VERSION).map do |line|
 42 |     if line.match(/VERSION\s*=/)
 43 |       %Q{char * VERSION = (char *)"#{version_string}";} + "\n"
 44 |     else ; line
 45 |     end
 46 |   end.join
 47 |   File.open(CODE_VERSION,'w') {|out| out.print replaced }
 48 | end
 49 | 
 50 | VERSION_FILE = 'VERSION'
 51 | VERSION = IO.read(VERSION_FILE).chomp
 52 | 
 53 | 
 54 | 
 55 | =begin
 56 |  catchall that allows any task in the subdirectory to be run from the top
 57 | rule "" do |t|
 58 |   cd "lib"
 59 |   sh "rake #{t.name}"
 60 |   cd TOPDIR
 61 | end
 62 | =end
 63 | 
# Options hash passed to `cd` so the directory change is not echoed.
QUIET = {:verbose => false }
TOPDIR = File.expand_path(File.dirname(__FILE__))
# Resolved against the current working directory, not against TOPDIR.
LIBDIR = File.expand_path('lib')

# this is lame, but I can't figure out a better way to do this right now
# (get_tasks loads "Rakefile" relative to the working directory, hence the
# temporary cd into LIBDIR)
cd LIBDIR, QUIET
tasks = get_tasks("Rakefile") 
cd TOPDIR, QUIET
LIB_NS = :lib

# Mirror every lib task that has a comment as "lib:<name>"; each mirrored task
# shells out to `rake <name>` inside LIBDIR.  Tasks without a comment are
# skipped.
TASKS_TO_DUP = []
namespace LIB_NS do
  tasks.each do |name, comment|
    if comment
      TASKS_TO_DUP << [name, comment]
      #desc comment
      task name do 
        cd LIBDIR
        sh "rake #{name}"
        cd TOPDIR, QUIET
      end
    end
  end
end

# Expose each mirrored task at the top level too, carrying the lib task's
# description and depending on its "lib:" counterpart.
TASKS_TO_DUP.each do |name, comment|
  desc comment
  task name => ["#{LIB_NS}:#{name}"]
end
 93 | 
# Names the template-generated files will have once '_TEMPLATE' is removed.
TEMPLATES_STAMPED = FileList["lib/**/*TEMPLATE.*"].map! {|v| v.sub('_TEMPLATE','') }
# Files that go into the package: everything under lib/, bin/ and the top
# level, plus the stamped template outputs, minus object files.  This constant
# is distinct from the $INCLUDED_FILES global assigned at the top of the file.
INCLUDED_FILES = FileList["lib/**/*", "bin/*", "*", *TEMPLATES_STAMPED].reject {|v| v =~ /\.o$/ }

# Defines the package tasks producing tar and zip archives named after VERSION.
Rake::PackageTask.new("obiwarp", VERSION) do |p|
  p.need_tar = true
  p.need_zip = true
  p.package_files.include(INCLUDED_FILES)
end

# NOTE(review): the description says "avoid package" although :package is a
# prerequisite here -- confirm the intended wording.
# :stamp_templates is not defined in this file; presumably it comes from the
# mirrored lib tasks -- verify.
desc 'safe, complete build process (avoid package)'
task :build => [:clobber, :doc, :stamp_templates, :package]

# Make :clean also remove the package output.
Rake::Task[:clean].prerequisites ||= []
Rake::Task[:clean].prerequisites << :clobber_package

# Source file whose VERSION declaration bump_version rewrites.
CODE_VERSION = "lib/obiwarp.cpp"

# Defines version:bump:major, version:bump:minor and version:bump:patch.
%w(major minor patch).each do |v|
  desc "bumps #{v} number in VERSION (and #{CODE_VERSION})"
  task "version:bump:#{v}" do
    bump_version(v.to_sym)
  end
end

# Prints the contents of VERSION_FILE.
task :version do
  print IO.read(VERSION_FILE)
end

# 'obiwarp' is not defined in this file; presumably it is one of the tasks
# mirrored from the lib Rakefile -- verify.
task :default => 'obiwarp'

# Requires the `bluecloth` command to be on the PATH.
desc "generate html page with bluecloth"
task :doc do
  sh "bluecloth README.md > README.html"
end

# Builds with -O3 appended to CFLAGS, then renames the result after
# RUBY_PLATFORM (adding '.exe' when unix? is falsy).
desc "build a native binary"
task "binary" do
  cd LIBDIR
  sh %Q{export CFLAGS="$CFLAGS -O3" ; rake obiwarp}
  cd TOPDIR
  mv "bin/obiwarp", "bin/obiwarp-#{RUBY_PLATFORM}" + (unix? ? '' : '.exe')
end

# Same as "binary" but exports COMPILER=mingw for the lib Rakefile.
desc "build a windows binary with mingw (mingw-g++ needed)"
task "windows_binary" do
  cd LIBDIR
  sh %Q{export COMPILER=mingw ; export CFLAGS="$CFLAGS -O3" ; rake obiwarp}
  cd TOPDIR
  mv "bin/obiwarp", "bin/obiwarp.exe"
end
144 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/VERSION:
--------------------------------------------------------------------------------
1 | 0.9.4
2 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/Rakefile:
--------------------------------------------------------------------------------
  1 | require 'fileutils'
  2 | require 'rake/clean'
  3 | require 'open3'
  4 | 
# When true, the memtest tasks run valgrind through `sh` and show its full
# output; when false they only print a filtered summary.
$VALGRIND_VERBOSE = false

# build 32 bit on linux with "export CFLAGS=-m32"

# Build settings.  CFLAGS is taken from the environment (nil when unset);
# the compiler is the mingw cross compiler only when COMPILER=mingw.
VAR = { 
  'CFLAGS' => ENV['CFLAGS'],
  :compiler => (ENV['COMPILER'] == 'mingw') ? 'i586-mingw32msvc-g++' : 'g++',
  :bindir => '../bin',
}

SDK_INCLUDES =  %w(cxxtest ../sdk)
SDK_DIR = "../sdk"
# Short aliases used throughout this file.
Fl = FileList ; FU = FileUtils

# Each SDK_INCLUDES entry is joined onto SDK_DIR, giving
# "-I../sdk/cxxtest -I../sdk/../sdk".
INCLUDES = SDK_INCLUDES.map {|f| '-I' + File.join(SDK_DIR,f) }.join(' ')
 20 | 
 21 | def have_matrix2png?
 22 |   begin
 23 |     reply = `matrix2png --version`
 24 |     reply && reply.match(/Usage:/)
 25 |   rescue StandardError
 26 |     false
 27 |   end
 28 | end
 29 | 
 30 | def have_valgrind?
 31 |   begin
 32 |     `valgrind --help`
 33 |   rescue SystemCallError
 34 |     false
 35 |   end
 36 | end
 37 | 
 38 | def build_from_template(template_filename, opts={})
 39 |   opt = { :file_template_tag => '_TEMPLATE', 
 40 |     :type_template_tag => 'FLOAT', 
 41 |     :abbr_template_tag => 'ABR',
 42 |     :start_flag => %r{//\s+BEGIN TEMPLATE},
 43 |     :end_flag => %r{//\s+END TEMPLATE},
 44 |     :to_build => { 'float' => 'F', 'double' => 'D', 'int' => 'I' }
 45 |   }.merge(opts)
 46 |   outfile = template_filename.gsub(opt[:file_template_tag],'')
 47 | 
 48 |   abr_template_tag = 'ABR'
 49 |   to_expand = []
 50 |   pre_section = []
 51 |   post_section = []
 52 |   in_template_section = nil
 53 |   IO.foreach(template_filename) do |line|
 54 |     pre_section << line if in_template_section.nil?
 55 |     in_template_section = false if line =~ opt[:end_flag]
 56 |     to_expand << line if in_template_section
 57 |     in_template_section = true if line =~ opt[:start_flag] 
 58 |     post_section << line if in_template_section == false
 59 |   end
 60 | 
 61 |   all_lines_expanded = []
 62 |   opt[:to_build].each do |c_type, abbr|
 63 |     all_lines_expanded << to_expand.map {|line| line.gsub(opt[:type_template_tag], c_type).gsub(opt[:abbr_template_tag],abbr) }
 64 |   end
 65 |   File.open(outfile,'w') do |out|
 66 |     out.print( pre_section.join << all_lines_expanded.join << post_section.join )
 67 |   end
 68 | end
 69 | 
 70 | MY_BIN_DIR = File.expand_path("../bin")
 71 | 
 72 | def link(executable, object_files)
 73 |   sh "#{VAR[:compiler]} #{VAR["CFLAGS"]} -o #{executable} #{object_files.join(" ")}"
 74 | end
 75 | 
# Lambda that, when called with a base name and extension, registers a file
# task building "<name>.<ext>" from "<name>_TEMPLATE.<ext>" via
# build_from_template.
# NOTE(review): this lambda is not invoked anywhere in this Rakefile; the
# TEMPLATE_FILES.each loop further down registers the file tasks directly.
template_rule = lambda do |name, ext| 
  file "#{name}.#{ext}" => "#{name}_TEMPLATE.#{ext}" do |fn|
    build_from_template(fn.prerequisites[0])
  end
end
 81 | 
 82 | def compile(object_file, prereq)
 83 |   sh "#{VAR[:compiler]} #{VAR["CFLAGS"]} -c -o #{object_file} #{prereq} #{INCLUDES}"
 84 | end
 85 | 
OBIWARP_EXE = File.join(VAR[:bindir], 'obiwarp')
UTILITIES = %w(mat2mata mata2mat lmat2chrms lmat2lmata lmat2png lmata2lmat)
BASE = %w(vec mat lmat dynprog cmdparser pngio)
# NOTE: `<<` appends in place, so after this line the strings held by BASE
# also end in ".o".
BASE_OBJECT_FILES = BASE.map {|v| v << ".o" }

TEMPLATE_FILES = Fl["*_TEMPLATE.*"]
TEMPLATE_OUTPUT_FILES = TEMPLATE_FILES.map {|f| f.sub('_TEMPLATE','') }

# One file task per template: the stamped output depends on its template.
TEMPLATE_FILES.each do |fn|
  file(fn.sub('_TEMPLATE','') => [fn]) { build_from_template(fn) }
end

desc 'create the files from template: *_TEMPLATE.*'
task :stamp_templates => TEMPLATE_OUTPUT_FILES

desc 'compile *.o files for core obi-warp library'
task :compile_objects => (BASE_OBJECT_FILES + %w(obiwarp.o))

# Test headers found on disk (excluding templates) plus test_vec.h and
# test_mat.h, which are listed explicitly; uniq! drops the duplicates if the
# glob already found them.  test_pngio.h is dropped when matrix2png is
# unavailable.
TESTEXECS = []
CPP_TEST_FILES_H = Fl["test_*.h"].reject {|v| v=~/TEMPLATE/} + %w(test_vec.h test_mat.h)
CPP_TEST_FILES_H.delete("test_pngio.h") unless have_matrix2png?
CPP_TEST_FILES_H.uniq!

# For each test header: generate a .cpp with cxxtestgen, compile it to a .o,
# and link it with the base objects into an executable named after the header
# without its extension.
CPP_TEST_FILES_H.each do |hfile|
  cppfile = hfile.ext('cpp')
  ofile = hfile.ext('o')
  no_ext = hfile.sub('.h','')
  CLEAN << cppfile << ofile
  CLOBBER << no_ext
  file cppfile => [hfile] do |t|
    sh "perl -w #{SDK_DIR}/cxxtest/cxxtestgen.pl --error-printer -o #{t.name} #{hfile}"
  end
  # BASE_OBJECT_FILES are required here only so that templates are stamped and
  # .h files will be present
  file ofile => [cppfile, *BASE_OBJECT_FILES] {|t| compile(ofile, cppfile) }
  file no_ext => [ofile] { link(no_ext, [ofile, *BASE_OBJECT_FILES]) }
  TESTEXECS << no_ext
end
124 | 
# Ruby test scripts become tasks named after the file.  Scripts whose name
# contains "converter" depend on the utilities; the others depend on the
# obiwarp executable.
TESTRUBYFILES = Fl["test_*.rb"]
TESTRUBYFILES.each do |fn|
  if fn =~ /converter/
    task fn => [:utilities] {|t| sh "ruby #{t.name}" }
  else
    task fn => [OBIWARP_EXE] {|t| sh "ruby #{t.name}" }
  end
end

# Two rules for building a .o: one for sources that have a matching header,
# one for sources that do not.  Both compile the .cpp (first prerequisite).
rule '.o' => ['.cpp', '.h'] do |t|
  compile(t.name, t.prerequisites[0])
end

rule '.o' => ['.cpp'] do |t|
  compile(t.name, t.prerequisites[0])
end

# Executables listed here get a `desc`; the others are defined without one.
EXECUTABLES_TO_DOCUMENT = %w(obiwarp)
UTILITIES_IN_BINDIR = UTILITIES.map {|v| File.join(VAR[:bindir], v) }
ALL_USE_EXECS = [OBIWARP_EXE, *UTILITIES_IN_BINDIR]

# create executables
# Each binary is linked from the base objects plus "<basename>.o", and a task
# named after the basename is defined as a shortcut to the file task.
ALL_USE_EXECS.each do |binpath|
  basename = File.basename(binpath)
  CLOBBER << binpath
  file binpath => (BASE_OBJECT_FILES + ["#{basename}.o"]) do |t|
    FU.mkpath VAR[:bindir] 
    link(binpath, t.prerequisites)
    #puts "*** GO TO: '#{File.expand_path(VAR[:bindir])}' to find \"#{executable}\" ***"
  end
  if EXECUTABLES_TO_DOCUMENT.include?(basename)
    desc "create #{binpath}"
  end
  task basename => [binpath]
end

desc "create: #{UTILITIES.join(' ')}"
task :utilities => UTILITIES_IN_BINDIR

# One "run_<exe>" task per test executable; the map collects the task names.
RUNTESTEXECS = TESTEXECS.map do |exe|
  runtest = "run_#{exe}"
  task runtest => [exe] {|t| sh File.join(Dir.getwd,exe) }
  runtest 
end
169 | 
# One "memtest_<exe>" task per test executable; the map collects the task
# names.  Each task runs the executable under valgrind's memcheck tool.
ALL_MEMTESTS = TESTEXECS.map do |exe|
  basename = File.basename(exe)
  memtest = "memtest_#{basename}"
  task memtest => [exe] do
    if have_valgrind?
      fn = File.join(Dir.getwd, exe)
      if $VALGRIND_VERBOSE
        sh "valgrind --tool=memcheck --leak-check=yes #{fn}"
      else
        # Quiet mode: read valgrind's stderr and print only the malloc/free
        # and heap-usage summary lines.
        Open3.popen3("valgrind --tool=memcheck --leak-check=yes #{fn}") do |stdin,stdout,stderr|
          stderr.each do |line|
            if md = line.match(/==\d+== (malloc\/free:.*)/)
              puts md[1]
            end
            if md = line.match(/total heap usage: (([\d\,]*) allocs, ([\d\,]*) frees, [\d\,]* bytes allocated)/)
              puts "*** #{basename}: #{md[1]}"
              # Differing alloc and free counts are reported as a possible leak.
              if md[2] != md[3]
                puts (ast="*"*40) + " WARNING!!!! potential memory leak! " + ast
              end
            end
          end
        end
      end
    else
      puts "requires valgrind to run memcheck! (Linux)"
    end
  end
  memtest
end

desc "run memory checks against all execs (run each separate: memtest_<exe>)"
task :memtest => ALL_MEMTESTS

# The description is assembled from the test executable names with their
# "test_" prefix removed.
desc ([%Q{"test_*.rb"}] + %w(and) + %w("run_test_") + TESTEXECS.map{|v|v.sub('test_','')}).join(' ')
task :test => TESTRUBYFILES + RUNTESTEXECS

task :default => OBIWARP_EXE

############################################################
# CLEANUP
############################################################

# existing! keeps only entries that are present on disk at load time.
CLEAN.add(Fl["*.o"], TEMPLATE_OUTPUT_FILES).existing!
CLOBBER.existing!
214 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/cmdparser.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _CMDPARSER_H
 3 | #define _CMDPARSER_H
 4 | 
 5 | 
// Holds the program's command-line options as public fields, together with
// the helper methods declared for filling them in.
// NOTE(review): the implementation is not visible in this header; the field
// notes below are inferred from names only -- confirm against cmdparser.cpp.
class CmdParser {
    public:
        // internal
        // presumably the current argument index and the argv being walked
        int _i;
        char ** _argv;

        // options:
        char * format;
        char * outfile;
        bool images;
        char * timefile;
        char * score;
        bool nostdnrm;
        bool local;
        float factor_diag;
        float factor_gap;
        float gap_init;
        float gap_extend;
        float init_penalty;
        float response;
        void help_func(int arg_cnt);
        void print_version(char * version);
        //bool warp_data;
        char * smat_out;
        char * smat_in;

        // exactly two input file slots
        char * infiles[2];

        char * progname;

        // initialize
        // Takes the raw argc/argv and the program version string.
        CmdParser(int argc, char ** argv, char * version);

        // internal:
        // The set_* helpers write through their reference/pointer parameter.
        void set_flag(bool &val);
        void set_string(char ** string_ptr);
        void set_float(float &val);
        void set_comma_list_float(float &val1, float &val2);
        void verify_infiles();
        void get_format_from_file1();
        bool file_is_readable(char * filename);
        void set_defaults_by_score(char * score_arg);
        bool eq(char * first, char * sec);

};
51 | 
52 | #endif
53 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/dynprog.h:
--------------------------------------------------------------------------------
  1 | #ifndef _DYNPROG_H
  2 | #define _DYNPROG_H
  3 | 
  4 | #include "math.h"
  5 | 
  6 | #include "vec.h"
  7 | #include "mat.h"
  8 | 
  9 | using namespace VEC;
 10 | 
 11 | 
 12 | class DynProg {
 13 |     private:
 14 |         float DEFAULT_GAP_PENALTY_SLOPE;
 15 |     public:
 16 |         int stupid;
 17 |         MatF* _smat;
 18 |         MatF _asmat;
 19 |         MatI _tb;
 20 |         MatI _tbpath;
 21 |         MatF _tbscores;  // the tbpath with the score at each position
 22 |         MatI _gapmat;
 23 |         VecI _mCoords;
 24 |         VecI _nCoords;
 25 |         VecF _sCoords;
 26 |         float _bestScore; // the scores at each m,n coordinate!
 27 |         float _prob;
 28 |        
 29 |         DynProg() { DEFAULT_GAP_PENALTY_SLOPE = 2.f; }
 30 | 
 31 |         // If gap_penalty array len = 0, then a linear gap penalty based on the
 32 |         // average matrix score will be used
 33 |         // neither diag or gap factor can be 0.0 for minimization
 34 |         void find_path(MatF &smat, VecF &gap_penalty, int minimize=0, float diag_factor=2.f, float gap_factor=1.f, int local=0, float init_penalty=0.0f);
 35 |         // If gap_penalty array len = 0, then a linear gap penalty based on the
 36 |         // average matrix score will be used
 37 |         // a gap is introduced without adding in the score of the matrix
 38 |         // at that index
 39 |         //void find_path_with_gaps(MatF &smat, VecF &gap_penalty, int minimize=0, int local=0, float init_penalty=0.0f);
 40 |         void default_gap_penalty(MatF &smat, VecF &out);
 41 |        
 42 |         ~DynProg() {}
 43 |         // x are the times along the n axis of the tbpath
 44 |         // fx are the equivalents along the y axis of the tbpath
 45 |         
 46 |         // RETURNS the actual number of internal anchors used
 47 |         void warp_map(VecI &mOut, VecI &nOut, float percent_anchors, int minimize=0);
 48 |         void best_anchors(VecI &mBijShort, VecI &nBijShort, VecF &sBijShort, VecI &mCoords, VecI &nCoords, VecI &mOut, VecI &nOut, int num_internal_anchors);
 49 |         void best_anchors(VecI &mCoordsBijShort, VecI &nCoordsBijShort, VecF &sCoordsBijShort, VecI &mOut, VecI &nOut, int num_internal_anchors);
 50 |         void bijective_anchors(VecI &mCoords, VecI &nCoords, VecF &scores, VecI &mBijShort, VecI &nBijShort, VecF &sBijShort);
 51 |         
 52 |         // NEED to redo these and affirm correctness...
 53 |         // warps the mMat along the m axis where mMat coordinates are
 54 |         // specified by mCoords and the new, warped coordinates are 
 55 |         // supplied by nCoords
 56 | //        void warp(VecI &mCoords, VecI &nCoords, MatF &mMat, MatF &warpedOut, bool mCoord_row_nums=0);
 57 | //        void warp(VecI &mCoords, VecI &nCoords, VecF &mVec, VecF &warpedOut, bool mCoord_row_nums=0);
 58 | //        void warp(VecF &mCoords, VecF &nCoords, VecF &mVec, VecF &warpedOut, bool mCoord_row_nums=0);
 59 |         // Calculates the sum of the sq of the residuals of the warped nVals
 60 |         float sum_sq_res_yeqx(VecF &m_tm, VecF &n_tm, VecI &mWarpMap, VecI &nWarpMap, VecF &mVals, VecF &nVals);
 61 |         void path_accuracy_details(VecF &mWarpMapFt, VecF &nWarpMapFt, VecF &mVals, VecF &nVals, VecF &sq_res_yeqx, VecF &abs_diff, int linear_interp=0);
 62 |         void path_accuracy(VecF &mWarpMapFt, VecF &nWarpMapFt, VecF &mVals, VecF &nVals, float &sum_sq_res_yeqx, float &avg_sq_res_yeqx, float &sum_abs_diff, float &avg_abs_diff, int linear_interp=0);
 63 |         void path_accuracy(VecF &m_tm, VecF &n_tm, VecI &mWarpMap, VecI &nWarpMap, VecF &mVals, VecF &nVals, float &sum_sq_res_yeqx, float &avg_sq_res_yeqx, float &sum_abs_diff, float &avg_abs_diff, int linear_interp=0);
 64 |         void _max(float diag, float top, float left, float &val, int &pos);
 65 |         void _min(float diag, float top, float left, float &val, int &pos);
 66 |         float _global_max(MatF& asmat, int& m_index, int& n_index); 
 67 |         float _max_right(MatF& asmat, int& m_index);
 68 |         float _max_bottom(MatF& asmat, int& n_index);
 69 |         float _global_min(MatF& asmat, int& m_index, int& n_index); 
 70 |         float _min_right(MatF& asmat, int& m_index);
 71 |         float _min_bottom(MatF& asmat, int& n_index);
 72 |         static int exponential_less_before(float order, int len,  float *expon, float *lessbefore) {
 73 |             float prev = 0.0f;  // element 0 has no predecessor; the old code read expon[-1] here
 74 |             for (int i = 0; i < len; i++) {
 75 |                 expon[i] = pow(i,order);          // expon[i] = i^order
 76 |                 lessbefore[i] = expon[i] - prev;  // step up from the previous power
 77 |                 prev = expon[i];
 78 |             }
 79 |             return 1;  // always 1; results are delivered through expon/lessbefore (len floats each)
 80 |         }
 81 | 
 82 |         // Score matrices arranged like this
 83 |         // nCoords -> scans along the x axis
 84 |         // mCoords run | 
 85 |         //             V
 86 |         //             scans along the y axis
 87 |         void score_product(MatF &mCoords, MatF &nCoords, MatF &scores);
 88 |         void score_covariance(MatF &mCoords, MatF &nCoords, MatF &scores);
 89 |         void score_pearsons_r(MatF &mCoords, MatF &nCoords, MatF &scores);
 90 |         void score_pearsons_r2(MatF &mCoords, MatF &nCoords, MatF &scores);
 91 |         void score_mutual_info(MatF &mCoords, MatF &nCoords, MatF &scores, int num_bins=2);
 92 |         void score_euclidean(MatF &mCoords, MatF &nCoords, MatF &scores);
 93 |         // convenience method for scoring
 94 |         void score(MatF &mCoords, MatF &nCoords, MatF &scores, const char *type, int mi_num_bins=2);
 95 | 							 
 96 | //   DynProg::expandFlag(mat1, 2, 1)
 97 | //   
 98 | //       before              after
 99 | //    2 0 0 0 0 0         2 2 2 0 0 0
100 | //    0 2 0 0 0 0         2 2 2 2 0 0  
101 | //    0 0 2 0 0 0    =>   2 2 2 2 2 0
102 | //    0 0 0 2 0 0         0 2 2 2 2 2
103 | //    0 0 0 0 2 0         0 0 2 2 2 2
104 | //    0 0 0 0 0 2         0 0 0 2 2 2 
105 |         static void expandFlag(MatI &flagged, int flag, int numSteps, MatI &expanded);
106 |         // val[i] -= val[i-1]  (inplace)
107 |         void less_before(VecF &arr);
108 |         // linear function mx + b where m is slope and b is y intercept
109 |         // each value has the value of the element before it subtracted
110 |         void linear_less_before(float m, float b, int len, VecF &lessbefore);
111 |         // linear function mx + b where m is slope and b is y intercept
112 |         void linear(float m, float b, int len,  VecF &arr);
113 |         
114 | 
115 |         //void replaceAlignmentPathRandom(MatF& mat, MatI& toReplace);
116 |         //float toProb(int halfWindow, short int numShuffles, char *type, float init_penalty, int minimum);
117 | };
118 | 
119 | 
120 | #endif
121 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/lmat.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _LMAT_H
 3 | #define _LMAT_H
 4 | 
 5 | #include "vec.h"
 6 | #include "mat.h"
 7 | 
 8 | 
 9 | using namespace VEC;
10 | 
11 | class LMat {  // a data matrix (_mat) bundled with its m/z (_mz) and time (_tm) axis values
12 |     private:  // NOTE: the #defines below are preprocessor macros; this access label does not scope them
13 | #define LEN_LARGEST_NUM (30)
14 | #define LARGEST_NUM_MZ_VALS (40000)
15 | #define LARGEST_NUM_TIME_VALS (40000)
16 |     public:
17 |         int _mz_vals;  // number of m/z values, as reported by mzlen()/num_mz()
18 |         int _tm_vals;  // number of time values, as reported by tmlen()/num_tm()
19 | 
20 |         // All constructors call new!
21 |         // All swaps of these MUST delete their memory before swapping!
22 |         MatF *_mat;
23 |         VecF *_mz;
24 |         VecF *_tm;
25 | 
26 |         LMat();
27 |         // Takes a binary lmat file as input
28 |         LMat(const char *file);
29 |         ~LMat();
30 |         int mzlen() { return _mz_vals; }
31 |         int tmlen() { return _tm_vals; }
32 |         int num_mz() { return _mz_vals; }
33 |         int num_tm() { return _tm_vals; }
34 |         MatF * mat() { return _mat; }
35 |         VecF * mz() { return _mz; }
36 |         VecF * tm() { return _tm; }
37 |         // Axis extremes = first/last stored axis value (assumes ascending, non-empty axes — TODO confirm)
38 |         float hi_mz() { return (*_mz)[_mz_vals-1]; }
39 |         float lo_mz() { return (*_mz)[0]; }
40 |         float hi_tm() { return (*_tm)[_tm_vals-1]; }
41 |         float lo_tm() { return (*_tm)[0]; }
42 |         void mz_axis_vals(VecI &mzCoords, VecF &mzVals);
43 |         void tm_axis_vals(VecI &tmCoords, VecF &tmVals);
44 |         void set_from_ascii(const char *file);
45 |         void set_from_binary(const char *file);
46 |         // Sets the matrix and gives m and n axis labels as the indices
47 |         void set_from_binary_mat(const char *file);
48 |         void set_from_ascii_mat(const char *file);
49 | 
50 | 
51 |         // selfTimes and equivTimes are the anchor points for the warping
52 |         // function..
53 |         // warps the time values (not the actual data values)
54 |         void warp_tm(VecF &selfTimes, VecF &equivTimes);
55 | 
56 |         // expects one line with the # mz vals and next with the vals
57 |         void set_mz_from_ascii(FILE *fpt);
58 |         // expects one line with the # tm vals and next with the vals
59 |         void set_tm_from_ascii(FILE *fpt);
60 |         // expects the matrix in ascii format
61 |         void set_mat_from_ascii(FILE *ptr, int rows, int cols);
62 |         // writes the lmat in binary to a file (or STDOUT if NULL)
63 |         void write(const char *file=NULL);
64 |         // writes the lmat in ascii to a file (or STDOUT if NULL)
65 |         void print(const char *file=NULL);
66 | 
67 |         // obviously not the final resting place
68 |         void chomp_plus_spaces( char *str);
69 | };
70 | 
71 | #endif
72 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/lmat2chrms.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "lmat.h"
 9 | 
10 | 
11 | using namespace VEC;
12 | int main (int argc, char *argv[]) {
13 |     // Writes the chromatogram of every requested m/z in a binary lmat file to <name>.toplot.
14 |     if (argc == 1) {
15 |         std::cerr <<
16 |         "**********************************************************************\n" <<
17 |         "usage: lmat2chrms file.lmat m/z ...\n" <<
18 |         "**********************************************************************\n";
19 |         exit(1);
20 |     }
21 |     /************************************************************
22 |      * GET ARGUMENTS
23 |      ************************************************************/
24 |     char file[1024];
25 |     // The name is copied into fixed 1024-byte buffers and ".toplot" is appended later,
26 |     // so reject anything that cannot fit instead of overflowing the stack.
27 |     if (strlen(argv[1]) + sizeof(".toplot") > sizeof(file)) {
28 |         std::cerr << "lmat2chrms: file name too long: " << argv[1] << "\n";
29 |         exit(1);
30 |     }
31 |     strcpy(file, argv[1]);
32 | 
33 |     LMat lmat;
34 |     lmat.set_from_binary(file);
35 |     MatF trans;
36 |     lmat.mat()->transpose(trans);
37 |     // One vector per row of the transpose; the loop below indexes them by m/z index.
38 |     VecF *vecs = new VecF[trans.rows()];
39 |     int num_vecs;
40 |     trans.row_vecs(num_vecs, vecs);
41 | 
42 |     char fn[1024];
43 |     char toplotfn[1024];
44 |     strcpy(fn, file);
45 |     // Cut the name at ".lmat" only when it is present: strstr returns NULL otherwise,
46 |     // and the old unconditional write through that pointer crashed.
47 |     char *pch = strstr(fn, ".lmat");
48 |     if (pch != NULL) { *pch = '\0'; }
49 | 
50 |     // Base name = text after the last '/'. Pointing into fn replaces the old
51 |     // strcpy(fnbase, ++start), whose source and destination overlapped (undefined behaviour).
52 |     const char *start = strrchr(fn, '/');
53 |     const char *fnbase = (start != NULL) ? start + 1 : fn;
54 | 
55 |     strcpy(toplotfn, fn);
56 |     strcat(toplotfn, ".toplot");
57 |     std::ofstream fh(toplotfn);
58 |     printf("WRITING TO: %s\n", toplotfn);
59 | 
60 |     fh << "XYData" << "\n";
61 |     fh << fnbase << "\n";
62 |     fh << fnbase << " chromatograms" << "\n";
63 |     fh << "time (sec)" << "\n";
64 |     fh << "ion counts" << "\n";
65 | 
66 |     for (int i = 2; i < argc; ++i) {
67 |         fh << "m/z " << argv[i] << "\n";
68 |         int ind = lmat.mz()->index(atof(argv[i]));
69 |         lmat.tm()->print(fh, 1);
70 |         vecs[ind].print(fh, 1);
71 |     }
72 | 
73 |     delete[] vecs;
74 | }
75 | 
76 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/lmat2lmata.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "lmat.h"
 9 | 
10 | 
11 | using namespace VEC;
12 | int main (int argc, char *argv[]) {
13 |     // Converts each binary lmat file named on the command line to ascii, written next to it.
14 |     if (argc == 1) {
15 |         std::cerr <<
16 |         "**********************************************************************\n" <<
17 |         "usage: lmat2lmata file.lmat ... \n" <<
18 |         "**********************************************************************\n" <<
19 |         "converts binary lmat file into ascii file (does NOT delete original)\n" <<
20 |         "outputs file.lmata\n" <<
21 |         "\n" <<
22 |         "FORMATS: \n" <<
23 |         "lmat = binary (all vals 4 bytes; all on one line):\n" <<
24 |         "        (int) # rows, (floats) m axis values (i.e. time vals),\n" <<
25 |         "        (int) # cols, (floats) n axis values (i.e. m/z vals),\n" <<
26 |         "        (floats) matrix data values row1, row2, row3 ...\n" <<
27 |         "lmata = ascii format (space delimited, with newlines as shown below):\n" <<
28 |         "         # rows\n" <<
29 |         "         m axis values (i.e. time vals)\n" <<
30 |         "         # cols\n" <<
31 |         "         n axis values (i.e. m/z vals)\n" <<
32 |         "         matrix data row1\n" <<
33 |         "         matrix data row2\n" <<
34 |         "         matrix data row3 ...\n" <<
35 |         "**********************************************************************\n";
36 |         exit(1);
37 |     }
38 |     /************************************************************
39 |      * GET ARGUMENTS
40 |      ************************************************************/
41 |     // Output name = input name with 'a' appended (file.lmat -> file.lmata).
42 |     char outfile[1024];
43 | 
44 |     LMat lmat;
45 |     for (int i = 1; i < argc; i++) {
46 |         // name + 'a' + NUL must fit; the old unchecked strcpy overflowed on long names.
47 |         if (strlen(argv[i]) + 2 > sizeof(outfile)) {
48 |             std::cerr << "lmat2lmata: file name too long: " << argv[i] << "\n";
49 |             exit(1);
50 |         }
51 |         strcpy(outfile, argv[i]);
52 |         strcat(outfile, "a");
53 |         lmat.set_from_binary(argv[i]);
54 |         lmat.print(outfile);
55 |     }
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/lmat2png.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "vec.h"
 9 | #include "mat.h"
10 | #include "lmat.h"
11 | #include "dynprog.h"
12 | #include "pngio.h"
13 | 
14 | 
15 | char file[1024];
16 | 
17 | int BINARY = 0;
18 | 
19 | int main (int argc, char *argv[]) {
20 |     /************************************************************
21 |      * GET ARGUMENTS
22 |      ************************************************************/
23 |     if (argc == 1) {
24 |         std::cerr <<
25 |         "*****************************************************************\n" <<
26 |         "usage: lmat2png [-b] file1 ...\n" <<
27 |         "*****************************************************************\n" <<
28 |         "requires png2matrix callable\n" <<
29 |         "*****************************************************************\n";
30 |         exit(1);
31 |     }
32 |     int i;
33 |     for (i = 1; i < argc; i++) {  // first pass: -b anywhere selects binary input for every file
34 |         if (!strcmp(argv[i],"-b")) {
35 |             BINARY = 1;
36 |         }
37 |     }
38 |     for (i = 1; i < argc; i++) {  // second pass: write one png per file argument
39 |         if (strcmp(argv[i],"-b")) {  // if this is a file (not an arg)
40 |             char outfile[1024];
41 |             // Bound the name first: it is copied into fixed buffers and ".png" is appended.
42 |             if (strlen(argv[i]) + sizeof(".png") > sizeof(outfile)) {
43 |                 std::cerr << "lmat2png: file name too long: " << argv[i] << "\n";
44 |                 exit(1);
45 |             }
46 |             strcpy(file, argv[i]);
47 |             strcpy(outfile, file);
48 |             char *ptr = strstr(outfile, ".lmat");  //works for lmat and lmata
49 |             if (ptr != NULL) { *ptr = '\0'; }  // no ".lmat": keep the whole name (was a NULL write)
50 |             strcat(outfile, ".png");
51 |             LMat lmat;
52 |             if (BINARY) { lmat.set_from_binary(file); }
53 |             else { lmat.set_from_ascii(file); }
54 |             PngIO wrt(1);
55 |             wrt.write(outfile, *lmat.mat());
56 |         }
57 |     }
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/lmata2lmat.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "lmat.h"
 9 | 
10 | 
11 | using namespace VEC;
12 | int main (int argc, char *argv[]) {
13 |     // Converts each ascii lmata file named on the command line to binary, written next to it.
14 |     if (argc == 1) {
15 |         std::cerr <<
16 |         "**********************************************************************\n" <<
17 |         "usage: lmata2lmat file.lmata ... \n" <<
18 |         "**********************************************************************\n" <<
19 |         "outputs file.lmat\n" <<
20 |         "converts ascii lmata file into binary file (does NOT delete original)\n" <<
21 |         "\n" <<
22 |         "FORMATS: \n" <<
23 |         "lmat = binary (all vals 4 bytes; all on one line):\n" <<
24 |         "        (int) # rows, (floats) m axis values (i.e. time vals),\n" <<
25 |         "        (int) # cols, (floats) n axis values (i.e. m/z vals),\n" <<
26 |         "        (floats) matrix data values row1, row2, row3 ...\n" <<
27 |         "lmata = ascii format (space delimited, with newlines as shown below):\n" <<
28 |         "         # rows\n" <<
29 |         "         m axis values (i.e. time vals)\n" <<
30 |         "         # cols\n" <<
31 |         "         n axis values (i.e. m/z vals)\n" <<
32 |         "         matrix data row1\n" <<
33 |         "         matrix data row2\n" <<
34 |         "         matrix data row3 ...\n" <<
35 |         "**********************************************************************\n";
36 |         exit(1);
37 |     }
38 |     /************************************************************
39 |      * GET ARGUMENTS
40 |      ************************************************************/
41 |     char outfile[1024];
42 |     LMat lmat;
43 |     for (int i = 1; i < argc; i++) {
44 |         const size_t len = strlen(argv[i]);
45 |         if (len == 0 || len >= sizeof(outfile)) {  // would index outfile[-1] / overflow the buffer
46 |             std::cerr << "lmata2lmat: bad file name: " << argv[i] << "\n";
47 |             exit(1);
48 |         }
49 |         strcpy(outfile, argv[i]);
50 |         outfile[len-1] = '\0';  // drop the last character: file.lmata -> file.lmat
51 |         lmat.set_from_ascii(argv[i]);
52 |         lmat.write(outfile);
53 |     }
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/mat2mata.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "mat.h"
 9 | 
10 | 
11 | using namespace VEC;
12 | int main (int argc, char *argv[]) {
13 |     // Converts each binary mat file named on the command line to ascii, written next to it.
14 |     if (argc == 1) {
15 |         std::cerr <<
16 |         "**********************************************************************\n" <<
17 |         "usage: mat2mata file.mat ... \n" <<
18 |         "**********************************************************************\n" <<
19 |         "outputs file.mata\n" <<
20 |         "converts binary file into ascii mata file(does NOT delete original)\n" <<
21 |         "\n" <<
22 |         "FORMATS: \n" <<
23 |         "mat = binary (all vals 4 bytes):\n" <<
24 |         "        (int)#rows,(int)#cols,(floats)matrix_data_values...\n" <<
25 |         "mata = ascii format (space delimited, with newlines as shown below):\n" <<
26 |         "         #rows, #cols\n" <<
27 |         "         matrix data row1\n" <<
28 |         "         matrix data row2\n" <<
29 |         "         matrix data row3 ...\n" <<
30 |         "**********************************************************************\n";
31 |         exit(1);
32 |     }
33 |     /************************************************************
34 |      * GET ARGUMENTS
35 |      ************************************************************/
36 |     // Output name = input name with 'a' appended (file.mat -> file.mata).
37 |     char outfile[1024];
38 | 
39 |     MatF mat;
40 |     for (int i = 1; i < argc; i++) {
41 |         // name + 'a' + NUL must fit; the old unchecked strcpy overflowed on long names.
42 |         if (strlen(argv[i]) + 2 > sizeof(outfile)) {
43 |             std::cerr << "mat2mata: file name too long: " << argv[i] << "\n";
44 |             exit(1);
45 |         }
46 |         strcpy(outfile, argv[i]);
47 |         strcat(outfile, "a");
48 |         mat.set_from_binary(argv[i]);
49 |         mat.print(outfile);
50 |     }
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/mat_TEMPLATE.h:
--------------------------------------------------------------------------------
  1 | #ifndef _MAT_H
  2 | #define _MAT_H
  3 | 
  4 | #include "vec.h"
  5 | 
  6 | /*************************************************************
  7 |  * Creation from existing object/array is always shallow!.  
  8 |  * Will delete any memory allocated.
  9 |  * Will NOT delete any memory not allocated.
 10 |  * If you want deep then use copy function!
 11 |  ************************************************************/ 
 12 | 
 13 | 
 14 | namespace VEC {
 15 | 
 16 | class MatI;
 17 | class MatF;
 18 | class MatD;
 19 | 
 20 | // BEGIN TEMPLATE
 21 | 
 22 | class MatABR {  // NOTE(review): ABR/FLOAT look like placeholders filled in per element type (MatI/MatF/MatD) — confirm
 23 |     // Dense row-major matrix: element (m, n) lives at _dat[m*_n + n] (see operator()).
 24 |     public:
 25 |         // dimensions: _m rows, _n columns (see rows()/cols())
 26 |         int _m;
 27 |         int _n;
 28 |         VecABR _dat;  // flat element storage
 29 |         // Constructors:
 30 |         MatABR();
 31 |         MatABR(int m, int n);
 32 |         MatABR(int m, int n, const FLOAT &val);
 33 |        
 34 | 
 35 |         // (copied from vec.h)
 36 |         // if (shallow == 1 (true)) then no memory is deleted upon destruction
 37 |         // if (shallow == 0 (false)) then delete[] is called
 38 |         // FOR THIS CONSTRUCTOR ONLY, there is no DEEP copying, EVER!
 39 |         MatABR(int m, int n, FLOAT *arr, bool shallow=0);
 40 | 
 41 |         // (copied from vec.h)
 42 |         // if (shallow == 0 (false)) a DEEP copy is made of the data
 43 |         // if (shallow == 1 (true)) a copy of the pointer is made
 44 |         // if (shallow) then no memory is released upon destruction
 45 |         // shallow is used for a quick copy with which to work 
 46 |         MatABR(const MatABR &A, bool shallow=0);
 47 |         // Raw access to the underlying storage
 48 |         operator FLOAT*() { return (FLOAT*)_dat; }
 49 |         operator const FLOAT*() { return (FLOAT*)_dat; }
 50 |         FLOAT* pointer() { return (FLOAT*)_dat; }
 51 |         FLOAT* pointer(int m) { return &_dat[m*_n]; }  // address of the first element of row m
 52 |         // creates vec objects 
 53 |         // caller must have allocated the array for the vec objects
 54 |         // the data is a shallow copy!
 55 |         // transpose and call row_vecs for col_vecs!
 56 |         void row_vecs(int &cnt, VecABR *vecs);
 57 | 
 58 |         MatABR & operator=(const FLOAT &val);
 59 |         // DEEP
 60 |         MatABR & operator=(MatABR &A);
 61 |         ~MatABR();
 62 |         // Deep copy unless shallow == true
 63 |         void copy(MatABR &receiver, bool shallow=0) const;
 64 | 
 65 |         void set_from_ascii(std::ifstream &stream, int m, int n, MatABR &out);
 66 |         void set_from_ascii(std::ifstream &stream, MatABR &out);
 67 |         void set_from_ascii(const char *file, bool without_axes=0);
 68 |         void set_from_binary(const char *file);
 69 |         void file_rows_cols(std::ifstream &stream, int &rows, int &cols);
 70 |         // tnt_array2d_utils.h has a good example (use ifstream)
 71 | 
 72 |         // shallow copy and no ownership of memory
 73 |         void set(int m, int n, FLOAT *arr);
 74 |         // shallow copy and no ownership of memory
 75 |         void set(MatABR &A);
 76 | 
 77 |         bool all_equal() {
 78 |             return _dat.all_equal();
 79 |         }
 80 | 
 81 |         // Deletes the object's memory (if not shallow) and takes ownership
 82 |         // of the array memory (we will call delete[])
 83 |         void take(int m, int n, FLOAT *arr);
 84 |         // Deletes previous memory (if not shallow) and takes ownership
 85 |         // of the other's memory.
 86 |         void take(MatABR &A);
 87 | 
 88 |         // flattens the matrix and returns a vector
 89 |         void to_vec(VecABR &outvec, bool shallow=0);
 90 | 
 91 |         bool operator==(const MatABR &A);
 92 |         
 93 |         bool shallow() { return _dat.shallow(); }
 94 |         int dim1() const { return _m; }
 95 |         int dim2() const { return _n; }
 96 |         int mlen() const { return _m; }
 97 |         int nlen() const { return _n; }
 98 |         int rows() const { return _m; }
 99 |         int cols() const { return _n; }
100 |         // Element access; indices are validated (puts + exit(1)) only when JTP_BOUNDS_CHECK is defined.
101 |         FLOAT& operator()(int m, int n) {
102 | #ifdef JTP_BOUNDS_CHECK
103 |             if (n < 0) { puts("n < 0"); exit(1); }
104 |             if (n >= _n) { puts("n >= _n"); exit(1); }
105 |             if (m < 0) { puts("m < 0"); exit(1); }
106 |             if (m >= _m) { puts("m >= _m"); exit(1); }
107 | #endif
108 |             return _dat[m*_n + n]; 
109 |         }
110 |         const FLOAT& operator()(int m, int n) const {
111 | #ifdef JTP_BOUNDS_CHECK
112 |             if (n < 0) { puts("n < 0"); exit(1); }
113 |             if (n >= _n) { puts("n >= _n"); exit(1); }
114 |             if (m < 0) { puts("m < 0"); exit(1); }
115 |             if (m >= _m) { puts("m >= _m"); exit(1); }
116 | #endif
117 |             return _dat[m*_n + n]; 
118 |         }
119 | 
120 |         // NOTE: All assignment operators act on the caller!
121 |         void operator+=(const MatABR &A);
122 |         void operator-=(const MatABR &A);
123 |         void operator*=(const MatABR &A);
124 |         void operator/=(const MatABR &A);
125 |         void operator+=(const FLOAT val) { _dat += val; }
126 |         void operator-=(const FLOAT val) { _dat -= val; }
127 |         void operator*=(const FLOAT val) { _dat *= val; }
128 |         void operator/=(const FLOAT val) { _dat /= val; }
129 | 
130 |         // Two-operand forms: the result is written to `out`
131 |         void add(const MatABR &toadd, MatABR &out);
132 |         void sub(const MatABR &tosub, MatABR &out);
133 |         void mul(const MatABR &tomul, MatABR &out);
134 |         void div(const MatABR &todiv, MatABR &out);
135 | 
136 |         // returns the transpose in out
137 |         void transpose(MatABR &out);
138 |         // The helpers below forward to the flat storage _dat
139 |         void std_normal() { _dat.std_normal(); }
140 |         void logarithm(double base) { _dat.logarithm(base); }
141 |         void expand(MatABR &result, FLOAT match, int expand_x_lt, int expand_x_rt, int expand_y_up, int expand_y_dn, int expand_diag_lt_up, int expand_diag_rt_up, int expand_diag_lt_dn, int expand_diag_rt_dn );
142 | 
143 |         void min_max(FLOAT &_min, FLOAT &_max) { _dat.min_max(_min,_max); }
144 |         double avg() { return _dat.avg(); }
145 |         //void operator++();
146 |         //void operator--();
147 |         
148 |         FLOAT sum() { return _dat.sum(); } // return the sum of the entire matrix
149 |         FLOAT sum(int m);  // return the sum of a given row
150 |         // Returns in a vector all the values matching mask value
151 |         void mask_as_vec(FLOAT return_val, MatI &mask, VecABR &out);
152 | 
153 |         // prints the bare matrix as ascii
154 |         void print(bool without_axes=0);
155 |         void print(const char *file, bool without_axes=0);
156 |         void print(std::ostream &fout, bool without_axes=0);
157 | 
158 |         // writes the matrix as binary (includes # rows first and # cols as
159 |         // ints)
160 |         void write(const char *file=NULL);
161 | 
162 |         // NOTE(review): write(const char *file) is already declared just above; the TODO below may be stale.
163 |         // @TODO need to write these guys:
164 |         // prints the matrix in binary format:
165 |         // (int) num cols (int) num rows (FLOAT) data
166 | //        void write(const char *file);
167 | //        void write(std::ofstream &fout);
168 | 
169 |     private:
170 |         void _copy(FLOAT *p1, const FLOAT *p2, int len) const;
171 | 
172 | }; // End class MatABR
173 | 
174 | // END TEMPLATE
175 | 
176 | } // End namespace
177 | 
178 | #endif
179 | 
180 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/mata2mat.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdlib>
 3 | #include <cstdio>
 4 | #include <iostream>
 5 | #include "string.h"
 6 | 
 7 | // MINE
 8 | #include "mat.h"
 9 | 
10 | 
11 | using namespace VEC;
12 | int main (int argc, char *argv[]) {
13 |     // Converts each ascii mata file named on the command line to binary, written next to it.
14 |     if (argc == 1) {
15 |         std::cerr <<
16 |         "**********************************************************************\n" <<
17 |         "usage: mata2mat file.mata ... \n" <<
18 |         "**********************************************************************\n" <<
19 |         "outputs file.mat\n" <<
20 |         "converts ascii mata file into binary file (does NOT delete original)\n" <<
21 |         "\n" <<
22 |         "FORMATS: \n" <<
23 |         "mat = binary (all vals 4 bytes):\n" <<
24 |         "        (int)#rows,(int)#cols,(floats)matrix_data_values...\n" <<
25 |         "mata = ascii format (space delimited, with newlines as shown below):\n" <<
26 |         "         #rows, #cols\n" <<
27 |         "         matrix data row1\n" <<
28 |         "         matrix data row2\n" <<
29 |         "         matrix data row3 ...\n" <<
30 |         "**********************************************************************\n";
31 |         exit(1);
32 |     }
33 |     /************************************************************
34 |      * GET ARGUMENTS
35 |      ************************************************************/
36 |     char outfile[1024];
37 |     MatF mat;
38 |     for (int i = 1; i < argc; i++) {
39 |         const size_t len = strlen(argv[i]);
40 |         if (len == 0 || len >= sizeof(outfile)) {  // would index outfile[-1] / overflow the buffer
41 |             std::cerr << "mata2mat: bad file name: " << argv[i] << "\n";
42 |             exit(1);
43 |         }
44 |         strcpy(outfile, argv[i]);
45 |         outfile[len-1] = '\0';  // drop the last character: file.mata -> file.mat
46 |         mat.set_from_ascii(argv[i]);
47 |         mat.write(outfile);
48 |     }
49 | }
50 | 
51 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/not_using/README.txt:
--------------------------------------------------------------------------------
1 | requires argtable library to compile and run and I've removed that as a dependency
2 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/not_using/outliers.cpp:
--------------------------------------------------------------------------------
  1 | // STDLIB:
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include "string.h"
  5 | #include "math.h"
  6 | 
  7 | // 3RD PARTY 
  8 | #include <argtable2.h>
  9 | 
 10 | // MINE
 11 | #include "vec.h"
 12 | #include "mat.h"
 13 | 
 14 | #define DEBUG (0)
 15 | 
 16 | using namespace VEC;
 17 | 
 18 | int mymain(double *deviations, int deviations_cnt, const char **infiles, int infile_cnt );
 19 | 
 20 | int main (int argc, char **argv) {
 21 |     // Entry point of the outliers tool: describe the command line with argtable,
 22 |     // then print help, report parse errors, or hand the parsed values to mymain().
 23 |     struct arg_lit *help =
 24 |         arg_lit0("h", "help", "prints this help and exits");
 25 |     struct arg_dbl *deviations =
 26 |         arg_dbl0("d", "dev", "<deviations>", "deviations cutoff (default 4.0)");
 27 |     struct arg_file *infiles =
 28 |         arg_filen(NULL, NULL, NULL,1,1024, "files to align (first is template)");
 29 |     struct arg_end *end = arg_end(20);
 30 | 
 31 |     void* argtable[] = {help, deviations, infiles, end};
 32 |     const char* progname = "outliers";
 33 |     int exitcode = 0;
 34 | 
 35 |     if (arg_nullcheck(argtable) != 0)
 36 |     {
 37 |         // A NULL entry means one of the allocations above failed.
 38 |         printf("%s: insufficient memory\n",progname);
 39 |         exitcode = 1;
 40 |     }
 41 |     else
 42 |     {
 43 |         // Parse the command line as described by argtable[].
 44 |         const int nerrors = arg_parse(argc,argv,argtable);
 45 | 
 46 |         if (help->count > 0 || argc < 2)
 47 |         {
 48 |             // Help requested, or nothing on the command line: show usage and glossary.
 49 |             printf("*************************************************************************\n");
 50 |             printf("Usage: %s ", progname);
 51 |             arg_print_syntax(stdout,argtable,"\n");
 52 |             printf("\n");
 53 |             printf("tosses out outliers from regression line beyond a certain deviation\n");
 54 |             printf("\n");
 55 |             arg_print_glossary(stdout,argtable,"  %-26s %s\n");
 56 |             printf("\n");
 57 |             printf("*************************************************************************\n");
 58 |             exitcode = 0;
 59 |         }
 60 |         else if (nerrors > 0)
 61 |         {
 62 |             // Show the errors collected in the arg_end struct.
 63 |             arg_print_errors(stdout,end,progname);
 64 |             printf("Try '%s --help' for more information.\n",progname);
 65 |             exitcode = 1;
 66 |         }
 67 |         else
 68 |         {
 69 |             // Normal case: take the command line options at face value.
 70 |             exitcode = mymain(deviations->dval, deviations->count,  infiles->filename, infiles->count);
 71 |         }
 72 |     }
 73 | 
 74 |     // Every path ends here: free each non-null entry in argtable[].
 75 |     arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
 76 | 
 77 |     return exitcode;
 78 | }
 79 | 
 80 | int mymain(double *deviations, int deviations_cnt, const char **infiles, int infile_cnt ) {
 81 |     int i;
 82 |   
 83 |     for (int cnt = 0; cnt < infile_cnt; ++cnt) {
 84 |         double dev;
 85 |         if (deviations_cnt == 1) {
 86 |             dev = deviations[0];  // if they give one deviation, use it for all
 87 |         }
 88 |         else if (deviations_cnt == 0) {
 89 |             // Set deviations default:
 90 |             dev = 4.0;
 91 |         }
 92 |         else {
 93 |             dev = deviations[cnt]; // multiple deviatsion, one for each file
 94 |         }
 95 |         char file[1024];
 96 |         strcpy(file, infiles[cnt]);
 97 | 
 98 |         if (DEBUG) {
 99 |             std::cerr << "**********************************************\n";
100 |             std::cerr << "file: " << file << "\n";
101 |             std::cerr << "**********************************************\n";
102 |         }
103 | 
104 |         MatD mat;
105 |         mat.set_from_ascii(file, 1);
106 |         VecD vecs[2];
107 |         int cnt;
108 |         MatD as_rows;
109 |         mat.transpose(as_rows);
110 |         as_rows.row_vecs(cnt, vecs);
111 |         //vecs[0].print();
112 |         //vecs[1].print();
113 | 
114 |         double rsq, slope, y_intercept;
115 |         VecD::rsq_slope_intercept(vecs[0], vecs[1], rsq, slope, y_intercept);
116 |         //printf("rsq %f slope %f intercept %f\n", rsq, slope, y_intercept);
117 |         //mat.print();
118 |         // mx + b
119 |         // y = slope(x) + intercept
120 | 
121 |         // Get the differences from the regression line
122 |         // expected_y = slope(x) + intercept
123 |         // ydiff = actual_y - expected_y
124 |         // run = ydiff/slope
125 |         ////////////////// ydiff = abs(actual_y - expected_y)
126 |         // ydiff / run = tan a
127 |         // sin a = run / x
128 |         // final = run/( sin(arctan(ydiff/run)) )
129 |         /////////////////// if (actual_y - expected_y) < 0 the diff should be (-)
130 |         VecD residuals(vecs[0].length());
131 |         for (i = 0; i < vecs[0].length(); ++i) {
132 |             double expected_y = (slope*vecs[0][i]) + y_intercept;
133 |             double ydiff = vecs[1][i] - expected_y;
134 |             double run = ydiff/slope;
135 |             residuals[i] = run/( sin(atan(ydiff/run)) );
136 |         }
137 |         //puts("RESIDUALS: ");
138 |         //residuals.print();
139 |         //puts("END RESIDUALS: ");
140 | 
141 |         // get the mean and standard deviation
142 |         double mean, stddev;
143 |         residuals.sample_stats(mean, stddev);
144 |         //printf("m: %f std: %f\n", mean, stddev);
145 | 
146 |         // for each difference calculate standard deviations
147 |         MatD acceptable_tmp(vecs[0].length(), 2);
148 | 
149 |         int num_accept = 0;
150 |         int not_accept = 0;
151 |         for (i = 0; i < residuals.length(); ++i) {
152 |             // #stddevsaway = abs(residuals[i] - mean)/stddev );
153 |             double point_devs = (residuals[i] - mean)/stddev;
154 |             if (point_devs < 0.0) { point_devs = -1.0*point_devs; } // abs val
155 | 
156 |             if (point_devs <= dev) {  // acceptable
157 |                 //printf("acceptable dev: %f\n", point_devs);
158 |                 acceptable_tmp(num_accept,0) = vecs[0][i];
159 |                 acceptable_tmp(num_accept,1) = vecs[1][i];
160 |                 ++num_accept;
161 |             }
162 |             else {  // not acceptable! toss out
163 |                 //printf("NOT ACCEPTABLE: %f, %f\n", vecs[0][i], vecs[1][i]);
164 |                 ++not_accept;
165 |             }
166 |         }
167 | 
168 |         printf("TOSSED %d points > %.1f deviations from regression line (of %d total) reading file: %s\n", not_accept, dev, residuals.length(), file);
169 |         MatD accept(num_accept,2,(double*)acceptable_tmp,1);
170 | 
171 | 
172 |         // print to file named file+.4.0out
173 |         char devs_str[10];
174 |         sprintf(devs_str, "%.1f",dev);
175 |         strcat(file, ".");
176 |         strcat(file, devs_str); 
177 |         strcat(file, "out"); 
178 |         //puts("Accept");
179 |         //accept.print();
180 |         accept.print(file,1);
181 |     }
182 |     return 0;
183 | }
184 | 
185 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/not_using/test_outliers.h:
--------------------------------------------------------------------------------
 1 | #include <cxxtest/TestSuite.h>
 2 | #include <cstdlib>
 3 | #include "mat.h"
 4 | #include "vec.h"
 5 | 
 6 | 
 7 | using namespace VEC;
 8 | class OutliersTestSuite : public CxxTest::TestSuite 
 9 | {
10 |     public:
11 |         // Assures that the same data is representated before and after
12 |         // conversions
13 |         void test_outliers( void ) {
14 |             system("./outliers -d 1.2 tfiles/tmptimes.txt");
15 |             //TS_ASSERT_EQUALS(fromascii.mzlen(), ch_mz_vals);
16 |         }
17 | 
18 | 
19 | };
20 | 
21 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/obiwarp.cpp:
--------------------------------------------------------------------------------
  1 | // STDLIB:
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include "string.h"
  6 | #include <cstdlib>
  7 | 
  8 | // MINE
  9 | #include "vec.h"
 10 | #include "mat.h"
 11 | #include "lmat.h"
 12 | #include "dynprog.h"
 13 | #include "pngio.h"
 14 | #include "cmdparser.h"
 15 | 
/********************************************/
// Program version string; handed to CmdParser in main().
char * VERSION = (char *)"0.9.4";
/********************************************/

// Set to a non-zero value to print progress messages to stderr.
#define DEBUG (0)

// Defined after main(); reports whether a format name is "mat" or "mata".
bool format_is_labelless(const char *format);
 23 | 
 24 | int main (int argc, char **argv) {
 25 |     // NOTE: use outfile as indicator if option passed in as opts.outfile!
 26 |     // because we can set opts.outfile to NULL and other routines will
 27 |     // automatically write to stdout!
 28 |     bool outfile = 0;
 29 |     bool outfile_is_stdout = 0;
 30 | 
 31 |     CmdParser opts(argc, argv, VERSION);
 32 | 
 33 |     if (opts.outfile != NULL) {
 34 |         outfile = 1;
 35 |         if (!strcmp(opts.outfile, "STDOUT")) {
 36 |             outfile_is_stdout = 1;
 37 |             opts.outfile = NULL;
 38 |         }
 39 |     }
 40 | 
 41 |     char file1[1024];
 42 |     char file2[1024];
 43 |     strcpy(file1, opts.infiles[0]);
 44 |     strcpy(file2, opts.infiles[1]); 
 45 | 
 46 |     // ************************************************************
 47 |     // * READ IN FILES TO GET MAT 
 48 |     // ************************************************************
 49 |     LMat lmat1;
 50 |     LMat lmat2;
 51 |     MatF smat;
 52 |     DynProg dyn;
 53 | 
 54 |     if (!strcmp(opts.format, "mat")) {
 55 |         lmat1.set_from_binary_mat(file1);
 56 |         lmat2.set_from_binary_mat(file2);
 57 |     }
 58 |     else if (!strcmp(opts.format, "mata")) {
 59 |         lmat1.set_from_ascii_mat(file1);
 60 |         lmat2.set_from_ascii_mat(file2);
 61 |     }
 62 |     else if (!strcmp(opts.format, "lmat")) {
 63 |         lmat1.set_from_binary(file1);
 64 |         lmat2.set_from_binary(file2);
 65 |     }
 66 |     else if (!strcmp(opts.format, "lmata")) {
 67 |         lmat1.set_from_ascii(file1);
 68 |         lmat2.set_from_ascii(file2);
 69 |     }
 70 |     ////puts("LMAT1 AND LMAT2"); lmat1.print(); lmat2.print();
 71 | 
 72 |     // ************************************************************
 73 |     // * SCORE THE MATRICES
 74 |     // ************************************************************
 75 |     if (DEBUG) {
 76 |         std::cerr << "Scoring the mats!\n";
 77 |     }
 78 |     if (opts.smat_in != NULL) {
 79 |         smat.set_from_binary(opts.smat_in);
 80 |         dyn._smat = &smat;
 81 |     }
 82 |     else {
 83 |         dyn.score(*(lmat1.mat()), *(lmat2.mat()), smat, opts.score);
 84 |         // SETTING THE SMAT TO BE std normal
 85 |         if (!opts.nostdnrm) {
 86 |             if (!smat.all_equal()) { 
 87 |                 smat.std_normal();
 88 |             }
 89 |         }
 90 |         if (!strcmp(opts.score,"euc")) {
 91 |             smat *= -1; // inverting euclidean
 92 |         }
 93 |     }
 94 |     if (opts.smat_out != NULL) {
 95 |         std::cerr << "Writing binary smat to '" << opts.smat_out << "'\n";
 96 |         smat.write(opts.smat_out);
 97 |         //smat.print(smat_out_files[0]);
 98 |         exit(0);
 99 |     }
100 | 
101 |     // ************************************************************
102 |     // * PREPARE GAP PENALTY ARRAY
103 |     // ************************************************************
104 |    
105 |     MatF time_tester;
106 |     MatF time_tester_trans;
107 |     VecF mpt;
108 |     VecF npt;
109 |     VecF mOut_tm;
110 |     VecF nOut_tm;
111 | 
112 |     int gp_length = smat.rows() + smat.cols();
113 | 
114 |     VecF gp_array;
115 |     dyn.linear_less_before(opts.gap_extend,opts.gap_init,gp_length,gp_array);
116 | 
117 |     // ************************************************************
118 |     // * DYNAMIC PROGRAM
119 |     // ************************************************************ 
120 |     int minimize = 0;
121 |     if (DEBUG) {
122 |         std::cerr << "Dynamic Time Warping Score Matrix!\n";
123 |     }
124 |     dyn.find_path(smat, gp_array, minimize, opts.factor_diag, opts.factor_gap, opts.local, opts.init_penalty);
125 | 
126 |     VecI mOut;
127 |     VecI nOut;
128 |     dyn.warp_map(mOut, nOut, opts.response, minimize);
129 |     //puts("mOUT"); mOut.print(); nOut.print();
130 | 
131 |     // Major output unless its the only case where we don't need warped time
132 |     // values
133 |     if (!(outfile_is_stdout && format_is_labelless(opts.format))) {
134 |         // MAJOR OUTPUT:
135 |         VecF nOutF;
136 |         VecF mOutF;
137 |         lmat1.tm_axis_vals(mOut, mOutF);
138 |         lmat2.tm_axis_vals(nOut, nOutF); //
139 |         lmat2.warp_tm(nOutF, mOutF); 
140 |         lmat2.tm()->print(1);
141 |     }
142 | 
143 |     // No labels on matrix and we have an outfile to produce
144 |     // Needs to be after MAJOR OUTPUT since it warps the data!
145 |     if (format_is_labelless(opts.format) && outfile) {
146 |         // @TODO: implement data warping here
147 |     }
148 | 
149 |     // All subroutines below should write to the specified file
150 |     // if the file == NULL then they should write to stdout!
151 |     // opts.outfile is set to NULL if "STDOUT" is specified!
152 |     if (outfile) {
153 |         if (!strcmp(opts.format, "mat")) {
154 |             lmat2.mat()->write(opts.outfile);
155 |         }
156 |         else if (!strcmp(opts.format, "mata")) {
157 |             lmat2.mat()->print(opts.outfile);
158 |         }
159 |         else if (!strcmp(opts.format, "lmat")) {
160 |             lmat2.write(opts.outfile);
161 |         }
162 |         else if (!strcmp(opts.format, "lmata")) {
163 |             lmat2.print(opts.outfile);
164 |         }
165 |         else {
166 |             std::cerr << "Can't output to" << opts.format << "format (yet)\n";
167 |             exit(0);
168 |         }
169 |     }
170 | 
171 |     // After all other output to stdout
172 |     if (opts.timefile != NULL) {
173 |         time_tester.set_from_ascii(opts.timefile, 1);  // no headers on the files
174 |         time_tester.transpose(time_tester_trans);
175 |         mpt.set(time_tester_trans.cols(), time_tester_trans.pointer(0));
176 |         npt.set(time_tester_trans.cols(), time_tester_trans.pointer(1));
177 |         float ssr, asr, sad, aad;
178 |         dyn.path_accuracy((*lmat1._tm), (*lmat2._tm), mOut, nOut, mpt, npt, ssr, asr, sad, aad);
179 |         printf("%f %f %f %f\n", ssr, asr, sad, aad);
180 |     }
181 | 
182 | 
183 |     if (opts.images) {
184 |         PngIO wrt(1);
185 |         char base_fn[1024];
186 |         strcpy(base_fn, "obi-warp_");
187 |         char tb_fn[1024];
188 |         strcpy(tb_fn, base_fn);
189 |         strcat(tb_fn, "tb.png");
190 |         //char *tb_fn = "tb.png";
191 |         wrt.write(tb_fn, dyn._tb);
192 |         char tbpath_fn[1024];
193 |         strcpy(tbpath_fn, base_fn);
194 |         strcat(tbpath_fn, "tbpath.png");
195 |         wrt.write(tbpath_fn, dyn._tbpath);
196 | 
197 |         char asmat_fn[1024];
198 |         strcpy(asmat_fn, base_fn);
199 |         strcat(asmat_fn, "asmat.png");
200 |         //wrt.write(asmat_fn, dyn._asmat);
201 | 
202 |         //strcpy(base_fn, "tb.png");
203 |         //char *tbpath_fn = "tbpath.png";
204 |         //char *tbscores_fn = "tbscores.png";
205 |         //wrt.write(tbscores_fn, dyn._tbscores);
206 |         //char *asmat_fn = "asmat.png";
207 |         //wrt.write(asmat_fn, dyn._asmat);
208 |         char *smat_fn = (char *)"smat.png";
209 |         //wrt.write(smat_fn, *dyn._smat);
210 |     }
211 | 
212 | /*
213 |    char silly[100];
214 |    strcpy(silly, "png_");
215 |    char tmpp[5];
216 |    sprintf(tmpp, "%d", i);
217 |    strcat(silly, tmpp); 
218 |    strcat(silly, ".png");
219 | 
220 |    PngIO wrt(0);
221 | //wrt.write(silly, dyn._tbpath);
222 | wrt.write(silly, _scorepath);
223 | */
224 | 
225 | return 0;
226 | }
227 | 
// Tells whether `format` names one of the plain matrix formats, "mat" or
// "mata"; every other name (e.g. "lmat", "lmata") yields false.
bool format_is_labelless(const char *format) {
    const bool is_mat = (strcmp(format, "mat") == 0);
    const bool is_mata = (strcmp(format, "mata") == 0);
    return is_mat || is_mata;
}
236 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/obiwarp.dsp:
--------------------------------------------------------------------------------
  1 | # Microsoft Developer Studio Project File - Name="obiwarp" - Package Owner=<4>
  2 | # Microsoft Developer Studio Generated Build File, Format Version 6.00
  3 | # ** DO NOT EDIT **
  4 | 
  5 | # TARGTYPE "Win32 (x86) Console Application" 0x0103
  6 | 
  7 | CFG=obiwarp - Win32 Debug
  8 | !MESSAGE This is not a valid makefile. To build this project using NMAKE,
  9 | !MESSAGE use the Export Makefile command and run
 10 | !MESSAGE 
 11 | !MESSAGE NMAKE /f "obiwarp.mak".
 12 | !MESSAGE 
 13 | !MESSAGE You can specify a configuration when running NMAKE
 14 | !MESSAGE by defining the macro CFG on the command line. For example:
 15 | !MESSAGE 
 16 | !MESSAGE NMAKE /f "obiwarp.mak" CFG="obiwarp - Win32 Debug"
 17 | !MESSAGE 
 18 | !MESSAGE Possible choices for configuration are:
 19 | !MESSAGE 
 20 | !MESSAGE "obiwarp - Win32 Release" (based on "Win32 (x86) Application")
 21 | !MESSAGE "obiwarp - Win32 Debug" (based on "Win32 (x86) Application")
 22 | !MESSAGE 
 23 | 
 24 | # Begin Project
 25 | # PROP AllowPerConfigDependencies 0
 26 | # PROP Scc_ProjName ""
 27 | # PROP Scc_LocalPath ""
 28 | CPP=cl.exe
 29 | RSC=rc.exe
 30 | 
 31 | !IF  "$(CFG)" == "obiwarp - Win32 Release"
 32 | 
 33 | # PROP BASE Use_MFC 0
 34 | # PROP BASE Use_Debug_Libraries 0
 35 | # PROP BASE Output_Dir "Release"
 36 | # PROP BASE Intermediate_Dir "Release"
 37 | # PROP BASE Target_Dir ""
 38 | # PROP Use_MFC 0
 39 | # PROP Use_Debug_Libraries 0
 40 | # PROP Output_Dir "Release"
 41 | # PROP Intermediate_Dir "Release"
 42 | # PROP Target_Dir ""
 43 | # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
 44 | # ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
 45 | # ADD BASE RSC /l 0x409 /d "NDEBUG"
 46 | # ADD RSC /l 0x409 /d "NDEBUG"
 47 | BSC32=bscmake.exe
 48 | # ADD BASE BSC32 /nologo
 49 | # ADD BSC32 /nologo
 50 | LINK32=link.exe
 51 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib  kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
 52 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib  kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
 53 | 
 54 | !ELSEIF  "$(CFG)" == "obiwarp - Win32 Debug"
 55 | 
 56 | # PROP BASE Use_MFC 0
 57 | # PROP BASE Use_Debug_Libraries 1
 58 | # PROP BASE Output_Dir "Debug"
 59 | # PROP BASE Intermediate_Dir "Debug"
 60 | # PROP BASE Target_Dir ""
 61 | # PROP Use_MFC 0
 62 | # PROP Use_Debug_Libraries 1
 63 | # PROP Output_Dir "Debug"
 64 | # PROP Intermediate_Dir "Debug"
 65 | # PROP Target_Dir ""
 66 | # ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ  /c
 67 | # ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ  /c
 68 | # ADD BASE RSC /l 0x409 /d "_DEBUG"
 69 | # ADD RSC /l 0x409 /d "_DEBUG"
 70 | BSC32=bscmake.exe
 71 | # ADD BASE BSC32 /nologo
 72 | # ADD BSC32 /nologo
 73 | LINK32=link.exe
 74 | # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib  kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
 75 | # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib  kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nodefaultlib:"libcmtd.lib" /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
 76 | 
 77 | !ENDIF 
 78 | 
 79 | # Begin Target
 80 | 
 81 | # Name "obiwarp - Win32 Release"
 82 | # Name "obiwarp - Win32 Debug"
 83 | # Begin Group "Source"
 84 | 
 85 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
 86 | # Begin Source File
 87 | SOURCE=./vec.cpp
 88 | # End Source File
 89 | # Begin Source File
 90 | SOURCE=./mat.cpp
 91 | # End Source File
 92 | # Begin Source File
 93 | SOURCE=./lmat.cpp
 94 | # End Source File
 95 | # Begin Source File
 96 | SOURCE=./dynprog.cpp
 97 | # End Source File
 98 | # Begin Source File
 99 | SOURCE=./cmdparser.cpp
100 | # End Source File
101 | # Begin Source File
102 | SOURCE=./pngio.cpp
103 | # End Source File
104 | # Begin Source File
105 | SOURCE=./obiwarp.cpp
106 | # End Source File
107 | # End Group
108 | # Begin Group "Headers"
109 | # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
110 | # Begin Source File
111 | SOURCE=./vec.h
112 | # End Source File
113 | # Begin Source File
114 | SOURCE=./mat.h
115 | # End Source File
116 | # Begin Source File
117 | SOURCE=./lmat.h
118 | # End Source File
119 | # Begin Source File
120 | SOURCE=./dynprog.h
121 | # End Source File
122 | # Begin Source File
123 | SOURCE=./cmdparser.h
124 | # End Source File
125 | # Begin Source File
126 | SOURCE=./pngio.h
127 | # End Source File
128 | # Begin Source File
129 | SOURCE=./obiwarp.h
130 | # End Source File
131 | # End Group
132 | # End Target
133 | # End Project
134 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/pngio.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdio.h>
  3 | #include <string.h>
  4 | #include <iostream>
  5 | 
  6 | #include "assert.h"
  7 | #include "pngio.h"
  8 | 
  9 | #define PNG_DEBUG 3
 10 | 
 11 | #include "mat.h"
 12 | 
 13 | using namespace VEC;
 14 | 
 15 | PngIO::PngIO(bool bw): _bw(bw) {
 16 | }
 17 | 
 18 | void PngIO::write(char *file, MatI &mat) {
 19 |     // This could be made faster (faster printing of matrix [access pointer
 20 |     // directly] and input to matrix2png stdin
 21 |     char *_tmp = (char *)"tmp.tmp.tmp";
 22 |     FILE *pOUT = fopen(_tmp, "w");
 23 |     assert(pOUT);
 24 |     
 25 |     // Print data to file:
 26 |     fputs("CORNERLABEL", pOUT);
 27 |     for (int i = 0; i < mat.cols(); ++i) {
 28 |         fprintf(pOUT, "\t%d", i);
 29 |     }
 30 |     fputs("\n", pOUT);
 31 | 
 32 |     for (int m = 0; m < mat.rows(); ++m) {
 33 |         fprintf(pOUT, "%d", m);
 34 |         for (int n = 0; n < mat.cols(); ++n) {
 35 |             fprintf(pOUT, "\t%d", mat(m,n));
 36 |             //printf("%d ", mat(m,n));
 37 |         }
 38 |         //printf("\n");
 39 |         fputs("\n", pOUT);
 40 |     }
 41 |     fclose(pOUT);
 42 |     
 43 |     // CREATE system call to matrix2png:
 44 |     char str1[1000]; 
 45 |     strcpy (str1, "matrix2png -data ");
 46 |     strcat(str1, _tmp);
 47 |     strcat(str1, " ");
 48 |     if (_bw) {
 49 |         strcat(str1, "-mincolor white -maxcolor black");
 50 |     }
 51 |     else {
 52 |         strcat(str1, "-mincolor green -maxcolor red");
 53 |     }
 54 |     strcat(str1, " >");
 55 |     strcat(str1, file);
 56 |     printf("*****************************************************\n");
 57 |     printf("Calling: %s\n", str1);
 58 |     int ret = system(str1);
 59 |     printf("SYSTEM RETURNED %d\n", ret);
 60 |     printf("*****************************************************\n");
 61 | 
 62 |     // CLEANUP:
 63 |     remove(_tmp);
 64 | }
 65 | 
 66 | void PngIO::write(char *file, MatF &mat) {
 67 |     // This could be made faster (faster printing of matrix [access pointer
 68 |     // directly] and input to matrix2png stdin
 69 |     char *_tmp = (char *)"tmp.tmp.tmp";
 70 |     FILE *pOUT = fopen(_tmp, "w");
 71 |     assert(pOUT);
 72 |     
 73 |     // Print data to file:
 74 |     fputs("CORNERLABEL", pOUT);
 75 |     for (int i = 0; i < mat.cols(); ++i) {
 76 |         fprintf(pOUT, "\t%d", i);
 77 |     }
 78 |     fputs("\n", pOUT);
 79 | 
 80 |     for (int m = 0; m < mat.rows(); ++m) {
 81 |         fprintf(pOUT, "%d", m);
 82 |         for (int n = 0; n < mat.cols(); ++n) {
 83 |             fprintf(pOUT, "\t%f", mat(m,n));
 84 |             //printf("%d ", mat(m,n));
 85 |         }
 86 |         //printf("\n");
 87 |         fputs("\n", pOUT);
 88 |     }
 89 |     fclose(pOUT);
 90 |     
 91 |     // CREATE system call to matrix2png:
 92 |     char str1[1000]; 
 93 |     strcpy (str1, (char *)"matrix2png -data ");
 94 |     strcat(str1, _tmp);
 95 |     strcat(str1, " ");
 96 |     if (_bw) {
 97 |         strcat(str1, "-mincolor white -maxcolor black");
 98 |     }
 99 |     else {
100 |         strcat(str1, "-mincolor green -maxcolor red");
101 |     }
102 |     strcat(str1, " >");
103 |     strcat(str1, file);
104 |     printf("*****************************************************\n");
105 |     printf("Calling: %s\n", str1);
106 |     int ret = system(str1);
107 |     printf("SYSTEM RETURNED %d\n", ret);
108 |     printf("*****************************************************\n");
109 | 
110 |     // CLEANUP:
111 |     remove(_tmp);
112 | }
113 | 
114 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/pngio.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _PNGIO_H
 3 | #define _PNGIO_H
 4 | 
 5 | #include "mat.h"
 6 | 
 7 | using namespace VEC;
 8 | 
// Writes matrices as PNG images.
class PngIO {
    private:
        // Colour-map selector set by the constructor.
        int _bw;

    public:
        // bw: stored in _bw; defaults to 0.
        PngIO(bool bw=0);
        // Writes an integer matrix as an image to `file`.
        void write(char *file, MatI &mat);
        //bool write(char *file, VecI vec);
        // Writes a float matrix as an image to `file`.
        void write(char *file, MatF &mat);
};
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_cmdparser.rb:
--------------------------------------------------------------------------------
  1 | 
  2 | require 'test/unit'
  3 | 
  4 | 
  5 | $WIN32 = false
  6 | 
  7 | if ENV["OS"] =~ /Windows/
  8 |   $WIN32 = true
  9 | end
 10 | 
 11 | OBIWARP_PATH = "../bin/obiwarp" + ($WIN32 ? ".exe" : "")
 12 | 
 13 | TFILES = "tfiles/"
 14 | LMAT1 = TFILES + 'tmp1.lmat'
 15 | LMAT2 = TFILES + 'tmp1B.lmat'
 16 | 
 17 | class MyTests < Test::Unit::TestCase
 18 |   def diagnostics(reply)
 19 |     #puts reply
 20 |     hash = {}
 21 |     looking = false
 22 |     reply.split("\n").each do |line|
 23 |       if line =~ /\*{10,}/
 24 |         if looking
 25 |           looking = false
 26 |         else
 27 |           looking = true
 28 |         end
 29 |       elsif line =~ /(.*): (.*)/ && looking
 30 |         hash[$1.dup] = $2.dup
 31 |       end 
 32 |     end
 33 |     #p hash
 34 |     hash
 35 |   end
 36 |   
 37 |   def ob
 38 |     OBIWARP_PATH
 39 |   end
 40 | 
 41 |   def test_min_input
 42 |     #puts OBIWARP_PATH
 43 |     assert( File.exist?(ob), "obiwarp executable is in #{OBIWARP_PATH}")
 44 |     reply = `"#{ob}"`
 45 |     assert_match( /USAGE:/, reply, "no values passed in" )
 46 |     assert_match( /USAGE: #{File.basename(ob).gsub(/\.exe$/, '')}/, reply, "help progname matches executable")
 47 |     reply = `"#{ob}" only_1_file`
 48 |     assert_match( /USAGE:/, reply, "only one file passed in" )
 49 |   end
 50 | 
 51 |   def test_bad_files_input
 52 |     reply = `#{ob} badfile1 badfile2`
 53 |     assert_match(/Cannot open/, reply)
 54 |   end
 55 | 
 56 |   def test_opts
 57 |     t_opt(["--format"], "format", "mat");
 58 |     # What about no format give (should be same as LMAT1)
 59 |     t_opt_nil("format", "lmat")
 60 |     t_opt(%w(--outfile -o), "outfile", "myoutfilename");
 61 |     t_opt(%w(--images), "images");
 62 |     t_opt(%w(--timefile -t), "timefile", "mytimefile");
 63 | 
 64 |     expect = %w(cor cov prd euc)
 65 |     expect.each do |arg|
 66 |       t_opt(%w(--score -s), "score", arg)
 67 |     end
 68 |     t_opt(%w(--local -l), "local");
 69 |     t_opt(%w(--nostdnrm), "nostdnrm");
 70 |     t_opt_split(%w(--factor -f), "factor_diag", "factor_gap", "3.2,2.2")
 71 |     t_opt_split(%w(--gap -g), "gap_init", "gap_extend", "3.2,2.2")
 72 |     t_opt(%w(--init -i), "init_penalty", 2.1)
 73 |     t_opt(%w(--response -r), "response", 2.3);
 74 |   end
 75 | 
 76 |   ##########################################
 77 |   # HELPER FUNCS:
 78 |   ##########################################
 79 | 
 80 |   # For testing args like this: '23.3,5.2'
 81 |   def t_opt_split(opt_list, varname1, varname2, val)
 82 |     opt_list.each do |opt|
 83 |       reply =  `#{ob} #{opt} #{val} --diagnostics #{LMAT1} #{LMAT2}`
 84 |       hash = diagnostics(reply)
 85 |       val1, val2 = val.split(",")
 86 |       assert_equal("#{val1}", hash[varname1])
 87 |       assert_equal("#{val2}", hash[varname2])
 88 |     end
 89 |   end
 90 | 
 91 |   # for a variable we expect to see, even though no variables are passed in
 92 |   def t_opt_nil(varname, val)
 93 |     cmd = "#{ob} --diagnostics #{LMAT1} #{LMAT2}"
 94 |     reply = `#{cmd}`
 95 |     hash = diagnostics(reply)
 96 |     assert_equal("#{val}", hash[varname])
 97 |   end
 98 | 
 99 |   # for testing normal options
100 |   # opt_list is a list of equivalent options
101 |   # varname is the name of the diagnostic hash variable name
102 |   # val is the value of the option passed in and the value expected out
103 |   # if val == nil then the option is a flag and the output should be == 1
104 |   def t_opt(opt_list, varname, val=nil)
105 |     opt_list.each do |opt|
106 |       cmd = "#{ob} #{opt} #{val} --diagnostics #{LMAT1} #{LMAT2}"
107 |       #puts cmd
108 |       reply = `#{cmd}`
109 |       #puts "REPLY"
110 |       #puts reply
111 |       #puts "END PREPFYUDF"
112 |       hash = diagnostics(reply)
113 |       #p hash
114 |       if val == nil
115 |         assert_equal("1", hash[varname])
116 |       else
117 |         #puts "AHSHH" + "#{val}"
118 |         #puts hash["score"]
119 |         assert_equal("#{val}", hash[varname])
120 |       end
121 |     end
122 |   end
123 | 
124 | end
125 | 
126 | 
127 | 
128 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_lmat.h:
--------------------------------------------------------------------------------
  1 | #include <cxxtest/TestSuite.h>
  2 | #include <cstdlib>
  3 | #include "lmat.h"
  4 | #include "mat.h"
  5 | #include "vec.h"
  6 | 
  7 | 
  8 | using namespace VEC;
  9 | class LMatTestSuite : public CxxTest::TestSuite 
 10 | {
 11 |     public:
 12 |         void test_creation( void ) {
 13 |             LMat testing;
 14 |             TS_ASSERT_EQUALS(testing._mz_vals, 0);
 15 |             TS_ASSERT_EQUALS(testing._tm_vals, 0);
 16 |         }
 17 | 
 18 |         void test_warp_tm( void ) {
 19 |             float self_arr[5] = {0,3,5,6,12};
 20 |             float other_arr[5] = {1,3,7,9,20};
 21 |             VecF selfy(5,self_arr,1);
 22 |             VecF other(5,other_arr,1);
 23 |             float time_arr[16] = {0,1,2,3,4,4.1,5,6,7,8,9,10,10.001,11,12.1, 13.1};
 24 |             float answ_arr[16] = {1, 1.28863, 2.02171, 3, 4.75862, 4.98493, 7, 9, 10.9224, 12.8188, 14.6819, 16.5046, 16.5064, 18.2797, 20.1687, 21.8204};
 25 |             VecF answ(16, answ_arr,1);
 26 |             LMat obj;
 27 |             obj._tm->set(16, time_arr);
 28 |             obj.warp_tm(selfy, other); 
 29 |             for (int i = 0; i < 16; i++) {
 30 |                 TS_ASSERT_DELTA(answ[1], obj.tm()->at(1), 0.001);
 31 |             }
 32 |         }
 33 | 
 34 |         void test_ascii_read_write( void ) {
 35 |             LMat readin;
 36 |             readin.set_from_ascii("tfiles/tmp1.lmata");
 37 |             TS_ASSERT_EQUALS(readin.mzlen(), 30);
 38 |             TS_ASSERT_EQUALS(readin.tmlen(), 40);
 39 |             float _mz[30] = { 400,401,402,403,404,405,406,407,408,409,
 40 |                               410,411,412,413,414,415,416,417,418,419,
 41 |                               420,421,422,423,424,425,426,427,428,429
 42 |             };
 43 |             VecF mzv(30,_mz,1);
 44 |             TS_ASSERT_EQUALS( mzv, *(readin.mz()) );
 45 |             TS_ASSERT_DELTA( (*readin.mat())(0,0), 6139950.06794636, 0.1 );
 46 |             TS_ASSERT_DELTA( (*readin.mat())(39,29), 2292810.65100822, 0.1 );
 47 |             TS_ASSERT_DELTA((*readin.mat())(7,8), 1397963.17842461, 0.1 );
 48 | 
 49 |             // *******************************************
 50 |             // TEST COORDS
 51 |             // *******************************************
 52 |             VecI obj1(4);
 53 |             obj1[0] = 1;
 54 |             obj1[1] = 3;
 55 |             obj1[2] = 4;
 56 |             obj1[3] = 8;
 57 |             VecF out;
 58 |             readin.mz_axis_vals(obj1,out);
 59 |             TS_ASSERT_DELTA(out[0], 401, 0.001);
 60 |             TS_ASSERT_DELTA(out[1], 403, 0.001);
 61 |             TS_ASSERT_DELTA(out[2], 404, 0.001);
 62 |             TS_ASSERT_DELTA(out[3], 408, 0.001);
 63 |             readin.tm_axis_vals(obj1,out);
 64 |             TS_ASSERT_DELTA(out[0], 1212.34, 0.001);
 65 |             TS_ASSERT_DELTA(out[1], 1236.34, 0.001);
 66 |             TS_ASSERT_DELTA(out[2], 1248.34, 0.001);
 67 |             TS_ASSERT_DELTA(out[3], 1296.34, 0.001);
 68 |             // *******************************************
 69 | 
 70 |             char *tmpfile = (char *)"tmp.tmp.tmp";
 71 |             readin.print(tmpfile);
 72 |             
 73 |             LMat readnew;
 74 |             readnew.set_from_ascii(tmpfile);
 75 |             TS_ASSERT_EQUALS(readnew.mzlen(), 30);
 76 |             TS_ASSERT_EQUALS(readnew.tmlen(), 40);
 77 |             TS_ASSERT_EQUALS(readnew.tmlen(), 40);
 78 |             TS_ASSERT_EQUALS( mzv, *(readnew.mz()) );
 79 |             TS_ASSERT_DELTA( (*readnew.mat())(0,0), 6139950.06794636, 0.1 );
 80 |             TS_ASSERT_DELTA( (*readnew.mat())(39,29), 2292810.65100822, 0.1 );
 81 |             TS_ASSERT_DELTA((*readnew.mat())(7,8), 1397963.17842461, 0.1 );
 82 |             remove(tmpfile); 
 83 |                 
 84 |             // Test printing to stdout
 85 |             //readin.set_from_ascii("tfiles/tmp1.lmata");
 86 |             //readin.print();
 87 |         }
 88 | 
 89 |         void test_creation_from_mat( void ) {
 90 |             LMat obj;
 91 |             obj.set_from_binary_mat("tfiles/file1.mat");
 92 |             TS_ASSERT_EQUALS(obj.tmlen(), 4);
 93 |             TS_ASSERT_EQUALS(obj.mzlen(), 3);
 94 |             TS_ASSERT_EQUALS((*obj.mat())(0,0), 1.0);
 95 |             TS_ASSERT_EQUALS((*obj.mat())(3,2), 12.0);
 96 |         }
 97 | 
 98 |         void test_creation_from_mata( void ) {  // Same checks as the binary case, but loading the ascii .mata fixture.
 99 |             LMat fromAscii;
100 |             fromAscii.set_from_ascii_mat("tfiles/file1.mata");
101 |             TS_ASSERT_EQUALS(fromAscii.tmlen(), 4);
102 |             TS_ASSERT_EQUALS(fromAscii.mzlen(), 3);
103 |             TS_ASSERT_EQUALS((*fromAscii.mat())(0,0), 1.0);   // first cell
104 |             TS_ASSERT_EQUALS((*fromAscii.mat())(3,2), 12.0);  // last cell of the 4 x 3 matrix
105 |         }
106 | 
107 |         void test_binary_read_write( void ) {  // Reads the ascii fixture, writes it with write(), reloads it, and compares axes and sample cells.
108 |             int ch_mz_vals = 30;
109 |             int ch_tm_vals = 40;
110 |             LMat readin;
111 |             readin.set_from_ascii("tfiles/tmp1.lmata");
112 |             TS_ASSERT_EQUALS(readin.mzlen(), ch_mz_vals);
113 |             TS_ASSERT_EQUALS(readin.tmlen(), ch_tm_vals);
114 |             float *mptr = (float*)(*readin.mat());  // flat view of the matrix data
115 |             TS_ASSERT_DELTA(mptr[0], 6139950.06794636, 0.1 );
116 |             TS_ASSERT_DELTA(mptr[(ch_mz_vals*ch_tm_vals)-1], 2292810.65100822, 0.1 );  // final element of the flat buffer
117 |             TS_ASSERT_DELTA((*readin.mat())(7,8), 1397963.17842461, 0.1 );
118 | 
119 |             char *tmpfile = (char *)"tmp2.tmp.tmp";  // scratch file, removed at the end of the test
120 |             readin.write(tmpfile);
121 |             LMat readnew(tmpfile);  // presumably this constructor reads the format produced by write() -- confirm in lmat.h
122 | 
123 |             float _mz[30] = { 400,401,402,403,404,405,406,407,408,409,
124 |                               410,411,412,413,414,415,416,417,418,419,
125 |                               420,421,422,423,424,425,426,427,428,429
126 |             };
127 |             float _tm[40] = { 1200.34, 1212.34, 1224.34, 1236.34, 1248.34, 
128 |                 1260.34, 1272.34, 1284.34, 1296.34, 1308.34, 1320.34, 1332.34, 
129 |                 1344.34, 1356.34, 1368.34, 1380.34, 1392.34, 1404.34, 1416.34, 
130 |                 1428.34, 1440.34, 1452.34, 1464.34, 1476.34, 1488.34, 1500.34, 
131 |                 1512.34, 1524.34, 1536.34, 1548.34, 1560.34, 1572.34, 1584.34, 
132 |                 1596.34, 1608.34, 1620.34, 1632.34, 1644.34, 1656.34, 1668.34 
133 |             };
134 |             VecF mzv(ch_mz_vals,_mz,1);  // expected m/z axis
135 |             VecF tmv(ch_tm_vals,_tm,1);  // expected time axis
136 |             TS_ASSERT_EQUALS(readnew.mzlen(), ch_mz_vals);
137 |             TS_ASSERT_EQUALS(readnew.tmlen(), ch_tm_vals);
138 |             TS_ASSERT_EQUALS( mzv, *(readnew.mz()) );
139 |             TS_ASSERT_EQUALS( tmv, *(readnew.tm()) );
140 |             TS_ASSERT_DELTA( (*readnew.mat())(0,0), 6139950.06794636, 0.1 );
141 |             TS_ASSERT_DELTA( (*readnew.mat())(7,8),1397963.17842461, 0.1 );
142 |             TS_ASSERT_DELTA( (*readnew.mat())(39,29), 2292810.65100822, 0.1 );
143 |             remove(tmpfile);
144 | 
145 |             // Test writing binary file to stdout
146 |             //readin.write();
147 |         }
148 |       
149 | 
150 | };
151 | 
152 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_lmat_converters.h:
--------------------------------------------------------------------------------
 1 | #include <cxxtest/TestSuite.h>
 2 | #include <cstdlib>
 3 | #include "lmat.h"
 4 | #include "mat.h"
 5 | #include "vec.h"
 6 | 
 7 | 
 8 | using namespace VEC;
 9 | class LMatTestSuite : public CxxTest::TestSuite 
10 | {
11 |     public:
12 |         // Assures that the same data is represented before and after
13 |         // conversions
14 |         void test_conversions( void ) {
15 |             // Depends on three file conversions performed elsewhere:
16 |             // sh "./lmata2lmat tfiles/tmp1.lmata"
17 |             // File.copy('tfiles/tmp1.lmat', 'tfiles/tmp1B.lmat')
18 |             // sh "./lmat2lmata tfiles/tmp1B.lmat"
19 | 
20 |             int ch_mz_vals = 30;
21 |             int ch_tm_vals = 40;
22 | 
23 |             float _mz[30] = { 400,401,402,403,404,405,406,407,408,409,
24 |                               410,411,412,413,414,415,416,417,418,419,
25 |                               420,421,422,423,424,425,426,427,428,429
26 |             };
27 |             float _tm[40] = { 1200.34, 1212.34, 1224.34, 1236.34, 1248.34, 
28 |                 1260.34, 1272.34, 1284.34, 1296.34, 1308.34, 1320.34, 1332.34, 
29 |                 1344.34, 1356.34, 1368.34, 1380.34, 1392.34, 1404.34, 1416.34, 
30 |                 1428.34, 1440.34, 1452.34, 1464.34, 1476.34, 1488.34, 1500.34, 
31 |                 1512.34, 1524.34, 1536.34, 1548.34, 1560.34, 1572.34, 1584.34, 
32 |                 1596.34, 1608.34, 1620.34, 1632.34, 1644.34, 1656.34, 1668.34 
33 |             };
34 |             VecF mzv(ch_mz_vals,_mz,1);  // expected m/z axis
35 |             VecF tmv(ch_tm_vals,_tm,1);  // expected time axis
36 | 
37 |             // ************************************************** 
38 |             // Set from ascii
39 |             // ************************************************** 
40 |             LMat fromascii;
41 |             fromascii.set_from_ascii("tfiles/tmp1.lmata");
42 | 
43 |             // Assert that this guy is like we expect
44 |             TS_ASSERT_EQUALS(fromascii.mzlen(), ch_mz_vals);
45 |             TS_ASSERT_EQUALS(fromascii.tmlen(), ch_tm_vals);
46 |             TS_ASSERT_EQUALS( mzv, *(fromascii.mz()) );
47 |             TS_ASSERT_EQUALS( tmv, *(fromascii.tm()) );
48 |             TS_ASSERT_DELTA( (*fromascii.mat())(0,0), 6139950.06794636, 0.1 );
49 |             TS_ASSERT_DELTA( (*fromascii.mat())(7,8),1397963.17842461, 0.1 );
50 |             TS_ASSERT_DELTA( (*fromascii.mat())(39,29), 2292810.65100822, 0.1 );
51 | 
52 |             // ************************************************** 
53 |             // Read from binary 
54 |             // ************************************************** 
55 |             LMat readnew("tfiles/tmp1.lmat");
56 | 
57 |             // Assert that it is identical to 'fromascii'
58 |             TS_ASSERT_EQUALS(fromascii.mzlen(), readnew.mzlen());
59 |             TS_ASSERT_EQUALS(fromascii.tmlen(), readnew.tmlen());
60 |             // Problems in WINDOWS HERE::
61 |             TS_ASSERT_SAME_DATA((float*)(*fromascii.mat()),(float*)(*readnew.mat()),ch_mz_vals*ch_tm_vals);
62 |             TS_ASSERT_SAME_DATA((float*)(*fromascii.mz()),(float*)(*readnew.mz()),ch_mz_vals);
63 |             TS_ASSERT_SAME_DATA((float*)(*fromascii.tm()),(float*)(*readnew.tm()),ch_tm_vals);
64 | 
65 |             // ************************************************** 
66 |             // read from ascii
67 |             // ************************************************** 
68 |             LMat fromascii2;
69 |             fromascii2.set_from_ascii("tfiles/tmp1B.lmata");  // the round-tripped file named in the header comment of this test
70 |       
71 |             TS_ASSERT_EQUALS(fromascii.mzlen(), fromascii2.mzlen());
72 |             TS_ASSERT_EQUALS(fromascii.tmlen(), fromascii2.tmlen());
73 | //            TS_ASSERT_SAME_DATA((float*)(*fromascii.mat()),(float*)(*fromascii2.mat()),ch_mz_vals*ch_tm_vals);
74 |             TS_ASSERT_SAME_DATA((float*)(*fromascii.mz()),(float*)(*fromascii2.mz()),ch_mz_vals);  // only the two axes are compared after the round trip
75 |             TS_ASSERT_SAME_DATA((float*)(*fromascii.tm()),(float*)(*fromascii2.tm()),ch_tm_vals);
76 | 
77 |         }
78 | 
79 | 
80 | };
81 | 
82 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_mat_converters.rb:
--------------------------------------------------------------------------------
 1 | 
 2 | require 'test/unit'
 3 | require 'fileutils'
 4 | 
 5 | TFILES = "tfiles/"  # directory holding the test fixtures
 6 | MAT1 = TFILES + 'file1.mata'  # ascii matrix fixture used as the conversion input
 7 | BINDIR = "../bin"  # directory the converter executables are run from
 8 | 
 9 | class MatConvertersTest < Test::Unit::TestCase  # Round-trips a copy of MAT1 through the mata2mat and mat2mata executables.
10 |   def test_mata2mat
11 |     tmpmata = TFILES + "trash.mata"
12 |     tmpmat_out = TFILES + "trash.mat"
13 |     FileUtils.cp MAT1, tmpmata  # work on a scratch copy so the fixture itself is not touched
14 |     pr = "mata2mat"
15 |     system "#{BINDIR}/#{pr} #{tmpmata}"  # exit status is not checked; the output file is inspected instead
16 |     assert(File.exist?(tmpmat_out), "#{tmpmat_out} exists")
17 |     arr = IO.read(tmpmat_out).unpack('iif*')  # two native ints, then native floats to the end of the file
18 |     exp = [4,3, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]
19 |     assert_equal(exp, arr)
20 |     File.unlink(tmpmata)  # removed so the trash.mata checked below must come from mat2mata
21 |     assert(!File.exist?(tmpmata), "#{tmpmata} does not exist")
22 | 
23 |     pr = "mat2mata"  # second half: convert the binary file back to ascii
24 |     tmpmat = TFILES + 'trash.mat'
25 |     tmpmat_out = TFILES + 'trash.mata'
26 |     assert(File.exist?(tmpmat), "#{tmpmat} exists")
27 |     system "#{BINDIR}/#{pr} #{tmpmat}"
28 |     assert(File.exist?(tmpmat_out), "#{tmpmat_out} exists")
29 |     arr = IO.readlines(tmpmat_out)
30 |     assert_equal(%w(4 3), arr[0].split(" "))  # header line
31 |     assert_equal(%w(1 2 3), arr[1].split(" "))  # first data row
32 |     assert_equal(%w(10 11 12), arr[4].split(" "))  # last data row
33 |       
34 |     File.unlink(tmpmat)
35 |     File.unlink(tmpmat_out)
36 |   end
37 | 
38 |   
39 | end
40 | 
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_obiwarp.h:
--------------------------------------------------------------------------------
 1 | #include <cxxtest/TestSuite.h>
 2 | #include <cstdlib>
 3 | #include "mat.h"
 4 | #include "vec.h"
 5 | 
 6 | 
 7 | using namespace VEC;
 8 | class AlignTestSuite : public CxxTest::TestSuite 
 9 | {
10 |     public:
11 |         // @TODO: either get these smat tests working or move them over to ruby
12 |         //
13 |         // Assures that the same data is represented before and after
14 |         // conversions
15 |         void test_smat_io( void ) {
16 | #ifdef WIN32
17 |             //system("../bin/obiwarp.exe -a -s product -x smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata");
18 | #else
19 |             //system("../bin/obiwarp -a -s product -x smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata");
20 | #endif
21 |             TS_ASSERT_EQUALS(0, 0);  // placeholder: the only live statement, everything else here is commented out
22 | //            MatF smat_f;
23 | //            char *tmpfile = "smat_product.tmp";
24 | //
25 | //            smat_f.set_from_binary(tmpfile);
26 | //            TS_ASSERT_DELTA(smat_f(0,0), 1.63204e+14, 1.0e11);
27 | //            TS_ASSERT_DELTA(smat_f(0,1), 1.46614e+14, 1.0e11);
28 | //            TS_ASSERT_DELTA(smat_f(39,45), 2.02115e+14, 1.0e11);
29 | //            TS_ASSERT_EQUALS(smat_f.rows(), 40);
30 | //            TS_ASSERT_EQUALS(smat_f.cols(), 46);
31 |             /*
32 |             if (WIN32) {
33 |                 system("obiwarp -a --smat_in smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata");
34 |             }
35 |             else {
36 |                 system("./obiwarp -a --smat_in smat_product.tmp tfiles/tmp1.lmata tfiles/tmp2.lmata");
37 |             }
38 |             */
39 | //            remove(tmpfile);
40 |         }
41 |         
42 | };
43 | 
44 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_obiwarp.rb:
--------------------------------------------------------------------------------
  1 | 
  2 | require 'test/unit'
  3 | require 'ostruct'
  4 | 
  5 | $WIN32 = false; if ENV["OS"] == 'Windows_NT' then $WIN32 = true end
  6 | 
  7 | # update_values copied from ruby facets:
  8 | # http://facets.rubyforge.org/doc/api/core/classes/Hash.html
  9 | class Hash
 10 |   def update_values
 11 |     each{ |k,v| store( k, yield(v) ) }
 12 |   end
 13 | end
 14 | 
 15 | OBIWARP_PATH = "../bin/obiwarp" + ($WIN32 ? ".exe" : '')  # executable under test; ".exe" is appended when $WIN32 is set
 16 | TFILES = "tfiles/"
 17 | 
 18 | hash = {  # symbolic name => fixture file name; TFILES is prepended to every value below
 19 |   :lmata1 => 'tmp1.lmata',
 20 |   :lmata1B => 'tmp1B.lmata',
 21 |   :lmata2 => 'tmp2.lmata',
 22 | 
 23 |   :lmat1 => 'tmp1.lmat',
 24 |   :lmat1B => 'tmp1B.lmat',
 25 |   :lmat2 => 'tmp2.lmat',
 26 |   :lmat_warped_default_G => 'tmp1B.lmat.warped_default',
 27 |   :lmat_warped_default => 'tmp1B.lmat.warped',
 28 | 
 29 |   :mat1 => 'tmp1.mat',
 30 |   :mat2 => 'file1.mat',
 31 |   :mat3 => 'file3.mat',
 32 |   :mat4 => 'file4.mat',
 33 |   :mata1 => 'tmp1.mata',
 34 |   :mata2 => 'file1.mata',
 35 |   :mata3 => 'file3.mata',
 36 |   :mata4 => 'file4.mata',
 37 |   :mat1_no_header => 'tmp1_no_header.mat',  # NOTE(review): the fixture directory appears to hold .mata variants of the no_header files -- confirm these names
 38 |   :mat2_no_header_messy => 'tmp1_no_header_messy.mat',
 39 | }.update_values {|v| TFILES + v }
 40 | 
 41 | F = OpenStruct.new(hash)  # lets the tests write F.lmat1 instead of hash[:lmat1]
 42 | 
 43 | # Basic tests to ensure that files are being read, things are being warped
 44 | # when they are supposed to, etc.  Options are tested in cmdparser.
 45 | class ObiWarpTest < Test::Unit::TestCase
 46 | 
 47 |   @@lmat1_times = "1200.34 1212.34 1224.34 1236.34 1248.34 1260.34 1272.34 1284.34 1296.34 1308.34 1320.34 1332.34 1344.34 1356.34 1368.34 1380.34 1392.34 1404.34 1416.34 1428.34 1440.34 1452.34 1464.34 1476.34 1488.34 1500.34 1512.34 1524.34 1536.34 1548.34 1560.34 1572.34 1584.34 1596.34 1608.34 1620.34 1632.34 1644.34 1656.34 1668.34\n"
 48 |   @@mat1_times = "0 1 2 3 4 5\n"
 49 |   @@mat4_times = "0 1 2 3 4 5 6 7 8\n"
 50 |   def ob; OBIWARP_PATH end  # short alias for the executable path
 51 | 
 52 |   def test_min_input  # with fewer than two input files the program must print its usage text
 53 |     assert( File.exist?(ob), "obiwarp executable is in #{OBIWARP_PATH}")
 54 |     reply = `#{ob}`
 55 |     assert_match( /USAGE:/, reply, "no values passed in" )
 56 |     assert_match( /USAGE: #{File.basename(ob).gsub(/\.exe$/,'')}/, reply, "help progname matches executable")
 57 |     reply = `#{ob} only_1_file`
 58 |     assert_match( /USAGE:/, reply, "only one file passed in" )
 59 |   end
 60 | 
 61 |   # running a file against itself must print exactly the expected time values for that file
 62 | 
 63 |   def test_self_vs_self
 64 |     { F.mat1 => @@mat1_times,
 65 |       F.mata1 => @@mat1_times,
 66 |       F.lmat1 => @@lmat1_times,
 67 |       F.lmata1 => @@lmat1_times,
 68 |       F.mat4 => @@mat4_times,
 69 |     }.each do |k,v| 
 70 |       vs_self(k,v)
 71 |     end
 72 |   end
 73 | 
 74 |   def vs_self(file, expected)  # helper: runs obiwarp on (file, file) and compares its stdout with expected
 75 |     reply = `#{ob} #{file} #{file}`
 76 |     assert_equal(expected, reply)
 77 |   end
 78 | 
 79 |   def test_vs_other  # each entry is [file1, file2, expected times string for file2]
 80 |     [
 81 |       [F.mat3, F.mat4, @@mat4_times],
 82 |       [F.mata3, F.mata4, @@mat4_times],
 83 |       [F.lmat2, F.lmat1, @@lmat1_times],
 84 |       [F.lmata2, F.lmata1, @@lmat1_times],
 85 |     ].each do |pair|
 86 |       assert_new_times(*pair)
 87 |     end
 88 |   end
 89 | 
 90 |   # asserts that the reply has as many values as file2_times but is not equal to it
 91 |   def assert_new_times(file1, file2, file2_times)
 92 |     reply = `#{ob} #{file1} #{file2}`
 93 |     #puts reply
 94 |     assert_equal(file2_times.chomp.split(" ").size, reply.chomp.split(" ").size, "same number of values")
 95 |     assert_not_equal(file2_times, reply, "times should not be the same after warping")
 96 |   end
 97 | 
 98 |   ##############################################
 99 |   ## HELPERS:
100 |   ##############################################
101 | 
102 | end
103 | 
104 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/test_pngio.h:
--------------------------------------------------------------------------------
 1 | #include <cxxtest/TestSuite.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "mat.h"
 5 | #include "vec.h"
 6 | #include "pngio.h"
 7 | 
 8 | 
 9 | using namespace VEC;
10 | class PngIOTestSuite : public CxxTest::TestSuite 
11 | {
12 |     public:
13 |         void test_simple( void ) {  // Smoke test: writes a small matrix to a PNG and deletes it; nothing is asserted, so it only catches crashes.
14 |             MatI silly(4,5);  // 4 rows x 5 cols: the original 4x4 made the silly(2,4) write below fall outside the column range
15 |             //silly = 8;
16 |             for (int m = 0; m < silly.rows(); ++m) {
17 |                 for (int n = 0; n < silly.cols(); ++n) {
18 |                     silly(m,n) = 0;
19 |                 }
20 |             }
21 |             silly(0,2) = 1;  // mark a short diagonal of cells
22 |             silly(1,3) = 1;
23 |             silly(2,4) = 1;
24 | 
25 |             PngIO ioguy(1);
26 |             ioguy.write("trial.png", silly);
27 |             
28 |             remove("trial.png");  // clean up the scratch image
29 |         }
30 | };
31 | 
32 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file1.mat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file1.mata:
--------------------------------------------------------------------------------
1 | 4 3
2 | 1 2 3
3 | 4 5 6
4 | 7 8 9
5 | 10 11 12
6 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file3.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file3.mat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file3.mata:
--------------------------------------------------------------------------------
 1 | 8 10
 2 | 220 21 20 34 56 76 23 19 43 657
 3 | 230 21 29 34 56 76 10 18 43 657
 4 | 240 21 32 34 56 76 43 17 43 657
 5 | 230 21 42 34 56 46 43 16 43 600
 6 | 220 21 52 34 56 76 43 15 43 557
 7 | 210 21 62 34 56 76 43 16 43 457
 8 | 200 21 82 34 56 76 43 17 43 357
 9 | 130 21 92 34 56 76 43 18 43 257
10 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file4.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/file4.mat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/file4.mata:
--------------------------------------------------------------------------------
 1 | 9 10
 2 | 240 20 32 34 56 76 43 17 43 657
 3 | 230 21 42 34 56 46 43 16 43 600
 4 | 210 21 50 34 56 76 43 15 43 552
 5 | 210 23 62 34 66 56 43 16 43 457
 6 | 200 21 82 34 56 76 43 17 43 350
 7 | 130 22 90 34 56 76 43 18 43 237
 8 | 220 21 20 34 56 76 23 19 43 657
 9 | 230 21 29 34 56 76 10 18 43 657
10 | 230 21 29 34 56 76 10 18 43 657
11 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1.lmat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1.lmat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1.lmat.pts:
--------------------------------------------------------------------------------
1 | 1259.0 404.2 1000
2 | 1269.0 428.5 1300
3 | 1590.4 404.4 1500
4 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1.mat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1.mata:
--------------------------------------------------------------------------------
1 | 6 10
2 | 230 21 22 34 56 76 43 12 43 657
3 | 230 21 22 34 56 76 43 12 43 657
4 | 230 21 22 34 56 76 43 12 43 657
5 | 230 21 22 34 56 76 43 12 43 657
6 | 230 21 22 34 56 76 43 12 43 657
7 | 230 21 22 34 56 76 43 12 43 657
8 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1B.lmat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1B.lmat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1B.lmat.warped_default:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp1B.lmat.warped_default


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1_no_header.mata:
--------------------------------------------------------------------------------
1 | 230 21 22 34 56 76 43 12 43 657
2 | 230 21 22 34 56 76 43 12 43 657
3 | 230 21 22 34 56 76 43 12 43 657
4 | 230 21 22 34 56 76 43 12 43 657
5 | 230 21 22 34 56 76 43 12 43 657
6 | 230 21 22 34 56 76 43 12 43 657
7 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp1_no_header_messy.mata:
--------------------------------------------------------------------------------
1 | 230 21 22 34 56 76 43 12 43 657 
2 | 230 21 22 34 56 76 43 12 43 657 
3 | 230 21 22 34 56 76 43 12 43 657
4 | 230 21 22 34 56 76 43 12 43 657 
5 | 230 21 22 34 56 76 43 12 43 657
6 | 230 21 22 34 56 76 43 12 43 657
7 | 
8 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmp2.lmat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CSi-Studio/G-Aligner/b82f6f4ac6aeddf93bb22d24a8051fec451aad8c/third_party/obiwarp/lib/tfiles/tmp2.lmat


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/tfiles/tmptimes.txt:
--------------------------------------------------------------------------------
1 | 1215.0 1208.0
2 | 1272.0 1284.0
3 | 1536.0 1464.0
4 | 1620.0 1600.0
5 | 1656.0 1700.0
6 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/variations/README.txt:
--------------------------------------------------------------------------------
1 | # cp or link these to the obiwarp file to get variant behavior
2 | # (these may or may not actually work since I don't test these)
3 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/variations/get_ssr_asr_aad.cpp:
--------------------------------------------------------------------------------
 1 | // STDLIB:
 2 | #include <cstdio>
 3 | #include "string.h"
 4 | 
 5 | // MINE
 6 | #include "vec.h"
 7 | #include "mat.h"
 8 | 
 9 | #define DEBUG (0)
10 | 
11 | using namespace VEC;
12 | 
13 | int main (int argc, char **argv) {  // For each file named on the command line, prints "<file> ssr asr aad" computed from two vectors taken from it.
14 |     MatF tester;
15 |     MatF tester_trans;
16 |     VecF mpt;
17 |     VecF npt;
18 |     for (int c = 1; c < argc; ++c) {
19 |         tester.set_from_ascii(argv[c], 1);  // no headers on the files
20 |         tester.transpose(tester_trans);
21 |         mpt.set(tester_trans.cols(), tester_trans.pointer(0));  // presumably row 0 of the transpose, i.e. the file's first column -- confirm in mat.h
22 |         npt.set(tester_trans.cols(), tester_trans.pointer(1));  // presumably the file's second column; assumes the file has at least two columns
23 |         double ssr = VecF::sum_sq_res_yeqx(mpt, npt);
24 |         double asr = VecF::avg_sq_res_yeqx(mpt, npt);
25 |         double aad = VecF::avg_abs_diff(mpt, npt);
26 |         printf("%s %f %f %f\n", argv[c], ssr, asr, aad);
27 |     }
28 |     return 0;  // was 1, which reported failure to the shell after a successful run
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/variations/obiwarp_probs.cpp:
--------------------------------------------------------------------------------
  1 | // STDLIB:
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include "string.h"
  6 | 
  7 | // MINE
  8 | #include "vec.h"
  9 | #include "mat.h"
 10 | #include "lmat.h"
 11 | #include "dynprog.h"
 12 | #include "pngio.h"
 13 | #include "cmdparser.h"
 14 | 
 15 | #define DEBUG (0)
 16 | 
 17 | int main (int argc, char **argv) {  // Variant entry point: only option parsing and output-name selection are live; the alignment pipeline below is commented out.
 18 | 
 19 |     CmdParser opts(argc, argv);
 20 | 
 21 |     int NUM_INTERNAL_ANCHORS = 10000;   // Max this out!
 22 |     char file1[1024];
 23 |     char file2[1024];
 24 |     snprintf(file1, sizeof(file1), "%s", opts.infiles[0]);  // bounded copy: an over-long path is truncated instead of overflowing the buffer
 25 |     snprintf(file2, sizeof(file2), "%s", opts.infiles[1]);  // NOTE(review): assumes two input files were supplied -- confirm CmdParser guarantees this
 26 |     char outfilename[1024 + 7];
 27 | 
 28 |     int mi_bins = 5;
 29 |     //char toError[300];
 30 |     
 31 |     if (DEBUG) {
 32 |         std::cerr << "**********************************************\n";
 33 |         std::cerr << "opts.local: " << opts.local << "\n";
 34 |         std::cerr << "opts.images: " << opts.images << "\n";
 35 |         std::cerr << "opts.score: " << opts.score << "\n";
 36 |         std::cerr << "opts.outfile: "<< opts.outfile << "\n";
 37 |         std::cerr << "opts.timefile: " << opts.timefile << "\n";
 38 |         std::cerr << "file1: " << file1 << "\n";
 39 |         std::cerr << "file2: " << file2 << "\n";
 40 |         std::cerr << "**********************************************\n";
 41 |     }
 42 | 
 43 | 
 44 |     
 45 |     if (opts.outfile == NULL) {
 46 |         strcpy(outfilename, file2);  // safe: file2 holds at most 1023 characters and outfilename has 7 extra bytes for ".warped"
 47 |         strcat(outfilename, ".warped");
 48 |     }
 49 |     else {
 50 |         snprintf(outfilename, sizeof(outfilename), "%s", opts.outfile);  // bounded copy of the user-supplied output name
 51 |     }
 52 | 
 53 |     // ************************************************************
 54 |     // * READ IN FILES TO GET MAT 
 55 |     // ************************************************************
 56 |     LMat lmat1;
 57 |     LMat lmat2;
 58 |     MatF mat1;
 59 |     MatF mat2;
 60 |     MatF smat;
 61 |     DynProg dyn;
 62 | 
 63 | //    if (opts.axes) {
 64 | //        if (opts.binary) {
 65 | //            lmat1.set_from_binary(file1);
 66 | //            lmat2.set_from_binary(file2);
 67 | //            lmat1._mat->copy(mat1, 1);
 68 | //            lmat2._mat->copy(mat2, 1);
 69 | //        }
 70 | //        else {
 71 | //            lmat1.set_from_ascii(file1);
 72 | //            lmat2.set_from_ascii(file2);
 73 | //            lmat1._mat->copy(mat1, 1);
 74 | //            lmat2._mat->copy(mat2, 1);
 75 | //        }
 76 | //    }
 77 | //    else {
 78 | //        //mat1.set_from_ascii(file1);  @TODO: write this guy
 79 | //        //mat2.set_from_ascii(file2); 
 80 | //    }
 81 | //
 82 | //    // ************************************************************
 83 | //    // * SCORE THE MATRICES
 84 | //    // ************************************************************
 85 | //    if (DEBUG) {
 86 | //        std::cerr << "Scoring the mats!\n";
 87 | //    }
 88 | //    if (opts.smat_in != NULL) {
 89 | //        smat.set_from_binary(opts.smat_in);
 90 | //        dyn._smat = &smat;
 91 | //    }
 92 | //    else {
 93 | //        dyn.score(mat1, mat2, smat, opts.score, mi_bins);
 94 | //        // SETTING THE SMAT TO BE std normal
 95 | //        smat -= smat.avg();
 96 | //        double mean, stdev;
 97 | //        smat._dat.sample_stats(mean, stdev);
 98 | //        smat /= stdev;
 99 | //        if (!strcmp(opts.score,"euclidean")) {
100 | //            smat *= -1; // inverting euclidean
101 | //        }
102 | //    }
103 | //    if (opts.smat_out != NULL) {
104 | //        printf("Writing binary smat to '%s'\n", opts.smat_out);
105 | //        smat.write(opts.smat_out);
106 | //        //smat.print(smat_out_files[0]);
107 | //        exit(0);
108 | //    }
109 | //
110 | //    // ************************************************************
111 | //    // * PREPARE GAP PENALTY ARRAY
112 | //    // ************************************************************
113 | //   
114 | //    MatF tester;
115 | //    MatF tester_trans;
116 | //    VecF mpt;
117 | //    VecF npt;
118 | //    VecF mOut_tm;
119 | //    VecF nOut_tm;
120 | //
121 | //    double average = smat.avg();
122 | //    int gp_length = smat.rows() + smat.cols();
123 | //
124 | //    VecF gp_array;
125 | //    dyn.linear_less_before(opts.gap_extend,opts.gap_init,gp_length,gp_array);
126 | //
127 | //    // ************************************************************
128 | //    // * DYNAMIC PROGRAM
129 | //    // ************************************************************ 
130 | //    int minimize = 0;
131 | //    if (DEBUG) {
132 | //        std::cerr << "Dynamic Time Warping Score Matrix!\n";
133 | //    }
134 | //    dyn.find_path(smat, gp_array, minimize, opts.factor_diag, opts.factor_gap, opts.local, opts.init_penalty);
135 | //
136 | //    VecI mOut;
137 | //    VecI nOut;
138 | //    dyn.warp_map(mOut, nOut, minimize, NUM_INTERNAL_ANCHORS);
139 | //
140 | //    if (opts.timefile != NULL) {
141 | //        tester.set_from_ascii(opts.timefile, 1);  // no headers on the files
142 | //        tester.transpose(tester_trans);
143 | //        mpt.set(tester_trans.cols(), tester_trans.pointer(0));
144 | //        npt.set(tester_trans.cols(), tester_trans.pointer(1));
145 | //        float ssr, asr, sad, aad;
146 | //        dyn.path_accuracy((*lmat1._tm), (*lmat2._tm), mOut, nOut, mpt, npt, ssr, asr, sad, aad);
147 | //        //printf("average residual^2 (sec): %f\n", asr);
148 | //        //printf("average abs time diff (sec): %f\n", aad);
149 | //        printf("%f %f %f %f\n", ssr, asr, sad, aad);
150 | //    }
151 | //
152 | //    // Warp the second lmat run!
153 | //    if (opts.axes) {
154 | //        VecF nOutF;
155 | //        VecF mOutF;
156 | //        lmat1.tm_axis_vals(mOut, mOutF);
157 | //        lmat2.tm_axis_vals(nOut, nOutF); //
158 | //        lmat2.warp_tm(nOutF, mOutF); 
159 | //    }
160 | //    else {
161 | //        // or warp the mat itself!
162 | //        // @TODO: write the warping of mat itself!
163 | //    }
164 | //
165 | //    if (opts.binary) {
166 | //        //lmat2.print(outfilename);
167 | //        lmat2.write(outfilename);
168 | //    }
169 | //    else {
170 | //        lmat2.print(outfilename);
171 | //    }
172 | //
173 | //
174 | //
175 | //    if (opts.images) {
176 | //        PngIO wrt(1);
177 | //        char base_fn[1024];
178 | //        strcpy(base_fn, "obi-warp_");
179 | //        char tb_fn[1024];
180 | //        strcpy(tb_fn, base_fn);
181 | //        strcat(tb_fn, "tb.png");
182 | //        //char *tb_fn = "tb.png";
183 | //        wrt.write(tb_fn, dyn._tb);
184 | //        char tbpath_fn[1024];
185 | //        strcpy(tbpath_fn, base_fn);
186 | //        strcat(tbpath_fn, "tbpath.png");
187 | //        wrt.write(tbpath_fn, dyn._tbpath);
188 | //
189 | //        char asmat_fn[1024];
190 | //        strcpy(asmat_fn, base_fn);
191 | //        strcat(asmat_fn, "asmat.png");
192 | //        //wrt.write(asmat_fn, dyn._asmat);
193 | //
194 | //        //strcpy(base_fn, "tb.png");
195 | //        //char *tbpath_fn = "tbpath.png";
196 | //        //char *tbscores_fn = "tbscores.png";
197 | //        //wrt.write(tbscores_fn, dyn._tbscores);
198 | //        //char *asmat_fn = "asmat.png";
199 | //        //wrt.write(asmat_fn, dyn._asmat);
200 | //        char *smat_fn = "smat.png";
201 | //        //wrt.write(smat_fn, *dyn._smat);
202 | //    }
203 | //
204 | ///*
205 | //   char silly[100];
206 | //   strcpy(silly, "png_");
207 | //   char tmpp[5];
208 | //   sprintf(tmpp, "%d", i);
209 | //   strcat(silly, tmpp); 
210 | //   strcat(silly, ".png");
211 | //
212 | //   PngIO wrt(0);
213 | ////wrt.write(silly, dyn._tbpath);
214 | //wrt.write(silly, _scorepath);
215 | //*/
216 | //
217 | 
218 | return 0;
219 | }
220 | 
221 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/variations/obiwarp_speed.cpp:
--------------------------------------------------------------------------------
  1 | // STDLIB:
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include "string.h"
  6 | 
  7 | // 3RD PARTY 
  8 | #include <argtable2.h>
  9 | #include "tnt_stopwatch.h"
 10 | 
 11 | 
 12 | // MINE
 13 | #include "vec.h"
 14 | #include "mat.h"
 15 | #include "dynprog.h"
 16 | #include "pngio.h"
 17 | 
 18 | #define DEBUG (0)
 19 | 
 20 | 
 21 | int mymain(const char **SCORE_ARR, int score_cnt);
 22 | 
 23 | int main (int argc, char **argv) {  // Parses the optional -s/--score argument and hands it to mymain; returns mymain's result, or 1 on allocation or parse failure.
 24 |     // Create the argument structures:
 25 |     struct arg_str  *score    = arg_str0("s", "score", "<scoretype>", "similarity score to compare vectors");
 26 |     struct arg_end  *end      = arg_end(20);
 27 | 
 28 |     void* argtable[] = {score,end};
 29 |     const char* progname = "obi-warp";
 30 |     int nerrors;
 31 |     int exitcode=0;
 32 | 
 33 |     /* set default values*/
 34 |     double diag_factor = 2.f;  // not read anywhere in this function
 35 |     double gap_factor = 1.f;   // not read anywhere in this function
 36 | 
 37 |     /* verify the argtable[] entries were allocated sucessfully */
 38 |     if (arg_nullcheck(argtable) != 0)
 39 |     {
 40 |         /* NULL entries were detected, some allocations must have failed */
 41 |         printf("%s: insufficient memory\n",progname);
 42 |         exitcode=1;
 43 |         goto exit;
 44 |     }
 45 | 
 46 | 
 47 |     /* Parse the command line as defined by argtable[] */
 48 |     nerrors = arg_parse(argc,argv,argtable);
 49 |     if (nerrors > 0) { arg_print_errors(stdout, end, progname); exitcode = 1; goto exit; }  /* previously parse errors were silently ignored */
 50 |     exitcode = mymain(
 51 |             score->sval, score->count
 52 |             );
 53 | 
 54 |     exit:
 55 |     /* deallocate each non-null entry in argtable[] */
 56 |     arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
 57 | 
 58 |     return exitcode;
 59 | }
 60 | 
 61 | int mymain(const char **SCORE_ARR, int score_cnt) {
 62 |   
 63 |     char SCORE[1024];
 64 |     if (score_cnt) {
 65 |         strcpy(SCORE, SCORE_ARR[0]);
 66 |     }
 67 |     else {
 68 |         strcpy(SCORE, "covariance");
 69 |     }
 70 | 
 71 |     if (strlen(SCORE) <= 1) {
 72 |         strcpy(SCORE, "covariance");
 73 |     }
 74 | 
 75 | 
 76 |     int num_scores = 10;
 77 |     int num_peaks = 1000;
 78 | 
 79 |     // Write the file for plotting each guy:
 80 |     char plotfilename[1024];
 81 |     char plotfilename_toplot[1024];
 82 |     strcpy(plotfilename, "speed_test_");
 83 |     strcat(plotfilename, SCORE);
 84 |     strcpy(plotfilename_toplot, plotfilename);
 85 |     strcat(plotfilename_toplot, ".toplot");
 86 |     std::ofstream fh(plotfilename_toplot);
 87 |     printf("WRITING TO: %s\n", plotfilename_toplot);
 88 |     fh << "XYData" << "\n";
 89 |     fh << plotfilename << "\n";
 90 |     fh << "Scoring Functions Speed Comparison (on draco) " << num_scores << " scores\n";
 91 |     fh << "N scans (compared N X N times)\n";
 92 |     fh << "sqrt( time to complete " << num_scores << " scores (sec) )\n";
 93 |     MatF smat;
 94 |     DynProg dyn;
 95 | 
 96 |     int i;
 97 |     int num_its = 10;
 98 |     VecI xaxis(num_its);
 99 |     for (i = 0; i < num_its; ++i) {
100 |         xaxis[i] = i * 10;
101 |     }
102 | 
103 |     fh << SCORE << "numpeaks" << num_peaks << "\n";
104 |     std::cout << "SCORE " << SCORE << "\n";
105 | 
106 |     VecF yresult(num_its);
107 |     TNT::Stopwatch st;
108 | 
109 |     for (i = 0; i < xaxis.length(); ++i) {
110 |         int num_scans = xaxis[i];
111 |         MatF mat1(num_scans,num_peaks, 20.f);
112 |         MatF mat2(num_scans,num_peaks, 12.5f);
113 |         MatF smat_slow(mat1.rows(), mat2.rows());
114 | 
115 |         int cnt1, cnt2;
116 |         VecF *row_vecs1 = new VecF[mat1.rows()];
117 |         VecF *row_vecs2 = new VecF[mat2.rows()];
118 |         mat1.row_vecs(cnt1, row_vecs1);
119 |         mat2.row_vecs(cnt2, row_vecs2);
120 |         if (!strcmp(SCORE, "covariance_slow")) {
121 |             std::cout << "INSIDE" << SCORE << "\n";
122 |             st.start();
123 |             for (int j = 0; j < num_scores; ++j) {
124 |                 for (int m = 0; m < cnt1; ++m) {
125 |                     for (int n = 0; n < cnt2; ++n) {
126 |                         smat_slow(m,n) = VecF::covariance(row_vecs1[m], row_vecs2[n]);
127 |                     }
128 |                 }
129 |             }
130 |             float timed = st.read();
131 |             yresult[i] = timed;
132 |         }
133 |         else if (!strcmp(SCORE, "pearsonsr_slow")) {
134 |             std::cout << "INSIDE" << SCORE << "\n";
135 |             st.start();
136 |             for (int j = 0; j < num_scores; ++j) {
137 |                 for (int m = 0; m < cnt1; ++m) {
138 |                     for (int n = 0; n < cnt2; ++n) {
139 |                         smat_slow(m,n) = VecF::pearsons_r(row_vecs1[m], row_vecs2[n]);
140 |                     }
141 |                 }
142 |             }
143 |             float timed = st.read();
144 |             yresult[i] = timed;
145 |         }
146 |         else if (!strcmp(SCORE, "product_slow")) {
147 |             std::cout << "INSIDE" << SCORE << "\n";
148 |             st.start();
149 |             for (int j = 0; j < num_scores; ++j) {
150 |                 for (int m = 0; m < cnt1; ++m) {
151 |                     for (int n = 0; n < cnt2; ++n) {
152 |                         smat_slow(m,n) = VecF::dot_product(row_vecs1[m], row_vecs2[n]);
153 |                     }
154 |                 }
155 |             }
156 |             float timed = st.read();
157 |             yresult[i] = timed;
158 |         }
159 |         else if (!strcmp(SCORE, "euclidean_slow")) {
160 |             std::cout << "INSIDE" << SCORE << "\n";
161 |             st.start();
162 |             for (int j = 0; j < num_scores; ++j) {
163 |                 for (int m = 0; m < cnt1; ++m) {
164 |                     for (int n = 0; n < cnt2; ++n) {
165 |                         smat_slow(m,n) = VecF::euclidean(row_vecs1[m], row_vecs2[n]);
166 |                     }
167 |                 }
168 |             }
169 |             float timed = st.read();
170 |             yresult[i] = timed;
171 |         }
172 |         else {
173 |             std::cout << "INSIDE" << SCORE << "\n";
174 |             st.start();
175 |             for (int j = 0; j < num_scores; ++j) {
176 |                 dyn.score(mat1, mat2, smat, SCORE, 5);
177 |             }
178 |             float timed = st.read();
179 |             yresult[i] = timed;
180 |         }
181 |     }
182 |     xaxis;
183 |     yresult.square_root();
184 |     xaxis.print(fh,1);
185 |     yresult.print(fh,1);
186 |     fh.close();
187 | 
188 |     return 0;
189 | }
190 | 
191 | 
192 | 
193 | // Print to file to plot
194 | // title (gap penalty optimization etc.....)
195 | // filename (<b>intercept_linear_gap_penalty_optimization )
196 |     // "slope of gap penalty array"
197 |     // "avg of sq of residuals"
198 |     // mvals ...
199 |     // avgs ...
200 |     // mvals ...
201 |     // avgs ...
202 |     // mvals ...
203 |     // avgs ...
204 | 
205 | 


--------------------------------------------------------------------------------
/third_party/obiwarp/lib/variations/smat_dist.cpp:
--------------------------------------------------------------------------------
  1 | // STDLIB:
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include "string.h"
  5 | 
  6 | // MINE
  7 | #include "vec.h"
  8 | #include "mat.h"
  9 | #include "lmat.h"
 10 | #include "dynprog.h"
 11 | #include "pngio.h"
 12 | 
 13 | 
 14 | char file1[1024];
 15 | char file2[1024];
 16 | int mi_bins = 5;
 17 | float init_penalty = 0.f;
 18 | char toError[300];
 19 | 
 20 | int AXES = 0;
 21 | int BINARY = 0;
 22 | int LOCAL = 0;
 23 | int IMAGES = 0;
 24 | int LOGYESNO = 0;
 25 | char SCORE[10];
 26 | 
 27 | 
 28 | int main (int argc, char *argv[]) {
 29 |     /************************************************************
 30 |      * GET ARGUMENTS
 31 |      ************************************************************/ 
 32 |     strcpy(SCORE, "covariance");
 33 |     int file1_found_already = 0;
 34 |     if (argc == 1) { 
 35 |         std::cerr << "usage: smat_dist [-a] [-b] [-l] [-g] [-s <scoretype>] file1 file2\n" <<
 36 |         "FORMAT: \n" <<
 37 |         "Data should be in an m(rows)x n(cols) matrix (space delimited)\n" <<
 38 |         "where each line contains one row of data.  Should be same # cols.\n" << 
 39 |         "Data will be aligned along the m axis.\n" <<
 40 |         "ARGUMENTS (default marked by asterik*): \n" <<
 41 |         "b|binary = file is binary [precision?, etc] rather than *ascii\n" <<
 42 |         "a|axes = 1st line in file contains x coordinates, 2nd the y\n" << 
 43 |         "s|score = scoring function: *covariance, product (dot product)\n" << 
 44 |         "          pearsons_r, pearsons_r2, mutual_info\n" <<
 45 |         "l|local = local rather than *global alignment\n" <<
 46 |         "i|images = creates png images of the alignment process\n" <<
 47 |         "g|log = takes the log (base 2) of smat\n" <<
 48 |         "[space between argument and value, please.]\n";
 49 |         exit(1);
 50 |     }
 51 |     for (int i = 1; i < argc; i++) {
 52 |         if (!strcmp(argv[i],"-a")) {
 53 |             AXES = 1;
 54 |         }
 55 |         else if (!strcmp(argv[i],"-b")) {
 56 |             BINARY = 1;
 57 |         }
 58 |         else if (!strcmp(argv[i],"-g")) {
 59 |             LOGYESNO = 1;
 60 |         }
 61 |         else if (!strcmp(argv[i],"-i")) {
 62 |             IMAGES = 1;
 63 |         }
 64 |         else if (!strcmp(argv[i],"-l")) {
 65 |             LOCAL = 1;
 66 |         }
 67 |         else if (!strcmp(argv[i],"-s")) {
 68 |             i++;
 69 |             strcpy(SCORE, argv[i]);
 70 |         }
 71 |         // if it doesn't match an option then it is our file!
 72 |         else {
 73 |             if (file1_found_already) {
 74 |                 strcpy(file2, argv[i]);
 75 |             }
 76 |             else {
 77 |                 strcpy(file1, argv[i]);
 78 |                 file1_found_already = 1;
 79 |             }
 80 |         }
 81 |     }
 82 |     std::cerr << "**********************************************\n";
 83 |     std::cerr << "SCORE: " << SCORE << "\n";
 84 |     std::cerr << "file1: " << file1 << "\n";
 85 |     std::cerr << "file2: " << file2 << "\n";
 86 |     std::cerr << "LOCAL: " << LOCAL << "\n";
 87 |     std::cerr << "BINARY: " << BINARY << "\n";
 88 |     std::cerr << "IMAGES: " << IMAGES << "\n";
 89 |     std::cerr << "AXES: " << AXES << "\n";
 90 |     std::cerr << "LOG: " << LOGYESNO << "\n";
 91 |     std::cerr << "**********************************************\n";
 92 | 
 93 |     /************************************************************
 94 |      * READ IN FILES TO GET MAT 
 95 |      ************************************************************/ 
 96 |     LMat lmat1;
 97 |     LMat lmat2;
 98 |     MatF mat1;
 99 |     MatF mat2;
100 |     MatF smat;
101 |     DynProg dyn;
102 |     
103 |     if (AXES) {
104 |         lmat1.set_from_ascii(file1);
105 |         lmat2.set_from_ascii(file2);
106 |         lmat1._mat->copy(mat1, 1);
107 |         lmat2._mat->copy(mat2, 1);
108 |     }
109 |     else {
110 |         //mat1.set_from_ascii(file1);  @TODO: write this guy
111 |         //mat2.set_from_ascii(file2); 
112 |     }
113 | 
114 |     /************************************************************
115 |      * SCORE THE MATRICES
116 |      ************************************************************/ 
117 |     std::cerr << "Scoring the mats!\n";
118 |     dyn.score(mat1, mat2, smat, SCORE, mi_bins);
119 | 
120 |     /************************************************************
121 |      * PREPARE GAP PENALTY ARRAY
122 |      ************************************************************/ 
123 |     VecF gp_array;  // use default for now
124 | 
125 |     /************************************************************
126 |      * DYNAMIC PROGRAM
127 |      ************************************************************/ 
128 | 
129 |     int minimize = 0;
130 |     std::cerr << "Dynamic Time Warping Score Matrix!\n";
131 |     dyn.find_path_with_gaps(smat, gp_array, minimize, LOCAL, init_penalty);
132 |     printf("DYNPROG SCORE: %f\n", dyn._bestScore);
133 | 
134 |     // Run through various distances:
135 |     int reply;
136 |     int steps;
137 |     char steps_st[3];
138 |     char basefilename[255];
139 | 
140 |     // strip the lmata:
141 |     char *pointer;
142 |     pointer = strstr(file1, ".lmata");
143 |     *pointer = '\0';
144 |     pointer = strstr(file2, ".lmata");
145 |     *pointer = '\0';
146 | 
147 |     strcpy(basefilename, file1);
148 |     strcat(basefilename, "_");
149 |     strcat(basefilename, file2);
150 |     strcat(basefilename, "_");
151 |     strcat(basefilename, SCORE);
152 |     strcat(basefilename, "_");
153 |     strcat(basefilename, "steps");
154 |     strcat(basefilename, "_");
155 |     char finalfn[255];
156 |     if (LOGYESNO) {
157 |         strcat(basefilename, "logbase2");
158 |         strcat(basefilename, "_");
159 |         smat.logarithm(2);
160 |     }
161 | 
162 |     for (int steps = 0; steps < 50; steps += steps + 1) {
163 |         MatI tbpathe;
164 |         dyn._tbpath.expand(tbpathe,1,steps,steps,steps,steps,0,0,0,0); 
165 |     
166 |         strcpy(finalfn,basefilename);
167 |         sprintf(steps_st, "%d", steps);
168 |         strcat(finalfn,steps_st);
169 | 
170 |         VecF result;
171 |         smat.mask_as_vec(1, tbpathe, result);
172 | 
173 |         VecD _bins;
174 |         VecI _freqs;
175 |         result.hist(100, _bins, _freqs); 
176 |         char *hist_fn = "hist.txt";
177 |         std::ofstream fh(hist_fn);
178 |         // print filename and title:
179 |         fh << finalfn << "\n"; // filename
180 |         fh << finalfn << "\n"; // title
181 |         // print the x and y axis labels:
182 |         fh << "score" << "\n";
183 |         fh << "frequency" << "\n";
184 |         // print the data:
185 |         _bins.print(fh);
186 |         _freqs.print(fh);
187 |         fh.close();
188 |         reply = system("plot_xy.rb hist.txt -b"); 
189 |         if (reply == -1) { puts("Error!"); }
190 |         else { puts("success"); }
191 |     }
192 | 
193 |     if (IMAGES) {
194 |         PngIO wrt(1);
195 |         //char tb_fn[100];
196 |         //strcpy(tb_fn, "tb.png");
197 |         char *tb_fn = "tb.png";
198 |         wrt.write(tb_fn, dyn._tb);
199 |         char *tbpath_fn = "tbpath.png";
200 |         wrt.write(tbpath_fn, dyn._tbpath);
201 |         char *asmat_fn = "asmat.png";
202 |         wrt.write(asmat_fn, dyn._asmat);
203 |         char *smat_fn = "smat.png";
204 |         wrt.write(smat_fn, *dyn._smat);
205 |     }
206 | 
207 |     
208 |     /*
209 |     char silly[100];
210 |     strcpy(silly, "png_");
211 |     char tmpp[5];
212 |     sprintf(tmpp, "%d", i);
213 |     strcat(silly, tmpp); 
214 |     strcat(silly, ".png");
215 | 
216 |     PngIO wrt(0);
217 |     //wrt.write(silly, dyn._tbpath);
218 |     wrt.write(silly, _scorepath);
219 |     */
220 | }
221 | 
222 | 


--------------------------------------------------------------------------------
/third_party/py_obiwarp.cc:
--------------------------------------------------------------------------------
  1 | #include <cstdio>
  2 | #include <cstring>
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include "string.h"
  6 | 
  7 | #include <pybind11/pybind11.h>
  8 | #include <pybind11/numpy.h>
  9 | #include "obiwarp/lib/vec.h"
 10 | #include "obiwarp/lib/mat.h"
 11 | #include "obiwarp/lib/lmat.h"
 12 | #include "obiwarp/lib/dynprog.h"
 13 | 
 14 | 
 15 | #define DEBUG (0)
 16 | namespace py = pybind11;
 17 | 
 18 | LMat* create_lmat_from_memory(int len_rt, double *rt, int len_mz, double *mz, double *intensity)
 19 | {
 20 |     LMat *lmat = new LMat();
 21 |     delete lmat->_mz;
 22 |     delete lmat->_tm;
 23 |     delete lmat->_mat;
 24 | 
 25 |     // Get the time values:
 26 |     lmat->_tm_vals = len_rt;
 27 |     float *tm_tmp = new float[len_rt];
 28 |     for(int i=0; i < len_rt; i++) {
 29 |       tm_tmp[i] = rt[i];
 30 |     }
 31 |     lmat->_tm = new VecF(len_rt, tm_tmp);
 32 | 
 33 |     // Get the mz values:
 34 |     lmat->_mz_vals = len_mz;
 35 |     float *mz_tmp = new float[len_mz];
 36 |     for(int i=0; i < len_mz; i++) {
 37 |       mz_tmp[i] = mz[i];
 38 |     }
 39 |     lmat->_mz = new VecF(len_mz, mz_tmp);
 40 | 
 41 |     // Read the matrix:
 42 |     int rows_by_cols = len_rt * len_mz;
 43 |     float *mat_tmp = new float[rows_by_cols];
 44 |     for(int i=0; i < rows_by_cols; i++) {
 45 |       mat_tmp[i] = intensity[i];
 46 |     }
 47 |     lmat->_mat = new MatF(len_rt, len_mz, mat_tmp);
 48 | 
 49 |     return lmat;
 50 | }
 51 | 
 52 | 
 53 | py::array_t<float> obiwarp(py::array_t<double> py_rt, py::array_t<double> py_mz, py::array_t<double> py_intensity,
 54 |                            py::array_t<double> py_rt2, py::array_t<double> py_mz2, py::array_t<double> py_intensity2,
 55 | 			               float percent_anchors, const char *score,
 56 | 			               float gap_init, float gap_extend,
 57 | 			               float factor_diag, float factor_gap,
 58 | 			               int local_alignment, float init_penalty)
 59 | {
 60 |     // ************************************************************
 61 |     // * CONVERT ARRAY TO MAT
 62 |     // ************************************************************
 63 |     int len_rt = py_rt.request().size;
 64 |     int len_mz = py_mz.request().size;
 65 |     int len_rt2 = py_rt2.request().size;
 66 |     int len_mz2 = py_mz2.request().size;
 67 |     double *rt = (double *)py_rt.request().ptr;
 68 |     double *mz = (double *)py_mz.request().ptr;
 69 |     double *intensity = (double *)py_intensity.request().ptr;
 70 |     double *rt2 = (double *)py_rt2.request().ptr;
 71 |     double *mz2 = (double *)py_mz2.request().ptr;
 72 |     double *intensity2 = (double *)py_intensity2.request().ptr;
 73 |     LMat* lmat1 = create_lmat_from_memory(len_rt, rt, len_mz, mz, intensity);
 74 |     LMat* lmat2 = create_lmat_from_memory(len_rt2, rt2, len_mz2, mz2, intensity2);
 75 | 
 76 |     // ************************************************************
 77 |     // * SCORE THE MATRICES
 78 |     // ************************************************************
 79 |     if (DEBUG) {
 80 |       std::cerr << "Input parameter confirmed!\n";
 81 |       std::cerr << " - rt_len = " << lmat1->_tm_vals << "\n";
 82 |       std::cerr << " - mz_len = " << lmat1->_mz_vals << "\n";
 83 |     }
 84 | 
 85 |     MatF smat;
 86 |     DynProg dyn;
 87 |     dyn.score(*(lmat1->mat()), *(lmat2->mat()), smat, score);
 88 | 
 89 |     if (DEBUG) {
 90 |       std::cerr << "Matrix scored!\n";
 91 |     }
 92 | 
 93 |     if (!strcmp(score,"euc")) {
 94 |       smat *= -1; // inverting euclidean
 95 |     }
 96 | 
 97 | 
 98 |     // ************************************************************
 99 |     // * PREPARE GAP PENALTY ARRAY
100 |     // ************************************************************
101 | 
102 |     MatF time_tester;
103 |     MatF time_tester_trans;
104 |     VecF mpt;
105 |     VecF npt;
106 |     VecF mOut_tm;
107 |     VecF nOut_tm;
108 | 
109 |     int gp_length = smat.rows() + smat.cols();
110 | 
111 |     VecF gp_array;
112 |     dyn.linear_less_before(gap_extend, gap_init, gp_length, gp_array);
113 | 
114 |     // ************************************************************
115 |     // * DYNAMIC PROGRAM
116 |     // ************************************************************
117 |     int minimize = 0;
118 |     dyn.find_path(smat, gp_array, minimize, factor_diag, factor_gap, local_alignment, init_penalty);
119 |     if (DEBUG) {
120 |         std::cerr << "Dynamic Time Warping path found!\n";
121 |     }
122 | 
123 |     VecI mOut;
124 |     VecI nOut;
125 |     dyn.warp_map(mOut, nOut, percent_anchors, minimize);
126 |     if (DEBUG) {
127 |         std::cerr << "Warping anchors decided!\n";
128 |     }
129 | 
130 |     VecF nOutF;
131 |     VecF mOutF;
132 |     lmat1->tm_axis_vals(mOut, mOutF);
133 |     lmat2->tm_axis_vals(nOut, nOutF);
134 |     lmat2->warp_tm(nOutF, mOutF);
135 |     if (DEBUG) {
136 |         std::cerr << "Piecewise cubic hermite interpolation finished!\n";
137 |     }
138 | 
139 |     py::array_t<float> warped_rt2 = py::array_t<float>(len_rt2);
140 |     py::buffer_info buffer = warped_rt2.request();
141 | 
142 |     float* result = (float *)buffer.ptr;
143 |     for(int i=0; i < len_rt2;i++){
144 |       result[i] = lmat2->tm()->pointer()[i];
145 |     }
146 | 
147 |     delete lmat1;
148 |     delete lmat2;
149 | 
150 |     return warped_rt2;
151 | }
152 | 
153 | PYBIND11_MODULE(py_obiwarp, m) {
154 |     m.doc() = "Python Bindings for Obiwarp library.";
155 |     m.attr("__version__") = "0.9.4";
156 | 
157 |     m.def("obiwarp", &obiwarp, "Perform obiwarp function.");
158 | }
159 | 
160 | 


--------------------------------------------------------------------------------
/third_party/setup.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import glob
  4 | from setuptools import setup, Extension, find_packages
  5 | from setuptools.command.build_ext import build_ext
  6 | import setuptools
  7 | 
  8 | __version__ = '0.9.4'
  9 | requirements_list = ['pybind11>=2.4', 'numpy']
 10 | 
 11 | maindir = os.path.join(".", "obiwarp")
 12 | libdir = os.path.join(maindir, "lib")
 13 | source_files = ['py_obiwarp.cc']
 14 | file_types = ['*.c', '*.cc', '*.cpp']
 15 | exclude_patterns = ['TEMPLATE', '2']
 16 | libraries = []
 17 | extra_objects = []
 18 | 
 19 | for file_type in file_types:
 20 |     for f in glob.glob(os.path.join(libdir, file_type)):
 21 |         exclude = False
 22 |         for pattern in exclude_patterns:
 23 |             if pattern in f:
 24 |                 exclude = True
 25 |                 break
 26 |         if not exclude:
 27 |             source_files += [f]
 28 | 
 29 | ext_modules = [
 30 |     Extension(
 31 |         'py_obiwarp',
 32 |         source_files,
 33 |     ),
 34 | ]
 35 | 
 36 | # As of Python 3.6, CCompiler has a `has_flag` method.
 37 | # cf http://bugs.python.org/issue26689
 38 | def has_flag(compiler, flagname):
 39 |     """Return a boolean indicating whether a flag name is supported on
 40 |     the specified compiler.
 41 |     """
 42 |     import tempfile
 43 |     with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
 44 |         f.write('int main (int argc, char **argv) { return 0; }')
 45 |         try:
 46 |             compiler.compile([f.name], extra_postargs=[flagname])
 47 |         except setuptools.distutils.errors.CompileError:
 48 |             return False
 49 |     return True
 50 | 
 51 | 
 52 | def cpp_flag(compiler):
 53 |     """Return the -std=c++[11/14] compiler flag.
 54 | 
 55 |     #The c++14 is prefered over c++11 (when it is available).
 56 |     # This somehow can fail on a Mac with clang
 57 |     #"""
 58 |     if has_flag(compiler, '-std=c++11'):
 59 |         return '-std=c++11'
 60 |     else:
 61 |         raise RuntimeError('Unsupported compiler -- at least C++11 support '
 62 |                            'is needed!')
 63 | 
 64 | 
 65 | class BuildExt(build_ext):
 66 |     """A custom build extension for adding compiler-specific options."""
 67 |     c_opts = {
 68 |         'msvc': ['/EHsc', '/openmp', '/O2'],
 69 |         'unix': ['-O3', '-march=native', '-std=c99'],
 70 |         #'unix': ['-O0', '-march=native', '-g'],
 71 |     }
 72 |     link_opts = {
 73 |         'unix': [],
 74 |         'msvc': [],
 75 |     }
 76 | 
 77 |     if sys.platform == 'darwin':
 78 |         c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
 79 |         link_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
 80 |     else:
 81 |         c_opts['unix'].append("-fopenmp")
 82 |         link_opts['unix'].extend(['-fopenmp', '-pthread'])
 83 | 
 84 |     def build_extensions(self):
 85 |         ct = self.compiler.compiler_type
 86 |         opts = self.c_opts.get(ct, [])
 87 |         if ct == 'unix':
 88 |             opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
 89 |             opts.append(cpp_flag(self.compiler))
 90 |             if has_flag(self.compiler, '-fvisibility=hidden'):
 91 |                 opts.append('-fvisibility=hidden')
 92 |         elif ct == 'msvc':
 93 |             opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version())
 94 | 
 95 |         # extend include dirs here (don't assume numpy/pybind11 are installed when first run, since
 96 |         # pip could have installed them as part of executing this script
 97 |         import pybind11
 98 |         import numpy as np
 99 |         for ext in self.extensions:
100 |             ext.extra_compile_args.extend(opts)
101 |             ext.extra_link_args.extend(self.link_opts.get(ct, []))
102 |             ext.include_dirs.extend([
103 |                 # Path to pybind11 headers
104 |                 pybind11.get_include(),
105 |                 pybind11.get_include(True),
106 | 
107 |                 # Path to numpy headers
108 |                 np.get_include()
109 |             ])
110 | 
111 |         build_ext.build_extensions(self)
112 | 
113 | 
# Package metadata and build configuration. The version comes from the
# module-level __version__ so that it is defined in exactly one place.
setup(
    name='py_obiwarp',
    version=__version__,
    author="CSi-Studio",
    author_email="csi@csibio.net",
    maintainer="Ruimin Wang",
    license="The MIT License",
    description='Python bindings for OBI-Warp',
    long_description='Python bindings for OBI-Warp algorithm, which was also used in XCMS for mass spectrometry data alignment.',
    keywords="ObiWarp, Alignment, Warping",
    packages=find_packages(),
    ext_modules=ext_modules,
    install_requires=requirements_list,
    # pybind11 and numpy are imported inside BuildExt.build_extensions, so
    # they must be available at build time as well.
    setup_requires=requirements_list,
    cmdclass={'build_ext': BuildExt},
    test_suite="tests",
    zip_safe=False,
    python_requires=">=3.6"
)
133 | 


--------------------------------------------------------------------------------