├── resources ├── test-files │ ├── sample.list │ ├── sample.22.bed │ ├── sample.22.bnt │ ├── sample_case.22.bed │ ├── sample_case.22.bnt │ ├── sample_control.22.bed │ ├── sample_control.22.bnt │ ├── sample.22.bim │ ├── sample.qc │ ├── sample_case.22.bim │ ├── sample_control.22.bim │ ├── sample_case.fam │ ├── sample_control.fam │ └── sample.fam ├── j2ssh-core.jar ├── iText-5.0.1.jar ├── j2ssh-common.jar ├── commons-logging.jar ├── jcommon-1.0.15.jar ├── jfreechart-1.0.13_lasso_select.jar ├── l4j.xml ├── build_evoker.pl ├── build.xml └── evoker-documentation.tex ├── docs ├── sorted.png ├── unsorted.png ├── bad4_cropped.png ├── bad5_cropped.png ├── bad7_cropped.png ├── bad_cropped.png ├── good4_cropped.png └── good5_cropped.png ├── src ├── resources │ ├── int2bnt.pl │ ├── illumina_parser.pl │ ├── oxford_parser.pl │ └── evoker-helper.pl └── evoker │ ├── Utils.java │ ├── PDFWorker.java │ ├── Types.java │ ├── LinkedAxis.java │ ├── PDFFile.java │ ├── LoggingDialog.java │ ├── EvokerPDF.java │ ├── QCFilterData.java │ ├── RemoteBinaryData.java │ ├── RemoteBedfileData.java │ ├── BinaryData.java │ ├── Marker.java │ ├── RemoteBinaryFloatData.java │ ├── SettingsDialog.java │ ├── BedfileDataFile.java │ ├── BEDFileWriter.java │ ├── GenfileDataFile.java │ ├── BinaryFloatDataFile.java │ ├── BinaryDataFile.java │ ├── OpenDirectoryDialog.java │ ├── NaturalOrderComparator.java │ ├── SampleData.java │ ├── EvokerPoint2D.java │ ├── PDFDialog.java │ ├── Lasso.java │ ├── WrapLayout.java │ ├── MarkerListDialog.java │ ├── MarkerData.java │ ├── DataConnectionDialog.java │ ├── PlotPanel.java │ ├── BEDFileChanger.java │ ├── DataClient.java │ ├── EvokerChartPanel.java │ └── PlotData.java ├── .gitignore ├── release.sh ├── LICENSE.md └── README.md /resources/test-files/sample.list: -------------------------------------------------------------------------------- 1 | snp0 2 | snp1 3 | snp2 4 | snp3 5 | snp4 6 | -------------------------------------------------------------------------------- 
/docs/sorted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/sorted.png -------------------------------------------------------------------------------- /docs/unsorted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/unsorted.png -------------------------------------------------------------------------------- /docs/bad4_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/bad4_cropped.png -------------------------------------------------------------------------------- /docs/bad5_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/bad5_cropped.png -------------------------------------------------------------------------------- /docs/bad7_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/bad7_cropped.png -------------------------------------------------------------------------------- /docs/bad_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/bad_cropped.png -------------------------------------------------------------------------------- /docs/good4_cropped.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/good4_cropped.png -------------------------------------------------------------------------------- /docs/good5_cropped.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/docs/good5_cropped.png -------------------------------------------------------------------------------- /resources/j2ssh-core.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/j2ssh-core.jar -------------------------------------------------------------------------------- /src/resources/int2bnt.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/src/resources/int2bnt.pl -------------------------------------------------------------------------------- /resources/iText-5.0.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/iText-5.0.1.jar -------------------------------------------------------------------------------- /resources/j2ssh-common.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/j2ssh-common.jar -------------------------------------------------------------------------------- /resources/commons-logging.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/commons-logging.jar -------------------------------------------------------------------------------- /resources/jcommon-1.0.15.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/jcommon-1.0.15.jar -------------------------------------------------------------------------------- 
/src/resources/illumina_parser.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/src/resources/illumina_parser.pl -------------------------------------------------------------------------------- /resources/test-files/sample.22.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample.22.bed -------------------------------------------------------------------------------- /resources/test-files/sample.22.bnt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample.22.bnt -------------------------------------------------------------------------------- /resources/test-files/sample_case.22.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample_case.22.bed -------------------------------------------------------------------------------- /resources/test-files/sample_case.22.bnt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample_case.22.bnt -------------------------------------------------------------------------------- /resources/test-files/sample_control.22.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample_control.22.bed -------------------------------------------------------------------------------- /resources/test-files/sample_control.22.bnt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/test-files/sample_control.22.bnt -------------------------------------------------------------------------------- /resources/jfreechart-1.0.13_lasso_select.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/HEAD/resources/jfreechart-1.0.13_lasso_select.jar -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.DS_Store 3 | Evoker.jar 4 | /build.xml 5 | /build/ 6 | /evoker/ 7 | /docs/*.aux 8 | /docs/*.log 9 | /docs/*.pdf 10 | /docs/*.toc 11 | -------------------------------------------------------------------------------- /resources/test-files/sample.22.bim: -------------------------------------------------------------------------------- 1 | 1 snp0 0 1 C A 2 | 1 snp1 0 2 C A 3 | 1 snp2 0 3 C A 4 | 1 snp3 0 4 A C 5 | 1 snp4 0 5 C A 6 | 1 snp5 0 6 C A 7 | 1 snp6 0 7 A C 8 | 1 snp7 0 8 A C 9 | 1 snp8 0 9 C A 10 | 1 snp9 0 10 C A 11 | -------------------------------------------------------------------------------- /resources/test-files/sample.qc: -------------------------------------------------------------------------------- 1 | ind0 2 | ind1 3 | ind2 4 | ind3 5 | ind4 6 | ind5 7 | ind6 8 | ind7 9 | ind8 10 | ind9 11 | ind50 12 | ind51 13 | ind52 14 | ind53 15 | ind54 16 | ind55 17 | ind56 18 | ind57 19 | ind58 20 | ind59 -------------------------------------------------------------------------------- /resources/test-files/sample_case.22.bim: -------------------------------------------------------------------------------- 1 | 1 snp0 0 1 C A 2 | 1 snp1 0 2 C A 3 | 1 snp2 0 3 C A 4 | 1 snp3 0 4 A C 5 | 1 snp4 0 5 C A 6 | 1 snp5 0 6 C A 7 | 1 snp6 0 7 A C 8 | 1 snp7 0 8 A C 9 | 1 snp8 0 9 C A 10 | 1 snp9 0 10 C A 11 | 
/**
 * Static helper methods shared across Evoker.
 */
public final class Utils {
    /**
     * Combines a directory path and a file name into a single path string
     * using the default file system's rules.
     *
     * @param path     leading path component
     * @param filename component appended after {@code path}
     * @return the joined path as a string
     */
    public static String join(String path, String filename) {
        final java.nio.file.Path combined = Paths.get(path, filename);
        return combined.toString();
    }
}
/**
 * Container for the enumerations declared for the evoker package.
 * The declaration order of the constants is kept as-is because ordinals
 * depend on it.
 */
public class Types {

    /** Supported file formats. */
    public enum FileFormat {
        DEFAULT,
        OXFORD,
        UKBIOBANK
    }

    /** Supported coordinate systems. */
    public enum CoordinateSystem {
        CART,
        POLAR,
        UKBIOBANK
    }

    /** Sort keys; each key has an ascending and a descending variant. */
    public enum SortBy {
        COLLECTIONBATCH_ASCEND,
        COLLECTIONBATCH_DESCEND,
        MAF_ASCEND,
        MAF_DESCEND,
        GPC_ASCEND,
        GPC_DESCEND,
        HWEPVAL_ASCEND,
        HWEPVAL_DESCEND
    }

    /** Sex categories. */
    public enum Sex {
        MALE,
        FEMALE,
        UNKNOWN,
        NOT_SEX
    }

}
void setAutoRange(boolean auto){ 19 | //we want the automatic range on this puppy to be linked to all other graphs, 20 | //which we set in the constructor. schweet. 21 | if (auto){ 22 | setLowerBound(min); 23 | setUpperBound(max); 24 | } 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /resources/l4j.xml: -------------------------------------------------------------------------------- 1 | 2 | false 3 | gui 4 | ./Evoker.jar 5 | ./Evoker.exe 6 | 7 | 8 | 9 | normal 10 | http://java.com/download 11 | 12 | false 13 | false 14 | 15 | 16 | 17 | 18 | 1.5.0 19 | 20 | preferJre 21 | 1024 22 | 23 | -------------------------------------------------------------------------------- /src/evoker/PDFFile.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | import com.itextpdf.text.*; 7 | import com.itextpdf.text.pdf.PdfWriter; 8 | 9 | public class PDFFile { 10 | 11 | private PdfWriter pdf; 12 | private Document document; 13 | private boolean open = false; 14 | 15 | PDFFile(File file) throws DocumentException, IOException { 16 | document = new Document(PageSize.A4); 17 | pdf = PdfWriter.getInstance(document, new FileOutputStream(file)); 18 | document.open(); 19 | setFileOpen(true); 20 | } 21 | 22 | private void setFileOpen(boolean b) { 23 | open = b; 24 | } 25 | 26 | boolean isFileOpen() { 27 | return open; 28 | } 29 | 30 | Document getDocument() { 31 | return document; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/evoker/LoggingDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | import java.awt.*; 5 | 6 | public class LoggingDialog extends JDialog { 7 | private JTextArea ta; 8 | 9 | LoggingDialog(JFrame parent){ 10 | 
super(parent, "Evoker Log"); 11 | 12 | ta = new JTextArea(); 13 | ta.setEditable(false); 14 | JScrollPane scrollzor = new JScrollPane(ta,JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, 15 | JScrollPane.HORIZONTAL_SCROLLBAR_NEVER); 16 | scrollzor.setPreferredSize(new Dimension(400,400)); 17 | setContentPane(scrollzor); 18 | 19 | ta.setFont(new Font("Monospaced",Font.PLAIN,12)); 20 | ta.setLineWrap(true); 21 | ta.setWrapStyleWord(true); 22 | 23 | ta.append("*********\n"); 24 | ta.append("Evoker...\n"); 25 | ta.append("*********\n\n"); 26 | } 27 | 28 | public void log(String text){ 29 | ta.append(text+"\n"); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/evoker/EvokerPDF.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | import com.itextpdf.text.*; 7 | import com.itextpdf.text.pdf.PdfWriter; 8 | 9 | public class EvokerPDF { 10 | 11 | private PdfWriter pdf; 12 | private Document document; 13 | private boolean open = false; 14 | final int PLOT_WIDTH = 500; 15 | final int PLOT_HEIGHT = 500; 16 | 17 | 18 | 19 | EvokerPDF(File file, int numCollections) throws DocumentException, IOException { 20 | document = new Document(); 21 | document.setPageSize(new Rectangle(PLOT_WIDTH * numCollections, PLOT_HEIGHT)); 22 | document.setMargins(10, 10, 10, 10); 23 | pdf = PdfWriter.getInstance(document, new FileOutputStream(file)); 24 | document.open(); 25 | setFileOpen(true); 26 | } 27 | 28 | private void setFileOpen(boolean b) { 29 | open = b; 30 | } 31 | 32 | boolean isFileOpen() { 33 | return open; 34 | } 35 | 36 | Document getDocument() { 37 | return document; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 
(MIT) 2 | 3 | Copyright (c) 2010-2014 James Morris & Jeffrey Barrett 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/evoker/QCFilterData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.util.LinkedList; 4 | import java.util.StringTokenizer; 5 | import java.util.Vector; 6 | import java.io.BufferedReader; 7 | import java.io.FileReader; 8 | import java.io.IOException; 9 | 10 | public class QCFilterData { 11 | 12 | Vector toExclude; 13 | 14 | QCFilterData(String qcFilename) throws IOException{ 15 | 16 | this.toExclude = new Vector(); 17 | 18 | BufferedReader qcReader = new BufferedReader(new FileReader(qcFilename)); 19 | String currentLine; 20 | StringTokenizer st; 21 | while ((currentLine = qcReader.readLine()) != null){ 22 | st = new StringTokenizer(currentLine); 23 | String name = st.nextToken(); 24 | toExclude.add(name); 25 | Genoplot.ld.log("Exclude: " + name); 26 | } 27 | qcReader.close(); 28 | } 29 | 30 | QCFilterData() { 31 | toExclude = new Vector(); 32 | } 33 | 34 | public void add(String sample) { 35 | toExclude.add(sample); 36 | } 37 | 38 | public boolean isExcluded (String sample) { 39 | return toExclude.contains(sample); 40 | } 41 | } -------------------------------------------------------------------------------- /resources/test-files/sample_case.fam: -------------------------------------------------------------------------------- 1 | ind0 ind0 0 0 1 1 2 | ind1 ind1 0 0 1 1 3 | ind2 ind2 0 0 1 1 4 | ind3 ind3 0 0 1 1 5 | ind4 ind4 0 0 1 1 6 | ind5 ind5 0 0 1 1 7 | ind6 ind6 0 0 1 1 8 | ind7 ind7 0 0 1 1 9 | ind8 ind8 0 0 1 1 10 | ind9 ind9 0 0 1 1 11 | ind10 ind10 0 0 1 1 12 | ind11 ind11 0 0 1 1 13 | ind12 ind12 0 0 1 1 14 | ind13 ind13 0 0 1 1 15 | ind14 ind14 0 0 1 1 16 | ind15 ind15 0 0 1 1 17 | ind16 ind16 0 0 1 1 18 | ind17 ind17 0 0 1 1 19 | ind18 ind18 0 0 1 1 20 | ind19 ind19 0 0 1 1 21 | ind20 ind20 0 0 1 1 22 | ind21 ind21 0 0 1 1 23 | ind22 ind22 0 0 1 1 24 | ind23 ind23 0 0 1 1 25 | ind24 
ind24 0 0 1 1 26 | ind25 ind25 0 0 1 1 27 | ind26 ind26 0 0 1 1 28 | ind27 ind27 0 0 1 1 29 | ind28 ind28 0 0 1 1 30 | ind29 ind29 0 0 1 1 31 | ind30 ind30 0 0 1 1 32 | ind31 ind31 0 0 1 1 33 | ind32 ind32 0 0 1 1 34 | ind33 ind33 0 0 1 1 35 | ind34 ind34 0 0 1 1 36 | ind35 ind35 0 0 1 1 37 | ind36 ind36 0 0 1 1 38 | ind37 ind37 0 0 1 1 39 | ind38 ind38 0 0 1 1 40 | ind39 ind39 0 0 1 1 41 | ind40 ind40 0 0 1 1 42 | ind41 ind41 0 0 1 1 43 | ind42 ind42 0 0 1 1 44 | ind43 ind43 0 0 1 1 45 | ind44 ind44 0 0 1 1 46 | ind45 ind45 0 0 1 1 47 | ind46 ind46 0 0 1 1 48 | ind47 ind47 0 0 1 1 49 | ind48 ind48 0 0 1 1 50 | ind49 ind49 0 0 1 1 51 | -------------------------------------------------------------------------------- /src/evoker/RemoteBinaryData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.IOException; 4 | import java.math.BigInteger; 5 | 6 | public abstract class RemoteBinaryData extends BinaryData{ 7 | 8 | DataClient dc; 9 | 10 | RemoteBinaryData(DataClient dc, int numInds, MarkerData md, String collection, String chromosome){ 11 | super (numInds,md,collection, chromosome); 12 | this.dc = dc; 13 | } 14 | public void checkFile(byte[] headers) throws IOException{ 15 | throw new IOException("checkFile() for remote files requires a filename)"); 16 | } 17 | public void checkFile(String filename, byte[] headers) throws IOException{ 18 | 19 | BigInteger fileSize = new BigInteger(dc.getFTP().stat(filename).getSize().toString()); 20 | BigInteger checkSize = BigInteger.valueOf(new Long(numSNPs)).multiply(BigInteger.valueOf(new Long(bytesPerRecord))); 21 | 22 | if (!fileSize.equals(checkSize.add(BigInteger.valueOf(new Long(headers.length))))) { 23 | if (!fileSize.equals(checkSize.add(new BigInteger("8")))){ 24 | throw new IOException(filename + " is not properly formatted.\n(Incorrect length.)"); 25 | } 26 | } 27 | } 28 | } 29 | 
-------------------------------------------------------------------------------- /resources/test-files/sample_control.fam: -------------------------------------------------------------------------------- 1 | ind50 ind50 0 0 1 1 2 | ind51 ind51 0 0 1 1 3 | ind52 ind52 0 0 1 1 4 | ind53 ind53 0 0 1 1 5 | ind54 ind54 0 0 1 1 6 | ind55 ind55 0 0 1 1 7 | ind56 ind56 0 0 1 1 8 | ind57 ind57 0 0 1 1 9 | ind58 ind58 0 0 1 1 10 | ind59 ind59 0 0 1 1 11 | ind60 ind60 0 0 1 1 12 | ind61 ind61 0 0 1 1 13 | ind62 ind62 0 0 1 1 14 | ind63 ind63 0 0 1 1 15 | ind64 ind64 0 0 1 1 16 | ind65 ind65 0 0 1 1 17 | ind66 ind66 0 0 1 1 18 | ind67 ind67 0 0 1 1 19 | ind68 ind68 0 0 1 1 20 | ind69 ind69 0 0 1 1 21 | ind70 ind70 0 0 1 1 22 | ind71 ind71 0 0 1 1 23 | ind72 ind72 0 0 1 1 24 | ind73 ind73 0 0 1 1 25 | ind74 ind74 0 0 1 1 26 | ind75 ind75 0 0 1 1 27 | ind76 ind76 0 0 1 1 28 | ind77 ind77 0 0 1 1 29 | ind78 ind78 0 0 1 1 30 | ind79 ind79 0 0 1 1 31 | ind80 ind80 0 0 1 1 32 | ind81 ind81 0 0 1 1 33 | ind82 ind82 0 0 1 1 34 | ind83 ind83 0 0 1 1 35 | ind84 ind84 0 0 1 1 36 | ind85 ind85 0 0 1 1 37 | ind86 ind86 0 0 1 1 38 | ind87 ind87 0 0 1 1 39 | ind88 ind88 0 0 1 1 40 | ind89 ind89 0 0 1 1 41 | ind90 ind90 0 0 1 1 42 | ind91 ind91 0 0 1 1 43 | ind92 ind92 0 0 1 1 44 | ind93 ind93 0 0 1 1 45 | ind94 ind94 0 0 1 1 46 | ind95 ind95 0 0 1 1 47 | ind96 ind96 0 0 1 1 48 | ind97 ind97 0 0 1 1 49 | ind98 ind98 0 0 1 1 50 | ind99 ind99 0 0 1 1 51 | -------------------------------------------------------------------------------- /src/evoker/RemoteBedfileData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.util.ArrayList; 4 | import java.io.IOException; 5 | import java.io.BufferedInputStream; 6 | import java.io.FileInputStream; 7 | import java.io.File; 8 | 9 | public class RemoteBedfileData extends RemoteBinaryData { 10 | 11 | RemoteBedfileData(DataClient dc, int numInds, MarkerData md, String collection, String 
name, String chromosome) throws IOException { 12 | super(dc, numInds, md, collection, chromosome); 13 | bytesPerRecord = (int)Math.ceil(((double)numInds)/4); 14 | checkFile(name, bedMagic); 15 | } 16 | 17 | RemoteBedfileData(DataClient dc, int numInds, MarkerData md, String collection, String chromosome) { 18 | super(dc, numInds, md, collection, chromosome); 19 | bytesPerRecord = (int)Math.ceil(((double)numInds)/4); 20 | } 21 | 22 | 23 | public ArrayList getRecord(String name)throws IOException{ 24 | int snpIndex = md.getIndex(name,md.getSampleCollectionIndex(collection)); 25 | 26 | if (snpIndex > -1){ 27 | //ask data client to get this SNP 28 | dc.getSNPFiles(name,md.getChrom(name),collection,snpIndex,numInds,totNumSNPs); 29 | 30 | BedfileDataFile bed = new BedfileDataFile(Utils.join(dc.getLocalDir(), collection+"."+name+".bed"), 31 | this); 32 | 33 | return bed.getRecord(0); 34 | }else{ 35 | return null; 36 | } 37 | } 38 | } -------------------------------------------------------------------------------- /src/evoker/BinaryData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.*; 4 | import java.util.ArrayList; 5 | 6 | public abstract class BinaryData { 7 | 8 | protected byte[] bedMagic; 9 | protected byte[] bntMagic; 10 | 11 | protected MarkerData md; 12 | protected String collection; 13 | protected String chromosome; 14 | protected int numInds; 15 | protected int numSNPs; 16 | protected int bytesPerRecord; 17 | 18 | /** default header offset for bnt files */ 19 | protected int bntHeaderOffset; 20 | /** default header offset for bed files */ 21 | protected int bedHeaderOffset; 22 | /** to hold the total number of snps when using remote data, as the total number of snps is required for checking Oxformat header information*/ 23 | protected int totNumSNPs; 24 | 25 | BinaryData(int numInds, MarkerData md, String collection, String chromosome){ 26 | this.numInds = numInds; 27 | 
/**
 * Stores, for every sample collection, the index at which a SNP is found,
 * together with the SNP's chromosome and its two alleles.
 */
public class Marker {
    int[] indices;
    byte chrom;
    char alleleA;
    char alleleB;

    /**
     * @param numCollections number of sample collections; one index slot is
     *                       reserved per collection and starts out as -1
     * @param alleleA        first allele
     * @param alleleB        second allele
     * @param chrom          chromosome code
     */
    public Marker (int numCollections, char alleleA, char alleleB, byte chrom){
        this.chrom = chrom;
        this.alleleA = alleleA;
        this.alleleB = alleleB;
        this.indices = new int[numCollections];
        // -1 marks "not present in this collection"
        java.util.Arrays.fill(this.indices, -1);
    }

    /**
     * Records the index of this marker within one sample collection.
     *
     * @param sampleIndex position of the collection in the index array
     * @param markerIndex index of the marker inside that collection
     * @param alleleA     currently unused
     * @param alleleB     currently unused
     * @param snp         currently unused
     */
    public void addSampleCollection(int sampleIndex, int markerIndex, char alleleA, char alleleB, String snp){
        // A check that the alleles agree across collections (with a logged
        // warning on mismatch) was disabled here.
        //TODO: if they are just flipped around, should be able to figure that out and swap 'em
        this.indices[sampleIndex] = markerIndex;
    }

    /**
     * Returns the index of this SNP within a collection, i.e. the position
     * at which it is found.
     *
     * @param i number of the collection
     * @return the stored index, or -1 if none was recorded for that collection
     */
    public int getIndex(int i){
        return this.indices[i];
    }

    /**
     * @return the chromosome code
     */
    public byte getChrom(){
        return this.chrom;
    }

    /**
     * @return a new two-element array holding allele A followed by allele B
     */
    public char[] getAlleles(){
        char[] pair = new char[2];
        pair[0] = this.alleleA;
        pair[1] = this.alleleB;
        return pair;
    }
}
42 | Utils.join(dc.getLocalDir(),collection+"."+name+".bnt"), 43 | this, this.fileFormat); 44 | 45 | return bnt.getRecord(0); 46 | }else{ 47 | return null; 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /resources/test-files/sample.fam: -------------------------------------------------------------------------------- 1 | ind0 ind0 0 0 1 1 2 | ind1 ind1 0 0 1 1 3 | ind2 ind2 0 0 1 1 4 | ind3 ind3 0 0 1 1 5 | ind4 ind4 0 0 1 1 6 | ind5 ind5 0 0 1 1 7 | ind6 ind6 0 0 1 1 8 | ind7 ind7 0 0 1 1 9 | ind8 ind8 0 0 1 1 10 | ind9 ind9 0 0 1 1 11 | ind10 ind10 0 0 1 1 12 | ind11 ind11 0 0 1 1 13 | ind12 ind12 0 0 1 1 14 | ind13 ind13 0 0 1 1 15 | ind14 ind14 0 0 1 1 16 | ind15 ind15 0 0 1 1 17 | ind16 ind16 0 0 1 1 18 | ind17 ind17 0 0 1 1 19 | ind18 ind18 0 0 1 1 20 | ind19 ind19 0 0 1 1 21 | ind20 ind20 0 0 1 1 22 | ind21 ind21 0 0 1 1 23 | ind22 ind22 0 0 1 1 24 | ind23 ind23 0 0 1 1 25 | ind24 ind24 0 0 1 1 26 | ind25 ind25 0 0 1 1 27 | ind26 ind26 0 0 1 1 28 | ind27 ind27 0 0 1 1 29 | ind28 ind28 0 0 1 1 30 | ind29 ind29 0 0 1 1 31 | ind30 ind30 0 0 1 1 32 | ind31 ind31 0 0 1 1 33 | ind32 ind32 0 0 1 1 34 | ind33 ind33 0 0 1 1 35 | ind34 ind34 0 0 1 1 36 | ind35 ind35 0 0 1 1 37 | ind36 ind36 0 0 1 1 38 | ind37 ind37 0 0 1 1 39 | ind38 ind38 0 0 1 1 40 | ind39 ind39 0 0 1 1 41 | ind40 ind40 0 0 1 1 42 | ind41 ind41 0 0 1 1 43 | ind42 ind42 0 0 1 1 44 | ind43 ind43 0 0 1 1 45 | ind44 ind44 0 0 1 1 46 | ind45 ind45 0 0 1 1 47 | ind46 ind46 0 0 1 1 48 | ind47 ind47 0 0 1 1 49 | ind48 ind48 0 0 1 1 50 | ind49 ind49 0 0 1 1 51 | ind50 ind50 0 0 1 1 52 | ind51 ind51 0 0 1 1 53 | ind52 ind52 0 0 1 1 54 | ind53 ind53 0 0 1 1 55 | ind54 ind54 0 0 1 1 56 | ind55 ind55 0 0 1 1 57 | ind56 ind56 0 0 1 1 58 | ind57 ind57 0 0 1 1 59 | ind58 ind58 0 0 1 1 60 | ind59 ind59 0 0 1 1 61 | ind60 ind60 0 0 1 1 62 | ind61 ind61 0 0 1 1 63 | ind62 ind62 0 0 1 1 64 | ind63 ind63 0 0 1 1 65 | ind64 ind64 0 0 1 1 66 | ind65 ind65 0 0 1 1 
67 | ind66 ind66 0 0 1 1 68 | ind67 ind67 0 0 1 1 69 | ind68 ind68 0 0 1 1 70 | ind69 ind69 0 0 1 1 71 | ind70 ind70 0 0 1 1 72 | ind71 ind71 0 0 1 1 73 | ind72 ind72 0 0 1 1 74 | ind73 ind73 0 0 1 1 75 | ind74 ind74 0 0 1 1 76 | ind75 ind75 0 0 1 1 77 | ind76 ind76 0 0 1 1 78 | ind77 ind77 0 0 1 1 79 | ind78 ind78 0 0 1 1 80 | ind79 ind79 0 0 1 1 81 | ind80 ind80 0 0 1 1 82 | ind81 ind81 0 0 1 1 83 | ind82 ind82 0 0 1 1 84 | ind83 ind83 0 0 1 1 85 | ind84 ind84 0 0 1 1 86 | ind85 ind85 0 0 1 1 87 | ind86 ind86 0 0 1 1 88 | ind87 ind87 0 0 1 1 89 | ind88 ind88 0 0 1 1 90 | ind89 ind89 0 0 1 1 91 | ind90 ind90 0 0 1 1 92 | ind91 ind91 0 0 1 1 93 | ind92 ind92 0 0 1 1 94 | ind93 ind93 0 0 1 1 95 | ind94 ind94 0 0 1 1 96 | ind95 ind95 0 0 1 1 97 | ind96 ind96 0 0 1 1 98 | ind97 ind97 0 0 1 1 99 | ind98 ind98 0 0 1 1 100 | ind99 ind99 0 0 1 1 101 | -------------------------------------------------------------------------------- /src/evoker/SettingsDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | 5 | import java.awt.event.ActionListener; 6 | import java.awt.event.ActionEvent; 7 | 8 | public class SettingsDialog extends JDialog implements ActionListener { 9 | 10 | private Genoplot gp; 11 | private JTextField heightField; 12 | private JTextField widthField; 13 | 14 | public SettingsDialog(Genoplot parent){ 15 | super(parent,"Plot settings",true); 16 | 17 | gp = parent; 18 | 19 | JPanel contents = new JPanel(); 20 | contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS)); 21 | 22 | JPanel heightPanel = new JPanel(); 23 | heightPanel.add(new JLabel("Height: ")); 24 | heightField = new JTextField(5); 25 | heightPanel.add(heightField); 26 | contents.add(heightPanel); 27 | 28 | JPanel widthPanel = new JPanel(); 29 | widthPanel.add(new JLabel("Width: ")); 30 | widthField = new JTextField(5); 31 | widthPanel.add(widthField); 32 | contents.add(widthPanel); 33 | 34 | JPanel 
butPan = new JPanel(); 35 | JButton okbut = new JButton("OK"); 36 | getRootPane().setDefaultButton(okbut); 37 | okbut.addActionListener(this); 38 | butPan.add(okbut); 39 | JButton cancelbut = new JButton("Cancel"); 40 | cancelbut.addActionListener(this); 41 | butPan.add(cancelbut); 42 | contents.add(butPan); 43 | 44 | this.setContentPane(contents); 45 | } 46 | 47 | public void actionPerformed(ActionEvent e) { 48 | if (e.getActionCommand().equals("OK")){ 49 | int plotHeight = 0; 50 | int plotWidth = 0; 51 | 52 | try { 53 | plotHeight = Integer.parseInt(heightField.getText()); 54 | plotWidth = Integer.parseInt(widthField.getText()); 55 | } catch (NumberFormatException nfe){ 56 | JOptionPane.showMessageDialog(this, "Dimentions must be numerical values"); 57 | } 58 | 59 | if(plotHeight > 0 && plotWidth > 0) { 60 | gp.setPlotHeight(plotHeight); 61 | gp.setPlotWidth(plotWidth); 62 | gp.setPlotAreaSize(); 63 | gp.refreshPlot(); 64 | this.dispose(); 65 | } else { 66 | JOptionPane.showMessageDialog(this, "Dimentions must be greater than 0"); 67 | } 68 | 69 | } else if (e.getActionCommand().equals("Cancel")){ 70 | this.dispose(); 71 | } 72 | } 73 | } -------------------------------------------------------------------------------- /src/evoker/BedfileDataFile.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.*; 4 | import java.util.ArrayList; 5 | 6 | public class BedfileDataFile extends BinaryDataFile{ 7 | 8 | 9 | static String[] alleleMap = {"N","A","C","G","T"}; 10 | 11 | BedfileDataFile(String filename, int numInds, MarkerData md, String collection, String chromosome) throws IOException{ 12 | super(filename, numInds, md, collection, chromosome); 13 | 14 | bytesPerRecord = (int)Math.ceil(((double)numInds)/4); 15 | checkFile(bedMagic); 16 | } 17 | 18 | BedfileDataFile(String filename, RemoteBedfileData rbd) throws IOException{ 19 | super(filename, rbd.numInds, rbd.md, rbd.collection, 
rbd.chromosome); 20 | 21 | bytesPerRecord = (int)Math.ceil(((double)numInds)/4); 22 | numSNPs = 1; 23 | 24 | checkFile(bedMagic); 25 | } 26 | 27 | public ArrayList getRecord(String name){ 28 | //we subclass this so we can force the type of data in the ArrayList 29 | return super.getRecord(name); 30 | } 31 | 32 | public ArrayList getRecord(long snpIndex) throws IOException{ 33 | //have index, now load bed file 34 | BufferedInputStream bedIS = new BufferedInputStream(new FileInputStream(file),8192); 35 | 36 | 37 | //skip to SNP of interest 38 | //sometimes the skip() method doesn't skip as far as you ask, so you have to keep flogging it 39 | //java sux. 40 | long remaining = (snpIndex * bytesPerRecord)+bedHeaderOffset; 41 | while ((remaining = remaining - bedIS.skip(remaining)) > 0){ 42 | } 43 | 44 | //read raw snp data 45 | byte[] rawSnpData = new byte[bytesPerRecord]; 46 | bedIS.read(rawSnpData, 0, bytesPerRecord); 47 | // close the input stream 48 | bedIS.close(); 49 | //convert into array of genotypes 50 | //genotype code is: 51 | //0 == homo 1 52 | //1 == missing 53 | //2 == hetero 54 | //3 == homo 2 55 | byte[] snpData = new byte[numInds+bedHeaderOffset]; 56 | int genoCount = 0; 57 | for (byte aRawSnpData : rawSnpData) { 58 | snpData[genoCount++] = (byte) ((aRawSnpData & (byte) 0x03) >>> 0); 59 | snpData[genoCount++] = (byte) ((aRawSnpData & (byte) 0x0c) >>> 2); 60 | snpData[genoCount++] = (byte) ((aRawSnpData & (byte) 0x30) >>> 4); 61 | snpData[genoCount++] = (byte) (((int)aRawSnpData & (int)0xc0) >>> 6); 62 | //note: may be up to 3 extra entries at end of snpData array -- don't use snpData.length! 
63 | } 64 | 65 | 66 | ArrayList genos = new ArrayList(); 67 | for (int i = 0; i < numInds; i++){ 68 | genos.add(snpData[i]); 69 | } 70 | 71 | return genos; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /resources/build_evoker.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ## Description: script to create a tar ball of the latest Evoker release 4 | ## Usage: ./build_evoker james_morris81 1.1.1 5 | ## Input: sourceforge user name and new Evoker version number 6 | ## Author: jm20@sanger.ac.uk 7 | 8 | use strict; 9 | use File::Path; 10 | 11 | my $release; 12 | my $user; 13 | 14 | if (@ARGV == 2) { 15 | $user = $ARGV[0]; 16 | $release = $ARGV[1]; 17 | 18 | } else { 19 | die "Script requires a sourcforge user name and new release name/number\n"; 20 | } 21 | 22 | ## create a release directory and platform sub directories 23 | mkdir("evoker_$release"); 24 | mkdir("evoker_$release/win"); 25 | mkdir("evoker_$release/mac"); 26 | mkdir("evoker_$release/other"); 27 | mkdir("evoker_$release/all"); 28 | 29 | ## checkout the .java files from cvs 30 | system("cvs -z3 -d:ext:$user\@evoker.cvs.sourceforge.net:/cvsroot/evoker checkout src/evoker"); 31 | 32 | ## checkout the resource files from cvs 33 | system("cvs -z3 -d:ext:$user\@evoker.cvs.sourceforge.net:/cvsroot/evoker checkout resources"); 34 | 35 | ## move all the xml files for the build 36 | system("cp resources/*.xml ./"); 37 | 38 | ## build the source code 39 | system("ant evoker"); 40 | system("ant windows"); 41 | system("ant mac"); 42 | system("ant clean"); 43 | 44 | ## checkout the documentation from cvs 45 | system("cvs -z3 -d:ext:$user\@evoker.cvs.sourceforge.net:/cvsroot/evoker checkout docs"); 46 | 47 | ## compile the documentation tex file into a pdf 48 | system("cp docs/* ./"); 49 | system("pdflatex evoker-documentation.tex"); 50 | system("pdflatex evoker-documentation.tex"); 51 | 52 | ## copy the 
.jar, .exe and mac app 53 | system("cp -R Evoker* evoker_$release/all/"); 54 | system("cp -R Evoker.exe evoker_$release/win/"); 55 | system("cp -R Evoker.app evoker_$release/mac/"); 56 | system("cp -R Evoker.jar evoker_$release/other/"); 57 | 58 | for my $platform ('all','win','mac','other') { 59 | 60 | system("cp resources/evoker-helper.pl evoker_$release/$platform/"); 61 | system("cp resources/int2bnt.pl evoker_$release/$platform/"); 62 | system("cp resources/illumina_parser.pl evoker_$release/$platform/"); 63 | system("cp resources/sample_* evoker_$release/$platform/"); 64 | system("cp resources/sample.list evoker_$release/$platform/"); 65 | system("cp resources/sample.qc evoker_$release/$platform/"); 66 | system("cp evoker-documentation.pdf evoker_$release/$platform/"); 67 | system("mv evoker_$release/$platform/evoker-documentation.pdf evoker_$release/$platform/EvokerHelp.pdf"); 68 | system("tar -cvf evoker_$release/evoker_$release\_$platform.tar evoker_$release/$platform/"); 69 | system("gzip evoker_$release/evoker_$release\_$platform.tar"); 70 | 71 | } 72 | 73 | ## remove all the unwanted files and dirs 74 | rmtree(['src','resources','docs','Evoker.app']); 75 | system("rm evoker-documentation*"); 76 | system("rm *.png"); 77 | system('rm *.xml'); 78 | system("rm Evoker*"); 79 | unlink('Evoker.jar'); 80 | unlink('Evoker.exe'); 81 | -------------------------------------------------------------------------------- /src/evoker/BEDFileWriter.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.BufferedOutputStream; 4 | import java.io.File; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileOutputStream; 7 | import java.io.IOException; 8 | 9 | /** Write BED-Files */ 10 | public class BEDFileWriter { 11 | 12 | private BufferedOutputStream out; // the output stream 13 | private byte buffer = 0; 14 | private byte placesLeftInBuffer = 4; 15 | 16 | /** 17 | * Write to a specified 
file 18 | * @param file to write to 19 | * @throws FileNotFoundException 20 | * @throws IOException 21 | */ 22 | public BEDFileWriter(File file) throws FileNotFoundException, IOException { 23 | assert file != null; 24 | out = new BufferedOutputStream(new FileOutputStream(file)); 25 | writeHeader(); 26 | } 27 | 28 | /** 29 | * Writes bytes to the file 30 | * @param i 31 | * @throws IOException 32 | */ 33 | public void write(byte[] i) throws IOException { 34 | out.write(i); 35 | } 36 | 37 | /** 38 | * Writes an array of genotypes (internal notation!) to the file 39 | * @param genotypes 40 | */ 41 | public void writeGenotypes(int[] genotypes) throws IOException { 42 | for (int geno : genotypes) { 43 | writeGenotype(geno); 44 | } 45 | } 46 | 47 | /** 48 | * Writes the magic numbers to the file 49 | * @throws IOException 50 | */ 51 | private void writeHeader() throws IOException { 52 | out.write(Integer.parseInt("01101100", 2)); 53 | out.write(Integer.parseInt("00011011", 2)); 54 | out.write(Integer.parseInt("00000001", 2)); 55 | } 56 | 57 | /** 58 | * Writes single genotype to file 59 | * 60 | * 00 Homozygote "1"/"1" -> internal notation: 0 61 | * 01 Heterozygote -> internal notation: 2 62 | * 11 Homozygote "2"/"2" -> internal notation: 3 63 | * 10 Missing genotype -> internal notation: 1 64 | */ 65 | public void writeGenotype(int type) throws IOException { 66 | 67 | buffer = (byte) (buffer >>> 2); 68 | 69 | switch (type) { 70 | case 0: 71 | break; 72 | case 1: 73 | buffer = (byte) (buffer | 0x40); 74 | break; 75 | case 2: 76 | buffer = (byte) (buffer | 0x80); 77 | break; 78 | case 3: 79 | buffer = (byte) (buffer | 0xc0); 80 | break; 81 | } 82 | 83 | placesLeftInBuffer--; 84 | 85 | if (placesLeftInBuffer == 0) { 86 | clearBuffer(); 87 | placesLeftInBuffer = 4; 88 | } 89 | } 90 | 91 | /** 92 | * Pads the buffer with zeros and flushes it 93 | * 94 | * @throws IOException 95 | */ 96 | private void clearBuffer() throws IOException { 97 | 98 | } 99 | 100 | /** 101 | * Pads 
the buffer with zeros and flushes it 102 | * @throws IOException 103 | */ 104 | public void flush() throws IOException { 105 | clearBuffer(); 106 | } 107 | 108 | /** 109 | * Closes the file (flushes it beforehand) 110 | * @throws IOException 111 | */ 112 | public void close() throws IOException { 113 | out.flush(); 114 | out.close(); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /resources/build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /src/evoker/GenfileDataFile.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.BufferedInputStream; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.zip.GZIPInputStream; 8 | 9 | 10 | public class GenfileDataFile extends BinaryDataFile{ 11 | 12 | GenfileDataFile(String filename, int numInds, MarkerData md, String collection, String chromosome) throws IOException{ 13 | super(filename, numInds, md, collection, chromosome); 14 | 15 | // there are 3 four byte values for each ind in the ox format files 16 | bytesPerRecord = 3 * 4 * numInds; 17 | 18 | checkFile(bedMagic); 19 | } 20 | 21 | GenfileDataFile(String filename, int numInds, MarkerData md, String collection, boolean zipped, String chromosome) throws IOException{ 22 | 
super(filename, numInds, md, collection, chromosome); 23 | 24 | // there are 3 four byte values for each ind in the ox format files 25 | bytesPerRecord = 3 * 4 * numInds; 26 | compressed = true; 27 | // set the header offset here for now as it would normally be set in the checkfile method 28 | bedHeaderOffset = 8; 29 | // compressed file - do not use checkFile() 30 | // TODO: method for checking compressed files 31 | 32 | 33 | } 34 | 35 | public ArrayList getRecord(long snpIndex) throws IOException{ 36 | //have index, now load gen file 37 | BufferedInputStream genIS; 38 | ArrayList genos = new ArrayList(); 39 | 40 | if (this.isCompressed()){ 41 | 42 | genIS = new BufferedInputStream(new GZIPInputStream(new FileInputStream(file),8192)); 43 | 44 | } else{ 45 | 46 | genIS = new BufferedInputStream(new FileInputStream(file),8192); 47 | 48 | } 49 | 50 | //skip to SNP of interest 51 | //sometimes the skip() method doesn't skip as far as you ask, so you have to keep flogging it 52 | //java sux. 
53 | long remaining = (snpIndex * bytesPerRecord)+bedHeaderOffset; 54 | while ((remaining = remaining - genIS.skip(remaining)) > 0){ 55 | } 56 | 57 | //read raw snp data 58 | byte[] binSnpData = new byte[bytesPerRecord]; 59 | genIS.read(binSnpData, 0, bytesPerRecord); 60 | // close the input stream 61 | genIS.close(); 62 | // convert the binary data array into a float array 63 | float[] floatSnpData = new float[numInds*3]; 64 | int count = 0; 65 | for (int start = 0; start < bytesPerRecord; start = start + 4) { 66 | floatSnpData[count] = arr2float(binSnpData, start); 67 | count++; 68 | } 69 | 70 | // loop through each set of three values and then decide on the genotype 71 | for (int loop = 0; loop < floatSnpData.length; loop = loop + 3) { 72 | 73 | float aa = floatSnpData[loop]; 74 | float ab = floatSnpData[loop+1]; 75 | float bb = floatSnpData[loop+2]; 76 | 77 | //convert into array of genotypes 78 | //genotype code is: 79 | //0 == homo 1 80 | //1 == missing 81 | //2 == hetero 82 | //3 == homo 2 83 | if (aa > 0.9) { 84 | genos.add((byte)0); 85 | } else if (ab >0.9){ 86 | genos.add((byte)2); 87 | } else if (bb > 0.9) { 88 | genos.add((byte)3); 89 | } else { 90 | genos.add((byte)1); 91 | } 92 | } 93 | 94 | return genos; 95 | 96 | } 97 | public static float arr2float (byte[] arr, int start) { 98 | int i = 0; 99 | int len = 4; 100 | int cnt = 0; 101 | byte[] tmp = new byte[len]; 102 | for (i = start; i < (start + len); i++) { 103 | tmp[cnt] = arr[i]; 104 | cnt++; 105 | } 106 | int accum = 0; 107 | i = 0; 108 | for ( int shiftBy = 0; shiftBy < 32; shiftBy += 8 ) { 109 | accum |= ( (long)( tmp[i] & 0xff ) ) << shiftBy; 110 | i++; 111 | } 112 | return Float.intBitsToFloat(accum); 113 | } 114 | 115 | 116 | } 117 | -------------------------------------------------------------------------------- /src/evoker/BinaryFloatDataFile.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.util.ArrayList; 4 | 
import java.util.zip.GZIPInputStream; 5 | import java.io.BufferedInputStream; 6 | import java.io.FileInputStream; 7 | import java.io.IOException; 8 | import java.io.RandomAccessFile; 9 | import java.nio.ByteBuffer; 10 | import java.nio.ByteOrder; 11 | import evoker.Types.FileFormat; 12 | 13 | public class BinaryFloatDataFile extends BinaryDataFile{ 14 | 15 | private int valuesPerEntry; 16 | private FileFormat fileFormat; 17 | 18 | /** 19 | * Non-zipped local intensity files. 20 | * @param filename 21 | * @param numInds 22 | * @param md 23 | * @param collection 24 | * @param vals 25 | * @param chromosome 26 | * @param fileFormat 27 | * @throws IOException 28 | */ 29 | BinaryFloatDataFile(String filename, int numInds, MarkerData md, String collection, int vals, String chromosome, FileFormat fileFormat) 30 | throws IOException{ 31 | super(filename, numInds, md,collection, chromosome); 32 | this.valuesPerEntry = vals; 33 | this.fileFormat = fileFormat; 34 | bytesPerRecord = valuesPerEntry * 4 * numInds; 35 | 36 | if (fileFormat == FileFormat.UKBIOBANK) { 37 | bntMagic = new byte[]{}; 38 | bntHeaderOffset = 0; 39 | } 40 | checkFile(bntMagic); 41 | } 42 | 43 | /** 44 | * Zipped local intensity files. 
45 | * 46 | * @param filename 47 | * @param numInds 48 | * @param md 49 | * @param collection 50 | * @param vals 51 | * @param zipped 52 | * @param chromosome 53 | * @throws IOException 54 | */ 55 | BinaryFloatDataFile(String filename, int numInds, MarkerData md, String collection, int vals, boolean zipped, String chromosome) 56 | throws IOException{ 57 | super(filename, numInds, md,collection, chromosome); 58 | this.valuesPerEntry = vals; 59 | bytesPerRecord = valuesPerEntry * 4 * numInds; 60 | compressed = true; 61 | bntHeaderOffset = 8; 62 | // compressed file - do not use checkFile() 63 | // TODO: method for checking compressed files 64 | } 65 | 66 | BinaryFloatDataFile(String filename, RemoteBinaryFloatData rbfd, FileFormat fileFormat) throws IOException{ 67 | super(filename,rbfd.numInds,rbfd.md,rbfd.collection, rbfd.chromosome); 68 | 69 | this.valuesPerEntry = rbfd.valuesPerEntry; 70 | bytesPerRecord = rbfd.bytesPerRecord; 71 | this.numSNPs = 1; 72 | 73 | if (fileFormat == FileFormat.UKBIOBANK) { 74 | bntMagic = new byte[]{}; 75 | bntHeaderOffset = 0; 76 | } 77 | 78 | checkFile(bntMagic); 79 | } 80 | 81 | public ArrayList getRecord(String name){ 82 | //we subclass this so we can force the type of data in the ArrayList 83 | return super.getRecord(name); 84 | } 85 | 86 | ArrayList getRecord(long snpIndex) throws IOException{ 87 | BufferedInputStream intIS; 88 | 89 | if (this.isCompressed()){ 90 | intIS = new BufferedInputStream(new GZIPInputStream(new FileInputStream(file),8192)); 91 | } else{ 92 | intIS = new BufferedInputStream(new FileInputStream(file),8192); 93 | } 94 | 95 | //skip to SNP of interest 96 | //sometimes the skip() method doesn't skip as far as you ask, so you have to keep flogging it 97 | //java sux. 
98 | long remaining = (snpIndex * bytesPerRecord)+bntHeaderOffset; 99 | while ((remaining = remaining - intIS.skip(remaining)) > 0){ 100 | } 101 | 102 | //read raw snp data 103 | byte[] rawData = new byte[bytesPerRecord]; 104 | intIS.read(rawData, 0, bytesPerRecord); 105 | // close the input stream 106 | intIS.close(); 107 | ByteBuffer rawDataBuffer = ByteBuffer.wrap(rawData); 108 | rawDataBuffer.order(ByteOrder.LITTLE_ENDIAN); 109 | 110 | ArrayList record = new ArrayList(); 111 | for (int i = 0; i < numInds; i++){ 112 | float[] send = new float[valuesPerEntry]; 113 | for (int j = 0; j < valuesPerEntry; j++){ 114 | send[j] = rawDataBuffer.getFloat(); 115 | } 116 | record.add(send); 117 | } 118 | 119 | return record; 120 | } 121 | } -------------------------------------------------------------------------------- /src/evoker/BinaryDataFile.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.BufferedInputStream; 6 | import java.io.FileInputStream; 7 | import java.math.BigInteger; 8 | import java.nio.ByteBuffer; 9 | import java.nio.ByteOrder; 10 | import java.util.ArrayList; 11 | 12 | public abstract class BinaryDataFile extends BinaryData{ 13 | 14 | File file; 15 | protected boolean compressed; 16 | 17 | BinaryDataFile(String filename, int numInds, MarkerData md, String collection, String chromosome){ 18 | super(numInds,md,collection, chromosome); 19 | this.file = new File(filename); 20 | } 21 | 22 | /** 23 | * Checks whether the file is of a valid format 24 | * 25 | * @param headers 26 | * @throws IOException 27 | */ 28 | public void checkFile(byte[] headers) throws IOException{ 29 | 30 | if (file != null){ 31 | 32 | BigInteger fileSize = BigInteger.valueOf(file.length()); 33 | BigInteger checkSize = BigInteger.valueOf(new Long(numSNPs)).multiply(BigInteger.valueOf(new Long(bytesPerRecord))); 34 | 35 | if 
(!fileSize.equals(checkSize.add(BigInteger.valueOf(new Long(headers.length))))) { 36 | if (fileSize.equals(checkSize.add(new BigInteger("8")))){ 37 | //alternate Oxford format 38 | //Change headers byte[] to be a new byte[] of the correct things as specified by numSNPs and numInds. 39 | ByteBuffer buf = ByteBuffer.allocate(8); 40 | buf.order(ByteOrder.LITTLE_ENDIAN); 41 | // in the case of remote data we need to compare the total snps as this is the value in the header 42 | buf.putInt(this.totNumSNPs); 43 | if (file.getName().endsWith("bed") || file.getName().endsWith("gen.bin") ){ 44 | // the inds value in the header is the number of columns--three values per ind 45 | buf.putInt(this.numInds*3); 46 | }else if (file.getName().endsWith("bnt") || file.getName().endsWith("int.bin")){ 47 | // the inds value in the header is the number of columns--two values per ind 48 | buf.putInt(this.numInds*2); 49 | } 50 | buf.clear(); 51 | headers = new byte[8]; 52 | buf.get(headers, 0, 8); 53 | 54 | bntHeaderOffset = 8; 55 | bedHeaderOffset = 8; 56 | } else{ 57 | throw new IOException(file + " is not properly formatted.\n(Incorrect length.)"); 58 | } 59 | } 60 | } else{ 61 | //this is a useless message, but it implies badness 10000 62 | throw new IOException("File is null?!?"); 63 | } 64 | 65 | //are the headers acceptable for this file? 
66 | BufferedInputStream binaryIS = new BufferedInputStream(new FileInputStream(file),8192); 67 | byte[] fromFile = new byte[headers.length]; 68 | binaryIS.read(fromFile,0,headers.length); 69 | 70 | for (int i = 0; i < headers.length; i++){ 71 | if (fromFile[i] != headers[i]){ 72 | throw new IOException(file + 73 | " is not properly formatted.\n(Magic number is incorrect.)"); 74 | } 75 | } 76 | } 77 | 78 | public void checkFile(byte[] headers, boolean zipped) throws IOException{ 79 | //TODO: method for checking if compressed files are valid evoker files 80 | // get the uncompressed size of the file - for gzip files that where originally smaller than 4gb you can read the last 4 bytes of the file 81 | // at the moment we can assume this type of file is Oxford format 82 | } 83 | 84 | // public ArrayList getRecord(String markerName, String batchName) { 85 | // 86 | // } 87 | 88 | public ArrayList getRecord(String markerName){ 89 | //do some checks on getting the data and handle errors centrally 90 | int snpIndex; 91 | try { 92 | if((snpIndex = md.getIndex(markerName,md.getSampleCollectionIndex(collection))) >= 0) { 93 | return getRecord(snpIndex); 94 | } 95 | }catch(IOException ioe) { 96 | //TODO: handle me 97 | //TODO: I don't know anything about that SNP? 
98 | } 99 | return(null); 100 | } 101 | 102 | public boolean isCompressed(){ 103 | return compressed; 104 | } 105 | 106 | abstract ArrayList getRecord(long index) throws IOException; 107 | 108 | } 109 | -------------------------------------------------------------------------------- /src/evoker/OpenDirectoryDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | 5 | import java.util.Enumeration; 6 | import javax.swing.AbstractButton; 7 | import javax.swing.ButtonGroup; 8 | 9 | 10 | import java.awt.event.ActionListener; 11 | import java.awt.event.ActionEvent; 12 | 13 | import evoker.Types.FileFormat; 14 | 15 | 16 | public class OpenDirectoryDialog extends JDialog implements ActionListener { 17 | 18 | // public static enum FileFormat { 19 | // DEFAULT, OXFORD, UKBIOBANK 20 | // } 21 | 22 | private FileFormat fileFormat; 23 | private String directory; 24 | private JTextField directoryField; 25 | private JRadioButton defaultFormatButton; 26 | private JRadioButton oxfordFormatButton; 27 | private JRadioButton ukBioBankFormatButton; 28 | private ButtonGroup bg; 29 | private JFileChooser jfc; 30 | private boolean success; 31 | 32 | public OpenDirectoryDialog(JFrame parent){ 33 | super(parent,"Open Directory",true); 34 | 35 | jfc = new JFileChooser("user.dir"); 36 | 37 | JPanel contents = new JPanel(); 38 | contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS)); 39 | 40 | JPanel directoryPanel = new JPanel(); 41 | directoryPanel.add(new JLabel("Directory: ")); 42 | directoryField = new JTextField(30); 43 | directoryPanel.add(directoryField ); 44 | JButton directoryBrowseButton = new JButton("Browse"); 45 | directoryBrowseButton.addActionListener(this); 46 | directoryPanel.add(directoryBrowseButton); 47 | contents.add(directoryPanel); 48 | 49 | contents.add(new JPanel()); 50 | 51 | bg = new ButtonGroup(); 52 | JPanel formatPanel = new JPanel(); 53 | formatPanel.setLayout(new 
BoxLayout(formatPanel,BoxLayout.Y_AXIS)); 54 | 55 | defaultFormatButton = new JRadioButton("Default format"); 56 | formatPanel.add(defaultFormatButton); 57 | defaultFormatButton.setSelected(true); 58 | bg.add(defaultFormatButton); 59 | 60 | oxfordFormatButton = new JRadioButton("Oxford format"); 61 | formatPanel.add(oxfordFormatButton); 62 | bg.add(oxfordFormatButton); 63 | 64 | ukBioBankFormatButton = new JRadioButton("UK Biobank v2 format"); 65 | formatPanel.add(ukBioBankFormatButton); 66 | bg.add(ukBioBankFormatButton); 67 | 68 | contents.add(formatPanel); 69 | 70 | JPanel butPan = new JPanel(); 71 | JButton okbut = new JButton("OK"); 72 | getRootPane().setDefaultButton(okbut); 73 | okbut.addActionListener(this); 74 | butPan.add(okbut); 75 | JButton cancelbut = new JButton("Cancel"); 76 | cancelbut.addActionListener(this); 77 | butPan.add(cancelbut); 78 | contents.add(butPan); 79 | 80 | this.setContentPane(contents); 81 | } 82 | 83 | public void actionPerformed(ActionEvent e) { 84 | 85 | if (e.getActionCommand().equals("OK")){ 86 | directory = directoryField.getText(); 87 | if (defaultFormatButton.isSelected()) { fileFormat = FileFormat.DEFAULT; } 88 | else if (oxfordFormatButton.isSelected()) { fileFormat = FileFormat.OXFORD; } 89 | else if (ukBioBankFormatButton.isSelected()) { fileFormat = FileFormat.UKBIOBANK; } 90 | setSuccess(true); 91 | this.dispose(); 92 | }else if (e.getActionCommand().equals("Browse")){ 93 | jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); 94 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 95 | directoryField.setText(jfc.getSelectedFile().getAbsolutePath()); 96 | } 97 | }else if (e.getActionCommand().equals("Cancel")){ 98 | setSuccess(false); 99 | this.dispose(); 100 | } 101 | } 102 | 103 | public boolean success() { 104 | return success; 105 | } 106 | 107 | public String getDirectory() { 108 | return directory; 109 | } 110 | 111 | public FileFormat getFileFormat(){ return fileFormat; } 112 | // 113 | // public 
boolean allPlots() { 114 | // return allPlotsButton.isSelected(); 115 | // } 116 | // 117 | // public boolean yesPlots() { 118 | // return yesPlotsButton.isSelected(); 119 | // } 120 | // 121 | // public boolean maybePlots() { 122 | // return maybePlotsButton.isSelected(); 123 | // } 124 | // 125 | // public boolean noPlots() { 126 | // return noPlotsButton.isSelected(); 127 | // } 128 | // 129 | public void setSuccess(boolean b) { 130 | success = b; 131 | 132 | } 133 | } -------------------------------------------------------------------------------- /src/evoker/NaturalOrderComparator.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | /* 4 | NaturalOrderComparator.java -- Perform 'natural order' comparisons of strings in Java. 5 | Copyright (C) 2003 by Pierre-Luc Paour 6 | Based on the C version by Martin Pool, of which this is more or less a straight conversion. 7 | Copyright (C) 2000 by Martin Pool 8 | This software is provided 'as-is', without any express or implied 9 | warranty. In no event will the authors be held liable for any damages 10 | arising from the use of this software. 11 | Permission is granted to anyone to use this software for any purpose, 12 | including commercial applications, and to alter it and redistribute it 13 | freely, subject to the following restrictions: 14 | 1. The origin of this software must not be misrepresented; you must not 15 | claim that you wrote the original software. If you use this software 16 | in a product, an acknowledgment in the product documentation would be 17 | appreciated but is not required. 18 | 2. Altered source versions must be plainly marked as such, and must not be 19 | misrepresented as being the original software. 20 | 3. This notice may not be removed or altered from any source distribution. 
/**
 * Orders strings "naturally": runs of digits are compared by magnitude
 * rather than character by character. Both arguments are compared through
 * their {@code toString()} form.
 *
 * This copy has been restyled from the original port; the comparison
 * results are unchanged.
 */
public class NaturalOrderComparator implements Comparator
{
    /**
     * Compares two digit runs that start at index 0 of each string.
     * The longer run wins; for runs of equal length the first differing
     * digit decides.
     *
     * @return -1, 0 or +1
     */
    int compareRight(String a, String b)
    {
        // outcome of the first differing digit, used only if both runs
        // turn out to have the same length
        int pending = 0;
        int pos = 0;

        while (true) {
            char left = charAt(a, pos);
            char right = charAt(b, pos);
            boolean leftIsDigit = Character.isDigit(left);
            boolean rightIsDigit = Character.isDigit(right);

            if (!leftIsDigit) {
                // both runs ended together -> first difference decides,
                // otherwise the left run is shorter and therefore smaller
                return rightIsDigit ? -1 : pending;
            }
            if (!rightIsDigit) {
                return +1;
            }

            if (pending == 0 && left != right) {
                pending = (left < right) ? -1 : +1;
            }
            pos++;
        }
    }

    /**
     * Compares the string forms of the two arguments in natural order.
     * Spaces and zeros in front of each compared position are skipped.
     *
     * @return negative, zero or positive as the first argument sorts before,
     *         equal to or after the second
     */
    public int compare(Object o1, Object o2)
    {
        String first = o1.toString();
        String second = o2.toString();
        int posA = 0;
        int posB = 0;

        while (true) {
            // Only count the number of zeroes leading the last number compared
            int zerosA = 0;
            int zerosB = 0;

            char chA = charAt(first, posA);
            char chB = charAt(second, posB);

            // skip over leading spaces or zeros; a space resets the count so
            // that only consecutive zeroes are counted
            for (; Character.isSpaceChar(chA) || chA == '0'; chA = charAt(first, ++posA)) {
                zerosA = (chA == '0') ? zerosA + 1 : 0;
            }
            for (; Character.isSpaceChar(chB) || chB == '0'; chB = charAt(second, ++posB)) {
                zerosB = (chB == '0') ? zerosB + 1 : 0;
            }

            // Process run of digits
            if (Character.isDigit(chA) && Character.isDigit(chB)) {
                int runResult = compareRight(first.substring(posA), second.substring(posB));
                if (runResult != 0) {
                    return runResult;
                }
            }

            if (chA == 0 && chB == 0) {
                // Both strings are exhausted: they compare the same apart
                // from the zeros skipped in this last step.
                return zerosA - zerosB;
            }
            if (chA != chB) {
                return (chA < chB) ? -1 : +1;
            }

            posA++;
            posB++;
        }
    }

    /** Returns the character at {@code i}, or the 0 character past the end. */
    static char charAt(String s, int i) {
        if (i < s.length()) {
            return s.charAt(i);
        }
        return 0;
    }

    /** Demo: prints a sample list, a shuffled copy and the sorted result. */
    public static void main(String[] args)
    {
        String[] samples = new String[] { "1-2", "1-02", "1-20", "10-20", "fred", "jane", "pic01",
            "pic2", "pic02", "pic02a", "pic3", "pic4", "pic 4 else", "pic 5", "pic05", "pic 5",
            "pic 5 something", "pic 6", "pic 7", "pic100", "pic100a", "pic120", "pic121",
            "pic02000", "tom", "x2-g8", "x2-y7", "x2-y08", "x8-y8" };

        List original = Arrays.asList(samples);
        System.out.println("Original: " + original);

        List shuffled = Arrays.asList(samples);
        Collections.shuffle(shuffled);
        System.out.println("Scrambled: " + shuffled);

        Collections.sort(shuffled, new NaturalOrderComparator());
        System.out.println("Sorted: " + shuffled);
    }
}
private QCFilterData ukbExclude;
    private FileFormat fileFormat;


    /**
     * Reads the sample IDs (second column) of a fam/sample file.
     * <p>
     * For {@code FileFormat.OXFORD} the first two lines are skipped as headers.
     * For {@code FileFormat.UKBIOBANK} every line must have exactly six
     * columns: the fifth is read as the sex code ("1" male, "2" female,
     * anything else unknown) and the sixth as the batch name; sample IDs that
     * start with '-' are additionally recorded in the exclude list.
     * Blank lines are ignored.
     *
     * @param famFilename path of the file to read
     * @param fileFormat  format of the data set the file belongs to
     * @throws IOException if the file cannot be read or a line is ill-formed
     */
    SampleData(String famFilename, FileFormat fileFormat) throws IOException{

        this.inds = new Vector<String>();
        this.fileFormat = fileFormat;
        if (fileFormat == FileFormat.UKBIOBANK) {
            ukbBatchSampleIndices = new HashMap<String, Vector<Integer>>();
            ukbBatchMembership = new Vector<String>();
            sexByIndex = new Vector<Sex>();
            ukbExclude = new QCFilterData();
        }

        // try-with-resources: the reader is released even when an ill-formed
        // line makes us leave through an exception.
        try (BufferedReader famReader = new BufferedReader(new FileReader(famFilename))) {
            if (fileFormat == FileFormat.OXFORD){
                //strip headers
                famReader.readLine();
                famReader.readLine();
            }

            String currentLine;
            int index = -1;
            while ((currentLine = famReader.readLine()) != null) {
                String trimmed = currentLine.trim();
                if (trimmed.isEmpty()) {
                    // e.g. a trailing empty line: not a sample, so it must not use up an index
                    continue;
                }

                // Split on runs of whitespace so repeated or leading blanks
                // do not shift the columns.
                String[] tokens = trimmed.split("\\s+");
                if (tokens.length < 2) {
                    throw new IOException("fam file ill-formed: expected at least two columns but found: " + currentLine);
                }

                index++;
                String sample = tokens[1];
                inds.add(sample);

                if (fileFormat == FileFormat.UKBIOBANK) {
                    if (sample.charAt(0) == '-') {
                        ukbExclude.add(sample);
                    }

                    if (tokens.length == 5) {
                        throw new IOException("UK Biobank fam file ill-formed: requires a sixth column indicating the batch.");
                    }
                    if (tokens.length != 6) {
                        throw new IOException("UK Biobank fam file ill-formed: requires six columns.");
                    }
                    String sexCode = tokens[4];
                    // TODO assert that sex is either ('1' = male, '2' = female, '0' = unknown)

                    Sex sex;
                    switch (sexCode) {
                        case "1":
                            sex = Sex.MALE;
                            break;
                        case "2":
                            sex = Sex.FEMALE;
                            break;
                        default:
                            sex = Sex.UNKNOWN;
                            break;
                    }

                    sexByIndex.add(sex);
                    String batch = tokens[5];

                    ukbBatchMembership.add(batch);
                    // Group the sample indices by batch, creating the list on first use
                    if (ukbBatchSampleIndices.containsKey(batch)) {
                        ukbBatchSampleIndices.get(batch).add(index);
                    } else {
                        Vector<Integer> v = new Vector<Integer>(1);
                        v.add(index);
                        ukbBatchSampleIndices.put(batch, v);
                    }
                }
            }
        }
    }
97 | 98 | SampleData(Vector inds) { 99 | this.inds = inds; 100 | } 101 | 102 | /** 103 | * Returns the index of a given ind 104 | * @param ind 105 | * @return 106 | */ 107 | public int getIndex(String ind){ 108 | for(int i = 0; i < inds.size(); i++){ 109 | String s = inds.get(i); 110 | if(s.equals(ind)){ 111 | return i; 112 | } 113 | } 114 | return -1; 115 | } 116 | 117 | public String getInd(int i){ 118 | return inds.get(i); 119 | } 120 | 121 | public int getNumInds(){ 122 | return inds.size(); 123 | } 124 | 125 | public HashMap> getUkbBatchSampleIndices() { return ukbBatchSampleIndices; } 126 | 127 | public int getNumUkbBatches() { return ukbBatchSampleIndices.size(); } 128 | 129 | public Vector getUkbBatchMembership() { return ukbBatchMembership; } 130 | 131 | public Sex getSexByIndex(Integer index) { return sexByIndex.get(index); } 132 | 133 | public FileFormat getFileFormat() { 134 | return fileFormat; 135 | } 136 | 137 | public void setUkbExclude(QCFilterData ukbExclude) { 138 | this.ukbExclude = ukbExclude; 139 | } 140 | 141 | public QCFilterData getUkbExclude() { 142 | return ukbExclude; 143 | } 144 | } 145 | 146 | -------------------------------------------------------------------------------- /src/evoker/EvokerPoint2D.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved. 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 | * 5 | * This code is free software; you can redistribute it and/or modify it 6 | * under the terms of the GNU General Public License version 2 only, as 7 | * published by the Free Software Foundation. Oracle designates this 8 | * particular file as subject to the "Classpath" exception as provided 9 | * by Oracle in the LICENSE file that accompanied this code. 
10 | * 11 | * This code is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 | * version 2 for more details (a copy is included in the LICENSE file that 15 | * accompanied this code). 16 | * 17 | * You should have received a copy of the GNU General Public License version 18 | * 2 along with this work; if not, write to the Free Software Foundation, 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 | * 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 | * or visit www.oracle.com if you need additional information or have any 23 | * questions. 24 | */ 25 | package evoker; 26 | 27 | import java.io.Serializable; 28 | 29 | /** 30 | * The EvokerPoint2D class defines a point representing a location 31 | * in {@code (x,y)} coordinate space. 32 | *

33 | * This class is only the abstract superclass for all objects that 34 | * store a 2D coordinate. 35 | * The actual storage representation of the coordinates is left to 36 | * the subclass. 37 | * 38 | * @author Jim Graham 39 | * @since 1.2 40 | */ 41 | /** 42 | * Modified for use in evoker 43 | */ 44 | public class EvokerPoint2D implements Cloneable { 45 | 46 | public static long IDAt = 0; 47 | public long ID; 48 | /** 49 | * The X coordinate of this Point2D. 50 | * @since 1.2 51 | * @serial 52 | */ 53 | public double x; 54 | /** 55 | * The Y coordinate of this Point2D. 56 | * @since 1.2 57 | * @serial 58 | */ 59 | public double y; 60 | 61 | /** 62 | * Constructs and initializes a Point2D with 63 | * coordinates (0, 0). 64 | * @since 1.2 65 | */ 66 | public EvokerPoint2D() { 67 | } 68 | 69 | /** 70 | * Constructs and initializes a Point2D with the 71 | * specified coordinates. 72 | * 73 | * @param x the X coordinate of the newly 74 | * constructed Point2D 75 | * @param y the Y coordinate of the newly 76 | * constructed Point2D 77 | * @since 1.2 78 | */ 79 | public EvokerPoint2D(double x, double y) { 80 | this.x = x; 81 | this.y = y; 82 | this.ID = IDAt++; 83 | } 84 | 85 | /** 86 | * {@inheritDoc} 87 | * @since 1.2 88 | */ 89 | public double getX() { 90 | return x; 91 | } 92 | 93 | /** 94 | * {@inheritDoc} 95 | * @since 1.2 96 | */ 97 | public double getY() { 98 | return y; 99 | } 100 | 101 | public double getID(){ 102 | return ID; 103 | } 104 | 105 | /** 106 | * {@inheritDoc} 107 | * @since 1.2 108 | */ 109 | public void setLocation(double x, double y) { 110 | this.x = x; 111 | this.y = y; 112 | } 113 | 114 | /** 115 | * Returns a String that represents the value 116 | * of this Point2D. 117 | * @return a string representation of this Point2D. 
118 | * @since 1.2 119 | */ 120 | public String toString() { 121 | return "Point2D.Double[" + x + ", " + y + "]"; 122 | } 123 | 124 | public int hashCode() { 125 | long bits = java.lang.Double.doubleToLongBits(getX()); 126 | bits ^= java.lang.Double.doubleToLongBits(getY()) * 31; 127 | return (((int) bits) ^ ((int) (bits >> 32))); 128 | } 129 | 130 | /** 131 | * Determines whether or not two points are equal. Two instances of 132 | * Point2D are equal if the values of their 133 | * x and y member fields, representing 134 | * their position in the coordinate space, are the same. 135 | * @param obj an object to be compared with this Point2D 136 | * @return true if the object to be compared is 137 | * an instance of Point2D and has 138 | * the same values; false otherwise. 139 | * @since 1.2 */ 140 | public boolean equals(Object obj) { 141 | if (obj instanceof EvokerPoint2D) { 142 | EvokerPoint2D p2d = (EvokerPoint2D) obj; 143 | return (getX() == p2d.getX()) && (getY() == p2d.getY()) && (getID() == p2d.getID()); 144 | } 145 | return false; 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/resources/oxford_parser.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ## Description: This script createds the .bed, .bim, .bnt and .fam files required by Evoker from Oxford format files 4 | ## Usage: ./oxford_parser /dir 'genotype_cutoff' 5 | ## Input: The script needs to be passed the full path to a directory containing the following files: 6 | ## Study_chr_illumina.gen.bin.gz 7 | ## Study_chr_illumina.int.bin.gz 8 | ## study_chr_illumina.snp 9 | ## Study_affy.sample 10 | ## Output: The script will create the four files required by evoker: 11 | ## Study_chr.bed 12 | ## Study_chr.bim 13 | ## Study_chr.bnt 14 | ## Study.fam 15 | ## Arguments: genotype probability cutoff 16 | ## Note: The *int.bin.gz file is already in a format that Evoker can understand and 
## simply needs unzipping and renaming in the format 'study_chr.bnt'
##
## Author: jm20@sanger.ac.uk


use strict;

my $dir;
my $cutoff;

if (scalar(@ARGV) == 2) {
    $dir = $ARGV[0];
    $cutoff = $ARGV[1];
} elsif (scalar(@ARGV) == 1) {
    $dir = $ARGV[0];
    $cutoff = 0.9;
} else {
    die "Incorrect Number of Arguments\n";
}

## Every path below is built as "$dir$file", so make sure the path ends with a /
$dir .= '/' unless $dir =~ m{/$};

## TODO: convert chromosomes X,Y,XY,MT to numbers?

opendir( DIR, "$dir" ) or die "Can't open '$dir': $!";

while ( my $file = readdir(DIR) ) {

    if ( $file =~ /.gen.bin.gz$/ ) {
        ## genotype file
        ## Check the name before using $1/$2: after a failed match they would
        ## still hold the captures of an earlier file.
        unless ( $file =~ /^(\w+)_(\d+)/ ) {
            warn "Skipping '$file': expected a name of the form study_chr_...\n";
            next;
        }
        my ( $study, $chr ) = ( $1, $2 );

        open( GEN, "zcat $dir$file |" ) or die "Can't open gen file '$file': $!";
        binmode(GEN);

        open( BED, "> $dir$study.$chr.bed" ) or die "Can't open output '$study.$chr.bed': $!";
        binmode(BED);
        #magic number and SNP-major mode.
        print BED pack( 'B*', "011011000001101100000001" );

        ## header: two 4-byte integers, SNP count then 3 x individual count
        my $bsnp_num;
        read( GEN, $bsnp_num, 4 );
        my $snp_num = unpack( 'i*', $bsnp_num );
        my $bind_num;
        read( GEN, $bind_num, 4 );
        my $ind_num = unpack( 'i*', $bind_num );
        $ind_num = $ind_num/3;

        ## for each snp
        for ( my $i = 0 ; $i < $snp_num ; $i++ ) {
            my $bytecounter = 0;
            my $byte = "";
            my $individual;
            ## for all the inds in a snp work out the genotypes
            for ( my $j = 0 ; $j < $ind_num ; $j++ ) {
                ## get the next three float values (12 bytes)
                ## AA prob
                my $b_aa;
                read( GEN, $b_aa, 4 );
                my $aa = unpack( 'f*', $b_aa );
                ## AB prob
                my $b_ab;
                read( GEN, $b_ab, 4 );
                my $ab = unpack( 'f*', $b_ab );
                ## BB prob
                my $b_bb;
                read( GEN, $b_bb, 4 );
                my $bb = unpack( 'f*', $b_bb );

                ## PLINK two-bit codes: 00 = hom A, 10 = het, 11 = hom B, 01 = missing.
                ## (AB and BB used to be swapped here, disagreeing with evoker-helper.pl.)
                if ( $aa > $cutoff ) {
                    $individual = "00";
                }
                elsif ( $ab > $cutoff ) {
                    $individual = "10";
                }
                elsif ( $bb > $cutoff ) {
                    $individual = "11";
                }
                else {
                    ## missing
                    $individual = "01";
                }

                ## earlier individuals occupy the low-order bits of the byte
                $byte = $individual . $byte;
                $bytecounter++;

                if ( $bytecounter == 4 ) {
                    #we've completed a byte, so write it.
                    print BED pack( 'B*', $byte );
                    $bytecounter = 0;
                    $byte = "";
                }
            }

            ## pad the last, partially filled byte of this SNP with zero bits
            if ( $bytecounter != 0 ) {
                for ( my $k = 0 ; $k < 4 - $bytecounter ; $k++ ) {
                    $byte = "00" . $byte;
                }
                print BED pack( 'B*', $byte );
            }

        }

        close(GEN);
        close(BED);

    }
    elsif ( $file =~ /.snp$/ ) {
        unless ( $file =~ /^(\w+)_(\d+)/ ) {
            warn "Skipping '$file': expected a name of the form study_chr_...\n";
            next;
        }
        my ( $study, $chr ) = ( $1, $2 );

        open( SNP, $dir . $file ) or die "Can't open snp file '$file': $!";
        open( BIM, "> $dir$study.$chr.bim" ) or die "Can't open output '$study.$chr.bim': $!";

        while ( my $line = <SNP> ) {
            chomp($line);
            my @values = split( /\s+/, $line );
            my $snp_id = $values[1];
            my $pos = $values[2];
            my $allele_a = $values[3];
            my $allele_b = $values[4];
            ## 'chromosome' 'snp identifier' 'Genetic distance (morgans)' 'Base-pair position (bp units)' 'Allele A' 'Allele B'
            print BIM "$chr $snp_id 0 $pos $allele_a $allele_b\n";
        }
        close(SNP);
        close(BIM);

    }
    elsif ( $file =~ /.sample$/ ) {
        open( SAM, $dir .
$file ) or die "Can't open Sample file '$file': $!"; 138 | $file =~ /^(\w+)_/; 139 | open( FAM, "> $dir$1.fam" ) or die "Can't open output '$1.fam': $!"; 140 | 141 | my $header = ; 142 | my $header2 = ; 143 | while ( my $line = ) { 144 | chomp($line); 145 | my @values = split( /\s+/, $line ); 146 | my $sample_id = $values[1]; 147 | my $sex = $values[4]; 148 | ## 'Family ID' 'Individual ID' 'Paternal ID' 'Maternal ID' 'Sex' 'Phenotype' 149 | print FAM "$sample_id $sample_id 0 0 $sex 0\n"; 150 | } 151 | close(SAM); 152 | close(FAM); 153 | 154 | } 155 | elsif ( $file =~ /.int.bin.gz$/ ) { 156 | $file =~ /^(\w+)_(\d+)/; 157 | ## unzip the intensity file and create a new file using the naming scheme Evoker expects 158 | system("zcat $dir$file > $dir$1.$2.bnt"); 159 | 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/evoker/PDFDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | 5 | import java.awt.event.ActionListener; 6 | import java.awt.event.ActionEvent; 7 | 8 | public class PDFDialog extends JDialog implements ActionListener { 9 | 10 | private boolean success; 11 | private String scoresFile; 12 | private String pdfDir; 13 | private JTextField scoresFileField; 14 | private JTextField pdfDirField; 15 | private JButton pdfBrowseButton; 16 | private JLabel pdfDirLabel; 17 | private JCheckBox allPlotsButton; 18 | private JCheckBox yesPlotsButton; 19 | private JCheckBox maybePlotsButton; 20 | private JCheckBox noPlotsButton; 21 | 22 | private JFileChooser jfc; 23 | 24 | public PDFDialog(JFrame parent){ 25 | super(parent,"Generate PDF from Scores",true); 26 | 27 | jfc = new JFileChooser("user.dir"); 28 | 29 | JPanel contents = new JPanel(); 30 | contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS)); 31 | 32 | JPanel scoresFilePanel = new JPanel(); 33 | scoresFilePanel.add(new JLabel("Scores file: ")); 34 | 
scoresFileField = new JTextField(20); 35 | scoresFilePanel.add(scoresFileField); 36 | JButton scoresBrowseButton = new JButton("Browse"); 37 | scoresBrowseButton.addActionListener(this); 38 | scoresFilePanel.add(scoresBrowseButton); 39 | contents.add(scoresFilePanel); 40 | 41 | contents.add(new JPanel()); 42 | 43 | JPanel savePlotsPanel = new JPanel(); 44 | savePlotsPanel.setLayout(new BoxLayout(savePlotsPanel,BoxLayout.PAGE_AXIS)); 45 | allPlotsButton = new JCheckBox("Save all plots"); 46 | allPlotsButton.setSelected(false); 47 | savePlotsPanel.add(allPlotsButton); 48 | yesPlotsButton = new JCheckBox("Save all Yes plots"); 49 | yesPlotsButton.setSelected(false); 50 | savePlotsPanel.add(yesPlotsButton); 51 | maybePlotsButton = new JCheckBox("Save all Maybe plots"); 52 | maybePlotsButton.setSelected(false); 53 | savePlotsPanel.add(maybePlotsButton); 54 | noPlotsButton = new JCheckBox("Save all No plots"); 55 | noPlotsButton.setSelected(false); 56 | savePlotsPanel.add(noPlotsButton); 57 | 58 | contents.add(savePlotsPanel); 59 | 60 | JPanel pdfDirPanel = new JPanel(); 61 | pdfDirLabel = new JLabel("Destination directory for PDFs: "); 62 | pdfDirPanel.add(pdfDirLabel); 63 | pdfDirField = new JTextField(20); 64 | pdfDirPanel.add(pdfDirField); 65 | pdfBrowseButton = new JButton("Save to"); 66 | pdfBrowseButton.addActionListener(this); 67 | pdfDirPanel.add(pdfBrowseButton); 68 | contents.add(pdfDirPanel); 69 | 70 | contents.add(new JPanel()); 71 | 72 | JPanel butPan = new JPanel(); 73 | JButton okbut = new JButton("OK"); 74 | getRootPane().setDefaultButton(okbut); 75 | okbut.addActionListener(this); 76 | butPan.add(okbut); 77 | JButton cancelbut = new JButton("Cancel"); 78 | cancelbut.addActionListener(this); 79 | butPan.add(cancelbut); 80 | contents.add(butPan); 81 | 82 | this.setContentPane(contents); 83 | } 84 | 85 | public void actionPerformed(ActionEvent e) { 86 | if (e.getActionCommand().equals("OK")){ 87 | scoresFile = scoresFileField.getText(); 88 | pdfDir = 
pdfDirField.getText(); 89 | setSuccess(true); 90 | this.dispose(); 91 | }else if (e.getActionCommand().equals("Browse")){ 92 | jfc.setFileSelectionMode(JFileChooser.FILES_ONLY); 93 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 94 | scoresFileField.setText(jfc.getSelectedFile().getAbsolutePath()); 95 | } 96 | }else if (e.getActionCommand().equals("Save to")) { 97 | jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); 98 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 99 | pdfDirField.setText(jfc.getSelectedFile().getAbsolutePath()); 100 | } 101 | }else if (e.getActionCommand().equals("Cancel")){ 102 | setSuccess(false); 103 | this.dispose(); 104 | } 105 | } 106 | 107 | public boolean success() { 108 | return success; 109 | } 110 | 111 | public String getscoresFile() { 112 | return scoresFile; 113 | } 114 | 115 | public String getPdfDir(){ 116 | return pdfDir; 117 | } 118 | 119 | public boolean allPlots() { 120 | return allPlotsButton.isSelected(); 121 | } 122 | 123 | public boolean yesPlots() { 124 | return yesPlotsButton.isSelected(); 125 | } 126 | 127 | public boolean maybePlots() { 128 | return maybePlotsButton.isSelected(); 129 | } 130 | 131 | public boolean noPlots() { 132 | return noPlotsButton.isSelected(); 133 | } 134 | 135 | public void setSuccess(boolean b) { 136 | success = b; 137 | 138 | } 139 | } -------------------------------------------------------------------------------- /src/resources/evoker-helper.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | ## generate the .bnt and .bed files for just one SNP. 
use strict;
use POSIX qw(ceil floor);
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use File::Spec::Functions;

my $VERSION = "2.4";

## "--version" as first argument: report the script version and stop
if ($ARGV[0] eq "--version") {
    print "version: $VERSION\n";
    exit 1;
}


## Positional arguments
my $snp = $ARGV[0];           # SNP name, used in the output file names and echoed at the end
my $chr = $ARGV[1];           # chromosome, part of the input file names
my $collection = $ARGV[2];    # collection name, prefix of input and output file names
my $index = $ARGV[3];         # record number of the SNP inside the input files
my $numinds = $ARGV[4];       # number of individuals per SNP record
my $tot_snps = $ARGV[5];      # not used below
my $oxford = $ARGV[6];        # true: read Oxford gen/int files
my $platform = $ARGV[7];      # part of the Oxford input file names
my $ukbiobank_v2 = $ARGV[8];  # true: read UK Biobank v2 files
my $outpath = $ARGV[9];       # optional directory for the output files
my $cutoff = 0.9;             # genotype probability needed to make a call (Oxford only)
my $magic_num;
my $bytesPerRecord;
my $buf;

## Outputs are named <collection>.<snp>.bnt / .bed, inside $outpath when given
my $bntpath = "$collection.$snp.bnt";
my $bedpath = "$collection.$snp.bed";
if ($outpath) {
    $bntpath = File::Spec->catdir($outpath, $bntpath);
    $bedpath = File::Spec->catdir($outpath, $bedpath);
}
$bntpath = ">$bntpath";
$bedpath = ">$bedpath";

## NOTE(review): none of the open() calls in this script check for failure;
## a missing input or unwritable output silently yields empty output files.
open (BNTOUT, $bntpath);
open (BEDOUT, $bedpath);

if ($oxford) {

    ## .bed file
    my $gen_file;
    ## perl 5+ includes this module, so hopefully most users will have it.
    ## use the gzipped file when it exists and is non-empty, otherwise the plain one
    if (-s "$collection\_$chr\_$platform.gen.bin.gz") {
        $gen_file = new IO::Uncompress::Gunzip "$collection\_$chr\_$platform.gen.bin.gz";
    } else {
        open($gen_file, "<","$collection\_$chr\_$platform.gen.bin");
    }
    #magic number
    ## (the first 8 bytes of the gen file are copied through unchanged)
    read ($gen_file, $magic_num, 8);
    print BEDOUT $magic_num;

    #jump to position
    ## 12 bytes per individual: three 4-byte values are read for each one below
    $bytesPerRecord = $numinds*12;
    seek ($gen_file, ($index*$bytesPerRecord)+8, 0);

    my $bytecounter = 0;
    my $byte = "";
    my $individual;
    for ( my $i = 0 ; $i < $numinds ; $i++ ) {
        ## AA, AB and BB values for this individual, each unpacked as a float
        my $b_aa;
        read( $gen_file, $b_aa, 4 );
        my $aa = unpack( 'f*', $b_aa );

        my $b_ab;
        read( $gen_file, $b_ab, 4 );
        my $ab = unpack( 'f*', $b_ab );

        my $b_bb;
        read( $gen_file, $b_bb, 4 );
        my $bb = unpack( 'f*', $b_bb );

        ## two-bit genotype code: 00 = AA, 10 = AB, 11 = BB, 01 = no call above the cutoff
        if ( $aa > $cutoff ) {
            $individual = "00";
        }
        elsif ( $ab > $cutoff ) {
            $individual = "10";
        }
        elsif ( $bb > $cutoff ) {
            $individual = "11";
        }
        else {
            ## missing genotype
            $individual = "01";
        }

        ## prepend, so earlier individuals end up in the low-order bits
        $byte = $individual.$byte;
        $bytecounter++;
        if ( $bytecounter == 4 ) {
            ## completed a byte, so write it.
            print BEDOUT pack( 'B*', $byte );
            $bytecounter = 0;
            $byte = "";
        }
    }
    ## fill up any empty bytes
    if ( $bytecounter != 0 ) {
        for ( my $k = 0 ; $k < 4 - $bytecounter ; $k++ ) {
            $byte = "00" . $byte;
        }
        print BEDOUT pack( 'B*', $byte );
    }
    close $gen_file;

    ## .bnt file
    my $int_file;
    ## perl 5+ includes this module, so hopefully most users will have it.
    if (-s "$collection\_$chr\_$platform.int.bin.gz") {
        $int_file = new IO::Uncompress::Gunzip "$collection\_$chr\_$platform.int.bin.gz";
    } else {
        open ($int_file, "<", "$collection\_$chr\_$platform.int.bin");
    }

    ## copy the 8-byte header through unchanged
    read ($int_file, $magic_num, 8);
    print BNTOUT $magic_num;

    #jump to position
    ## 8 bytes per individual -- presumably two 4-byte intensities; TODO confirm
    $bytesPerRecord = $numinds*8;
    seek ($int_file, ($index*$bytesPerRecord)+8, 0);
    read ($int_file, $buf, $bytesPerRecord);
    print BNTOUT $buf;
    close $int_file;
} elsif($ukbiobank_v2){
    ## .bed file
    ## four individuals per byte, rounded up
    $bytesPerRecord = ceil($numinds/4);
    open (BED, "ukb_cal_chr$chr\_v2.bed");
    #magic number and SNP-major mode
    read(BED, $magic_num, 3);
    print BEDOUT $magic_num;

    #jump to position
    seek (BED, ($index*$bytesPerRecord)+3,0);
    read (BED, $buf, $bytesPerRecord);
    print BEDOUT $buf;
    close BED;

    ## .bnt file (==.bin for UKB)
    $bytesPerRecord = $numinds*8;
    open (BNT, "ukb_int_chr$chr\_v2.bin");
    # UKB doesn't have a header
    # read (BNT, $magic_num, 2);
    # print BNTOUT $magic_num;

    #jump to position
    seek (BNT, ($index*$bytesPerRecord), 0);
    read (BNT, $buf, $bytesPerRecord);
    print BNTOUT $buf;
    close BNT;
} else {
    ## default: <collection>.<chr>.bed / .bnt
    ## .bed file
    $bytesPerRecord = ceil($numinds/4);
    open (BED, "$collection.$chr.bed");
    #magic number and SNP-major mode
    read(BED, $magic_num, 3);
    print BEDOUT $magic_num;

    #jump to position
    seek (BED, ($index*$bytesPerRecord)+3,0);
    read (BED, $buf, $bytesPerRecord);
    print BEDOUT $buf;
    close BED;

    ## .bnt file
    $bytesPerRecord = $numinds*8;
    open (BNT, "$collection.$chr.bnt");
    ## 2-byte header is copied through unchanged
    read (BNT, $magic_num, 2);
    print BNTOUT $magic_num;

    #jump to position
    seek (BNT, ($index*$bytesPerRecord)+2, 0);
    read (BNT, $buf, $bytesPerRecord);
    print BNTOUT $buf;
    close BNT;

}

close BEDOUT;
close BNTOUT;

## echo the SNP name once both output files have been written
print "$snp\n";


--------------------------------------------------------------------------------
/src/evoker/Lasso.java:
--------------------------------------------------------------------------------
package evoker;

import java.awt.Point;
import java.awt.geom.Path2D;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import org.jfree.chart.entity.ChartEntity;
import org.jfree.chart.entity.EntityCollection;
import org.jfree.chart.entity.StandardEntityCollection;

/**
 * Implements lasso select. USE ONLY WITH PLOT DIAGRAMS.
 */
public class Lasso {

    /** Representing the enclosed area*/
    private Path2D poly;
    /** To later hold all ChartEntitys within the polygon after they've been
     * calculated once, to not have to search them again. */
    private EntityCollection ec = null;

    /** Starts a new lasso outline at the given point. */
    public Lasso(double x, double y) {
        poly = new Path2D.Double();
        poly.moveTo(x,y);
    }

    /**
     * Collects the entities lying inside the lasso. The result is computed on
     * the first call only and returned unchanged afterwards.
     *
     * @param entityCollection containing all ChartEntity-Objects to be searched through
     * @return EntityCollection containing all ChartEntity-Object within the borders of the selection
     */
    public EntityCollection getContainedEntitys(EntityCollection entityCollection) {
        if (ec == null) {
            ec = new StandardEntityCollection();
            Collection entities = entityCollection.getEntities();
            for (int i = 0; i < entities.size(); i++) {
                ChartEntity entity = entityCollection.getEntity(i);
                if (entity.getToolTipText() != null && "poly".equals(entity.getShapeType())) { // get sure (?)
we only get data-points 41 | EvokerPoint2D p = getCoordinatesOfEntity(entity); 42 | if (poly.contains(p.getX(),p.getY())) { 43 | ec.add(entity); 44 | } 45 | } 46 | } 47 | } 48 | return ec; 49 | } 50 | 51 | /** 52 | * Returns the previously calculated entities. If 53 | * getContainedEntitys(EntityCollection entityCollection) 54 | * hasn't been called beforehand, it'll return null 55 | * 56 | * @return EntityCollection 57 | */ 58 | public EntityCollection getContainedEntitys() { 59 | return ec; 60 | } 61 | 62 | /** 63 | * Reads out all diagram-coordinates of the points. 64 | * 65 | * @param entityCollection containing all ChartEntity-Objects to be searched through 66 | * @return ArrayList containing 67 | */ 68 | public ArrayList getContainedPoints(EntityCollection entityCollection) { 69 | ArrayList al_ret = new ArrayList(); 70 | getContainedEntitys(entityCollection); 71 | Collection entities = ec.getEntities(); 72 | for (int i = 0; i < entities.size(); i++) { 73 | ChartEntity entity = ec.getEntity(i); 74 | al_ret.add(getCoordinatesOfEntity(entity)); 75 | } 76 | return al_ret; 77 | } 78 | 79 | /** 80 | * Reads out all diagram-coordinates of the points. 
81 | * 82 | * @param entityCollection containing all ChartEntity-Objects to be searched through 83 | * @return ArrayList containing 84 | */ 85 | public HashMap getContainedPointsInd(EntityCollection entityCollection) { 86 | HashMap hm_ret = new HashMap(); 87 | getContainedEntitys(entityCollection); 88 | Collection entities = ec.getEntities(); 89 | for (int i = 0; i < entities.size(); i++) { 90 | ChartEntity entity = ec.getEntity(i); 91 | hm_ret.put(getCoordinatesOfEntity(entity), getIndOfEntity(entity)); 92 | } 93 | return hm_ret; 94 | } 95 | 96 | /** 97 | * Returns the (screen-relative) coordinates of an entity 98 | * @param entity object 99 | * @return Point 100 | */ 101 | /** public Point getScreenCoordinatesOfEntity(ChartEntity e) { 102 | String shapeCoords = e.getShapeCoords(); 103 | String[] shapeCoords_array = shapeCoords.split(","); 104 | 105 | // I decided that these points are most like the center of the circle-area 106 | return new Point( 107 | Integer.parseInt(shapeCoords_array[2]), 108 | Integer.parseInt(shapeCoords_array[1])); 109 | }*/ 110 | 111 | /** 112 | * Returns the (Diagram-relative) coordinates of a point 113 | * @param entity-object 114 | * @return Point (containing the coordinates) 115 | */ 116 | public EvokerPoint2D getCoordinatesOfEntity(ChartEntity e) { 117 | String tooltip = e.getToolTipText(); 118 | if (tooltip == null) { 119 | return null; 120 | } 121 | 122 | return new EvokerPoint2D( 123 | Double.parseDouble( 124 | tooltip.substring( 125 | tooltip.indexOf('(') + 1, tooltip.indexOf(','))), 126 | Double.parseDouble( 127 | tooltip.substring( 128 | tooltip.indexOf(',') + 1, 129 | tooltip.indexOf(')')))); 130 | } 131 | 132 | /** 133 | * Returns ind of a ChartEntity 134 | * @param entity object 135 | * @return the ind 136 | */ 137 | public String getIndOfEntity(ChartEntity e) { 138 | String tooltip = e.getToolTipText(); 139 | return tooltip.substring(0, tooltip.indexOf("(") - 1); 140 | } 141 | 142 | /** 143 | * Adds a point to the polygon 
144 | * @param x coordinate 145 | * @param y coordinate 146 | */ 147 | public void addPoint(double x, double y) { 148 | poly.lineTo(x,y); 149 | } 150 | 151 | 152 | 153 | public void close(){ 154 | poly.closePath(); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/evoker/WrapLayout.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.awt.*; 4 | import javax.swing.JScrollPane; 5 | import javax.swing.SwingUtilities; 6 | 7 | /** 8 | * FlowLayout subclass that fully supports wrapping of components. 9 | * 10 | * Written by Rob Camick of tips4java.wordpress.com and used with permission as specified: 11 | * "You are free to use and/or modify any or all code posted on the Java Tips Weblog without restriction." 12 | * 13 | * https://tips4java.wordpress.com/2008/11/06/wrap-layout/ 14 | */ 15 | public class WrapLayout extends FlowLayout 16 | { 17 | private Dimension preferredLayoutSize; 18 | 19 | /** 20 | * Constructs a new WrapLayout with a left 21 | * alignment and a default 5-unit horizontal and vertical gap. 22 | */ 23 | public WrapLayout() 24 | { 25 | super(); 26 | } 27 | 28 | /** 29 | * Constructs a new FlowLayout with the specified 30 | * alignment and a default 5-unit horizontal and vertical gap. 31 | * The value of the alignment argument must be one of 32 | * WrapLayout, WrapLayout, 33 | * or WrapLayout. 34 | * @param align the alignment value 35 | */ 36 | public WrapLayout(int align) 37 | { 38 | super(align); 39 | } 40 | 41 | /** 42 | * Creates a new flow layout manager with the indicated alignment 43 | * and the indicated horizontal and vertical gaps. 44 | *

* The value of the alignment argument must be one of the FlowLayout
     * constants LEFT, CENTER, RIGHT, LEADING or TRAILING.
     * @param align the alignment value
     * @param hgap the horizontal gap between components
     * @param vgap the vertical gap between components
     */
    public WrapLayout(int align, int hgap, int vgap)
    {
        super(align, hgap, vgap);
    }

    /**
     * Returns the preferred dimensions for this layout given the
     * visible components in the specified target container.
     * @param target the component which needs to be laid out
     * @return the preferred dimensions to lay out the
     * subcomponents of the specified container
     */
    @Override
    public Dimension preferredLayoutSize(Container target)
    {
        return layoutSize(target, true);
    }

    /**
     * Returns the minimum dimensions needed to layout the visible
     * components contained in the specified target container.
     * @param target the component which needs to be laid out
     * @return the minimum dimensions to lay out the
     * subcomponents of the specified container
     */
    @Override
    public Dimension minimumLayoutSize(Container target)
    {
        Dimension minimum = layoutSize(target, false);
        // report slightly less than the computed width (one gap plus a pixel)
        minimum.width -= (getHgap() + 1);
        return minimum;
    }

    /**
     * Returns the minimum or preferred dimension needed to layout the target
     * container.
     *
     * @param target target to get layout size for
     * @param preferred should preferred size be calculated
     * @return the dimension to layout the target container
     */
    private Dimension layoutSize(Container target, boolean preferred)
    {
    synchronized (target.getTreeLock())
    {
        //  Each row must fit with the width allocated to the container.
        //  When the container width = 0, the preferred width of the container
        //  has not yet been calculated so lets ask for the maximum.

        int targetWidth = target.getSize().width;
        Container container = target;

        //  Walk up to the nearest ancestor that already has a width
        while (container.getSize().width == 0 && container.getParent() != null)
        {
            container = container.getParent();
        }

        targetWidth = container.getSize().width;

        if (targetWidth == 0)
            targetWidth = Integer.MAX_VALUE;

        int hgap = getHgap();
        int vgap = getVgap();
        Insets insets = target.getInsets();
        int horizontalInsetsAndGap = insets.left + insets.right + (hgap * 2);
        int maxWidth = targetWidth - horizontalInsetsAndGap;

        //  Fit components into the allowed width

        Dimension dim = new Dimension(0, 0);
        int rowWidth = 0;
        int rowHeight = 0;

        int nmembers = target.getComponentCount();

        for (int i = 0; i < nmembers; i++)
        {
            Component m = target.getComponent(i);

            if (m.isVisible())
            {
                Dimension d = preferred ? m.getPreferredSize() : m.getMinimumSize();

                //  Can't add the component to current row. Start a new row.

                if (rowWidth + d.width > maxWidth)
                {
                    addRow(dim, rowWidth, rowHeight);
                    rowWidth = 0;
                    rowHeight = 0;
                }

                //  Add a horizontal gap for all components after the first

                if (rowWidth != 0)
                {
                    rowWidth += hgap;
                }

                rowWidth += d.width;
                rowHeight = Math.max(rowHeight, d.height);
            }
        }

        //  Account for the last (possibly only) row
        addRow(dim, rowWidth, rowHeight);

        dim.width += horizontalInsetsAndGap;
        dim.height += insets.top + insets.bottom + vgap * 2;

        //  When using a scroll pane or the DecoratedLookAndFeel we need to
        //  make sure the preferred size is less than the size of the
        //  target container so shrinking the container size works
        //  correctly. Removing the horizontal gap is an easy way to do this.

        Container scrollPane = SwingUtilities.getAncestorOfClass(JScrollPane.class, target);

        if (scrollPane != null && target.isValid())
        {
            dim.width -= (hgap + 1);
        }

        return dim;
    }
    }

    /*
     *  A new row has been completed. Use the dimensions of this row
     *  to update the preferred size for the container.
     *
     *  @param dim update the width and height when appropriate
     *  @param rowWidth the width of the row to add
     *  @param rowHeight the height of the row to add
     */
    private void addRow(Dimension dim, int rowWidth, int rowHeight)
    {
        dim.width = Math.max(dim.width, rowWidth);

        //  Every row after the first is separated by a vertical gap
        if (dim.height > 0)
        {
            dim.height += getVgap();
        }

        dim.height += rowHeight;
    }
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Evoker
======

Description
-----------
Evoker is a graphical tool for plotting genotype intensity data in order to assess quality of genotype calls. It implements a compact, binary format which allows rapid access to data, even with hundreds of thousands of observations.

Evoker consists of two components:

* A Java desktop application to be used on a local machine
* A perl script `evoker-helper.pl` which will reside on the server where your intensity and genotype data is stored. This script reads small slices of your intensity and genotype files relevant to the SNP of interest and transfers this to the Java application over SSH.

UK Biobank v2
-------------
Evoker has been adapted to view UK Biobank v2 data (released July 2017). Evoker expects the UK Biobank files to have the same naming conventions from the original data release.
In a directory the following must all exist together: 16 | 17 | For each chromosome of interest, the following files must all sit in the same directory: 18 | 19 | ``` 20 | ukb_cal_chr{chromosome}_v2.bed 21 | ukb_snp_chr{chromosome}_v2.bim 22 | ukb_int_chr{chromosome}_v2.bin 23 | ``` 24 | 25 | In addition, you must point Evoker to the original fam file (with the batch information in the final column). 26 | 27 | #### UK Biobank v2 steps 28 | 29 | 1. [Install Evoker and remote helper script](https://github.com/wtsi-medical-genomics/evoker#installing) 30 | 2. On local machine, [open Evoker](https://github.com/wtsi-medical-genomics/evoker#running) 31 | 3. `File` > `Connect to remote server` 32 | 4. Select `UK Biobank v2` file format. Then enter: 33 | 34 | * `Host` the remote server hostname 35 | * `Port` port to SSH to (default 22) 36 | * `Remote directory` the absolute path where the UK Biobank files reside. 37 | * `Local directory` a local location where temporary data slices can be stored. 38 | * `Username` your username on the remote host 39 | * `Password` your password on the remote host 40 | * `Remote FAM file` the location on the remote machine of the `fam` file provided to you by UK Biobank (including the final column which lists the batches). 41 | * `Remote temp directory` it is assumed you will not have write access to the release directory (institutions will most likely share a single release) so please specify a directory you have read/write access to where temporary subsets of the intensity/plink data can be stored on the remote machine. 42 | 43 | 5. Click OK to start transferring the fam and any bim files in the remote directory to your local machine. The speed of this process will depend on your data connection. 44 | 6. Enter the SNP of interest (rsid from the bim file) to view. 45 | 7. Scroll up and down to view all of the batches. If desired, sort on Batch name, MAF, HWE p-value, or GPC from the `View` > `Sort` menu. 
46 | 47 | **Note**: at present it is not possible to re-call (with the lasso select) UK Biobank v2 data. This feature will be available in the next release. 48 | 49 | 50 | Evoker Lite 51 | ----------- 52 | For a Python/CLI tool to generate cluster plot PNGs (including UK Biobank v2 data) see [Evoker Lite](https://github.com/dlrice/evoker-lite). 53 | 54 | Maintainer 55 | ---------- 56 | Daniel Rice (dr9@sanger.ac.uk) 57 | 58 | Authors 59 | ------- 60 | * James Morris 61 | * Jeff Barrett 62 | 63 | Contributors 64 | ------------ 65 | * Tim Poterba 66 | * Natalie Wirth 67 | * Daniel Rice 68 | 69 | Requirements 70 | ------------ 71 | * Desktop application: Java 8.0 (also known as 1.8) or later. 72 | * Remote helper script `evoker-helper.pl`: Perl 5 73 | 74 | Installing 75 | ---------- 76 | #### Desktop application 77 | Download and extract the tarball of the [latest release](https://github.com/wtsi-medical-genomics/evoker/releases) on your local machine. 78 | 79 | #### Remote helper script (`evoker-helper.pl`) 80 | To view data that is on a remote machine (i.e. a UNIX server), download `evoker-helper.pl` and add it to your path so that it is executable everywhere. If using bash, the following will download this into a folder in your home directory: 81 | 82 | ```bash 83 | mkdir ~/evoker-helper 84 | curl -o ~/evoker-helper/evoker-helper.pl https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/master/src/resources/evoker-helper.pl 85 | chmod 777 ~/evoker-helper/evoker-helper.pl 86 | echo export PATH=\"~/evoker-helper:\$PATH\" >> ~/.bashrc 87 | . ~/.bashrc 88 | ``` 89 | 90 | To test that it is working, go to some (non-home directory) location: 91 | 92 | ```bash 93 | cd / 94 | evoker-helper.pl --version 95 | ``` 96 | 97 | and you should see the version reported to you. 
98 | 99 | 100 | Running 101 | ------- 102 | Within the untarred release directory you can either double-click the Evoker.jar contained within or run it from the command line: 103 | 104 | ``` 105 | $ java -jar Evoker.jar 106 | ``` 107 | 108 | To run with more memory than the default allocation, the ```-Xmx``` option can be used. For example, to specify 1 GB of memory: 109 | 110 | ``` 111 | $ java -Xmx1024m -jar Evoker.jar 112 | ``` 113 | 114 | For more information see the documentation included in the release. 115 | 116 | Documentation 117 | ------------- 118 | A PDF is included in each release tarball. The LaTeX and image files used to produce this are also available in the docs directory. 119 | 120 | Building 121 | -------- 122 | To build a jar file from source: 123 | 124 | 1. Clone the repository. 125 | 2. Copy ```evoker/resources/build.xml``` into ```evoker/```. 126 | 3. Use the command ```ant evoker``` to build the jar file. 127 | 4. Use the command ```ant clean``` to remove temporary build files. 128 | 129 | Todo 130 | ---- 131 | - [x] Support Windows 132 | - [x] Handle special characters in password 133 | - [x] Exclude individuals with a negative number as their sample ID. 134 | - [ ] Deal with hidden files (eg `.samples.fam`). 135 | - [ ] Gracefully fail if evoker-helper.pl is not reachable at the remote server. 136 | - [ ] Export BED changes when viewing over a remote connection. 137 | - [ ] Plot SNP Posterior ellipses. 138 | - [ ] Save the plot array to remove the need to re-load all data on sorting. 139 | - [ ] View all batches at once (with ability to filter eg on MAF). 140 | 141 | Citation 142 | -------- 143 | James A. Morris, Joshua C. Randall, Julian B. Maller, Jeffrey C. Barrett; Evoker: a visualization tool for genotype intensity data. Bioinformatics 2010; 26 (14): 1786-1787. 
doi: 10.1093/bioinformatics/btq280 144 | 145 | Website 146 | ------- 147 | [http://www.sanger.ac.uk/science/tools/evoker](http://www.sanger.ac.uk/science/tools/evoker) 148 | 149 | 150 | License 151 | ------- 152 | MIT License (see LICENSE.md) 153 | -------------------------------------------------------------------------------- /src/evoker/MarkerListDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | 5 | import java.awt.event.ActionListener; 6 | import java.awt.event.ActionEvent; 7 | /** Modal dialog for choosing a marker list file and, optionally, a destination directory and plot categories for PDF export. */ 8 | public class MarkerListDialog extends JDialog implements ActionListener { 9 | 10 | private boolean success = false; 11 | private String markerList; 12 | private String pdfDir; 13 | private boolean savePlots; 14 | private boolean all; 15 | private boolean yes; 16 | private boolean maybe; 17 | private boolean no; 18 | 19 | private JTextField markerlistField; 20 | private JTextField pdfDirField; 21 | private JButton pdfBrowseButton; 22 | private JLabel pdfDirLabel; 23 | private JRadioButton savePlotsButton; 24 | private JCheckBox allPlotsButton; 25 | private JCheckBox yesPlotsButton; 26 | private JCheckBox maybePlotsButton; 27 | private JCheckBox noPlotsButton; 28 | 29 | private JFileChooser jfc; 30 | 31 | public MarkerListDialog(JFrame parent){ 32 | super(parent,"Load Marker List",true); 33 | /* "user.dir" is a system property name, not a path: resolve it so browsing starts in the working directory */ 34 | jfc = new JFileChooser(System.getProperty("user.dir")); 35 | 36 | JPanel contents = new JPanel(); 37 | contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS)); 38 | 39 | JPanel markerlistPanel = new JPanel(); 40 | markerlistPanel.add(new JLabel("Marker list: ")); 41 | markerlistField = new JTextField(20); 42 | markerlistPanel.add(markerlistField); 43 | JButton markerBrowseButton = new JButton("Browse"); 44 | markerBrowseButton.addActionListener(this); 45 | markerlistPanel.add(markerBrowseButton); 46 | contents.add(markerlistPanel); 47 | 48 | contents.add(new JPanel()); 49 | 50 | JPanel savePlotsPanel = new 
JPanel(); 51 | savePlotsPanel.setLayout(new BoxLayout(savePlotsPanel,BoxLayout.PAGE_AXIS)); 52 | savePlotsButton = new JRadioButton("Save viewed plots to PDF"); 53 | savePlotsButton.setSelected(false); 54 | savePlotsButton.addActionListener(this); 55 | savePlotsPanel.add(savePlotsButton); 56 | allPlotsButton = new JCheckBox("Save all plots"); 57 | allPlotsButton.setSelected(false); 58 | allPlotsButton.setEnabled(false); 59 | savePlotsPanel.add(allPlotsButton); 60 | yesPlotsButton = new JCheckBox("Save all Yes plots"); 61 | yesPlotsButton.setSelected(false); 62 | yesPlotsButton.setEnabled(false); 63 | savePlotsPanel.add(yesPlotsButton); 64 | maybePlotsButton = new JCheckBox("Save all Maybe plots"); 65 | maybePlotsButton.setSelected(false); 66 | maybePlotsButton.setEnabled(false); 67 | savePlotsPanel.add(maybePlotsButton); 68 | noPlotsButton = new JCheckBox("Save all No plots"); 69 | noPlotsButton.setSelected(false); 70 | noPlotsButton.setEnabled(false); 71 | savePlotsPanel.add(noPlotsButton); 72 | 73 | contents.add(savePlotsPanel); 74 | 75 | JPanel pdfDirPanel = new JPanel(); 76 | pdfDirLabel = new JLabel("Destination directory for PDFs: "); 77 | pdfDirLabel.setEnabled(false); 78 | pdfDirPanel.add(pdfDirLabel); 79 | pdfDirField = new JTextField(20); 80 | pdfDirField.setEnabled(false); 81 | pdfDirPanel.add(pdfDirField); 82 | pdfBrowseButton = new JButton("Save to"); 83 | pdfBrowseButton.addActionListener(this); 84 | pdfBrowseButton.setEnabled(false); 85 | pdfDirPanel.add(pdfBrowseButton); 86 | contents.add(pdfDirPanel); 87 | 88 | contents.add(new JPanel()); 89 | 90 | JPanel butPan = new JPanel(); 91 | JButton okbut = new JButton("OK"); 92 | getRootPane().setDefaultButton(okbut); 93 | okbut.addActionListener(this); 94 | butPan.add(okbut); 95 | JButton cancelbut = new JButton("Cancel"); 96 | cancelbut.addActionListener(this); 97 | butPan.add(cancelbut); 98 | contents.add(butPan); 99 | 100 | this.setContentPane(contents); 101 | } 102 | 103 | public void 
actionPerformed(ActionEvent e) { 104 | if (e.getActionCommand().equals("OK")){ 105 | markerList = markerlistField.getText(); 106 | pdfDir = pdfDirField.getText(); 107 | success = true; 108 | this.dispose(); 109 | }else if (e.getActionCommand().equals("Save viewed plots to PDF")) { 110 | if(savePlotsButton.isSelected()) { 111 | pdfDirLabel.setEnabled(true); 112 | pdfDirField.setEnabled(true); 113 | pdfBrowseButton.setEnabled(true); 114 | allPlotsButton.setEnabled(true); 115 | yesPlotsButton.setEnabled(true); 116 | maybePlotsButton.setEnabled(true); 117 | noPlotsButton.setEnabled(true); 118 | }else { 119 | pdfDirLabel.setEnabled(false); 120 | pdfDirField.setEnabled(false); 121 | pdfBrowseButton.setEnabled(false); 122 | allPlotsButton.setEnabled(false); 123 | yesPlotsButton.setEnabled(false); 124 | maybePlotsButton.setEnabled(false); 125 | noPlotsButton.setEnabled(false); 126 | } 127 | }else if (e.getActionCommand().equals("Browse")){ 128 | jfc.setFileSelectionMode(JFileChooser.FILES_ONLY); 129 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 130 | markerlistField.setText(jfc.getSelectedFile().getAbsolutePath()); 131 | } 132 | }else if (e.getActionCommand().equals("Save to")) { 133 | jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); 134 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 135 | pdfDirField.setText(jfc.getSelectedFile().getAbsolutePath()); 136 | } 137 | }else if (e.getActionCommand().equals("Cancel")){ 138 | this.dispose(); 139 | } 140 | } 141 | 142 | public boolean success() { 143 | return success; 144 | } 145 | 146 | public String getMarkerList() { 147 | return markerList; 148 | } 149 | 150 | public String getPdfDir(){ 151 | return pdfDir; 152 | } 153 | 154 | public boolean savePlots() { 155 | return savePlotsButton.isSelected(); 156 | } 157 | 158 | public boolean allPlots() { 159 | return allPlotsButton.isSelected(); 160 | } 161 | 162 | public boolean yesPlots() { 163 | return yesPlotsButton.isSelected(); 164 | } 165 
| 166 | public boolean maybePlots() { 167 | return maybePlotsButton.isSelected(); 168 | } 169 | 170 | public boolean noPlots() { 171 | return noPlotsButton.isSelected(); 172 | } 173 | } -------------------------------------------------------------------------------- /src/evoker/MarkerData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.util.Hashtable; 4 | import java.util.Vector; 5 | import java.io.BufferedReader; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.HashMap; 9 | import java.util.regex.Pattern; 10 | 11 | /** 12 | * Holds all the data about a set of SNPs, usually for one chromosome. 13 | * 14 | */ 15 | public class MarkerData { 16 | 17 | //Hashtable snpIndexTable; 18 | //Hashtable snpAlleleATable; 19 | //Hashtable snpAlleleBTable; 20 | 21 | //while this seems ridiculous, it is a considerable memory savings which is now not exposed anywhere 22 | //outside this class. Instead of having a hash keyed on strings of chroms, taking up something like 23 | //20 bytes per key, even though there are only a few possibilities. we do this dance to simultaneously 24 | //avoid the memory overhead for millions of entries while allowing "chrom" to be anything, rather than 25 | //just numbers 1..22 etc. 
26 | 27 | HashMap chromosomeLookup; // chromosome -> id 28 | HashMap chromosomeBackLookup;// id -> chromosome 29 | 30 | HashMap markerTable; // SNP_Name -> Marker 31 | HashMap collectionIndices;// Collection -> ID (from collectionIndices after first chromosome file) 32 | HashMap snpsPerCollection;// Collection -> Number of SNPs 33 | private int numCollections; // Number of Collections 34 | private int runningCount; // Number of BimFiles so far 35 | 36 | //HashMap>> snpDB = new HashMap>>(); 37 | 38 | public MarkerData(int numCollections){ 39 | this.numCollections = numCollections; 40 | markerTable = new HashMap(); 41 | collectionIndices = new HashMap(); 42 | snpsPerCollection = new HashMap(); 43 | chromosomeLookup = new HashMap(); 44 | chromosomeBackLookup = new HashMap(); 45 | runningCount = -1; 46 | } 47 | 48 | 49 | public int getSampleCollectionIndex(String collection){ 50 | return collectionIndices.get(collection); 51 | } 52 | 53 | /** 54 | * Returns a Random SNP ID from markerTable (all IDs as key) 55 | * @return SNP ID 56 | */ 57 | public String getRandomSNP(){ 58 | Vector v = new Vector(markerTable.keySet()); 59 | return (String)v.get((int)(Math.random()*markerTable.keySet().size())); 60 | } 61 | 62 | 63 | /** 64 | * Indexes the markers of one bim (or Oxford SNP) file, recording each marker's position in the file 65 | * @param bimFile path of the marker file to read 66 | * @param collection name of the sample collection the file belongs to 67 | * @param chromosome chromosome label, expected to be registered already via addChromToLookup 68 | * @param isOx true to take the alleles from columns 4 and 5 instead of columns 5 and 6 69 | * @throws IOException if the file cannot be read, or a line has neither a single column nor enough columns for the alleles 70 | */ 71 | public void addFile(String bimFile, String collection, String chromosome, 72 | boolean isOx) throws IOException { 73 | 74 | // All of the chromosomes for one collection are loaded at once in DataDirectory 75 | // which is why we can increase runningCount once the collection doesn't exist in 76 | // collectionIndices as a key and not worry about returning to it later. 
77 | if (!collectionIndices.containsKey(collection)){ 78 | runningCount++; 79 | collectionIndices.put(collection,runningCount); 80 | } 81 | 82 | // We shouldn't get a NullPointerException here because addChromToLookup has been called 83 | // in DataDirectory when the bim or snp file is parsed. 84 | byte chrom = chromosomeLookup.get(chromosome); 85 | String currentLine; 86 | try (BufferedReader bimReader = new BufferedReader(new FileReader(bimFile))) { 87 | // try-with-resources closes the reader even when a line fails to parse 88 | //read through bim file to record marker order so we can quickly index 89 | //into binary files 90 | int index = 0; 91 | String[] bits; 92 | boolean missingAlleles = false; 93 | while ((currentLine = bimReader.readLine()) != null){ 94 | bits = currentLine.split("\\s+"); 95 | StringBuffer snpid = null; 96 | char a = 'A',b = 'B'; 97 | // Oxford files carry the alleles in columns 4-5 (5 columns needed), bim files in columns 5-6 (6 needed) 98 | if(bits.length >= (isOx ? 5 : 6)) { 99 | snpid = new StringBuffer(bits[1]); 100 | if (isOx){ 101 | a = bits[3].toCharArray()[0]; 102 | b = bits[4].toCharArray()[0]; 103 | }else{ 104 | a = bits[4].toCharArray()[0]; 105 | b = bits[5].toCharArray()[0]; 106 | } 107 | } else if (bits.length == 1){ 108 | // if there is just 1 column assume the file contains only a SNP id 109 | missingAlleles = true; 110 | snpid = new StringBuffer(bits[0]); 111 | } else { 112 | throw new IOException("Malformed line " + (index + 1) + " in " + bimFile + ": unexpected column count " + bits.length); } 113 | // not sure if these files exist 114 | // else if(bits.length == 3) { 115 | // // if there are 3 columns assume the file contains name, id and position 116 | // missingAlleles = true; 117 | // snpid = new StringBuffer(bits[1]); 118 | // } 119 | String stringSnpid = snpid.toString(); 120 | if (!markerTable.containsKey(stringSnpid)) 121 | markerTable.put(stringSnpid, new Marker(numCollections,a,b,chrom)); 122 | 123 | //TP: only the first two args are used in addSampleCollection 124 | markerTable.get(stringSnpid).addSampleCollection(runningCount,index++,a,b,stringSnpid); 125 | 126 | // if(! snpDB.containsKey(collection)) snpDB.put(collection, new HashMap>()); 127 | // if(! 
snpDB.get(collection).containsKey(chromosome)) snpDB.get(collection).put(chromosome, new Vector()); 128 | // snpDB.get(collection).get(chromosome).add(snpid.toString()); 129 | 130 | } 131 | //TP CHANGED THIS 132 | int snpsSoFar = 0; 133 | if (snpsPerCollection.containsKey(collection)) 134 | snpsSoFar = snpsPerCollection.get(collection); 135 | 136 | snpsPerCollection.put(collection,index + snpsSoFar); 137 | snpsPerCollection.put(collection+chromosome,index); 138 | 139 | if (missingAlleles) { 140 | Genoplot.ld.log("WARNING: SNP file does not contain allele information"); 141 | } 142 | } 143 | } 144 | // public HashMap> getSnpInfo(String collection){ 145 | // return snpDB.get(collection); 146 | // } 147 | 148 | public char[] getAlleles(String snp){ 149 | return markerTable.get(snp).getAlleles(); 150 | } 151 | 152 | public String getChrom(String name){ 153 | if (markerTable.get(name) != null){ 154 | return chromosomeBackLookup.get(markerTable.get(name).getChrom()); 155 | }else{ 156 | return null; 157 | } 158 | } 159 | 160 | public HashMap getMarkerTable(){ 161 | return markerTable; 162 | } 163 | 164 | /** 165 | * Returns the number of SNPs contained by a Collection 166 | * @param collectionName 167 | * @return SNP number 168 | */ 169 | public int getNumSNPs(String collection) { 170 | return snpsPerCollection.get(collection); 171 | } 172 | 173 | public Integer getIndex(String markerName, int sampleIndex){ 174 | if (markerTable.get(markerName) != null){ 175 | return markerTable.get(markerName).getIndex(sampleIndex); 176 | }else{ 177 | return -1; 178 | } 179 | } 180 | 181 | public void addChromToLookup(String chrom, byte counter) { 182 | chromosomeLookup.put(chrom,counter); 183 | chromosomeBackLookup.put(counter,chrom); 184 | } 185 | 186 | public boolean exists(String name) { 187 | return markerTable.containsKey(name); 188 | } 189 | 190 | public Boolean isSexSnp(String name) { 191 | return isSexChrom(getChrom(name)); 192 | } 193 | 194 | public Boolean isSexChrom(String 
chrom) { 195 | return chrom.toLowerCase().contains("x") || chrom.toLowerCase().contains("y"); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/evoker/DataConnectionDialog.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import javax.swing.*; 4 | import java.awt.*; 5 | import java.awt.event.ActionListener; 6 | import java.awt.event.ActionEvent; 7 | import evoker.Types.FileFormat; 8 | 9 | public class DataConnectionDialog extends JDialog implements ActionListener { 10 | 11 | private FileFormat fileFormat; 12 | private JPasswordField pf; 13 | private char[] password; 14 | private String username; 15 | private String remoteDir; 16 | private String localDir; 17 | private String remoteTempDir; 18 | private String host; 19 | private int port; 20 | private String fam; 21 | private JTextField userField; 22 | private JTextField remdirField; 23 | private JTextField remoteTempDirField; 24 | private JTextField famField; 25 | private JTextField locdirField; 26 | private JTextField hostField; 27 | private JTextField portField; 28 | private JPanel famPanel; 29 | private JPanel remoteTempDirPanel; 30 | private JCheckBox emptyIt; 31 | private JRadioButton defaultFormatButton; 32 | private JRadioButton oxfordFormatButton; 33 | private JRadioButton ukBioBankFormatButton; 34 | private Boolean cancelled; 35 | 36 | public DataConnectionDialog(JFrame parent){ 37 | super(parent,"Data Connection",true); 38 | fam = ""; 39 | JPanel contents = new JPanel(); 40 | contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS)); 41 | cancelled = true; 42 | JPanel hostPanel = new JPanel(); 43 | hostPanel.add(new JLabel("Host: ")); 44 | hostField = new JTextField(20); 45 | hostPanel.add(hostField); 46 | 47 | hostPanel.add(new JLabel("Port: ")); 48 | portField = new JTextField("22", 3); 49 | hostPanel.add(portField); 50 | contents.add(hostPanel); 51 | 52 | JPanel remdirPanel = 
new JPanel(); 53 | remdirPanel.add(new JLabel("Remote directory: ")); 54 | remdirField = new JTextField(30); 55 | remdirPanel.add(remdirField); 56 | contents.add(remdirPanel); 57 | 58 | JPanel localdirPanel = new JPanel(); 59 | localdirPanel.add(new JLabel("Local directory: ")); 60 | locdirField = new JTextField(20); 61 | localdirPanel.add(locdirField); 62 | JButton browseButton = new JButton("Browse"); 63 | browseButton.addActionListener(this); 64 | localdirPanel.add(browseButton); 65 | contents.add(localdirPanel); 66 | 67 | JPanel bottomPanel = new JPanel(); 68 | 69 | JPanel loginPanel = new JPanel(); 70 | loginPanel.setLayout(new BoxLayout(loginPanel,BoxLayout.Y_AXIS)); 71 | 72 | JPanel userPanel = new JPanel(); 73 | userPanel.add(new JLabel("Username: ")); 74 | userField = new JTextField(10); 75 | userPanel.add(userField); 76 | loginPanel.add(userPanel); 77 | 78 | JPanel passPanel = new JPanel(); 79 | passPanel.add(new JLabel("Password: ")); 80 | pf = new JPasswordField(8); 81 | passPanel.add(pf); 82 | loginPanel.add(passPanel); 83 | 84 | bottomPanel.add(loginPanel); 85 | 86 | ButtonGroup bg = new ButtonGroup(); 87 | JPanel formatPanel = new JPanel(); 88 | formatPanel.setLayout(new BoxLayout(formatPanel,BoxLayout.Y_AXIS)); 89 | 90 | defaultFormatButton = new JRadioButton("Default format"); 91 | defaultFormatButton .addActionListener(this); 92 | formatPanel.add(defaultFormatButton); 93 | defaultFormatButton.setSelected(true); 94 | bg.add(defaultFormatButton); 95 | 96 | oxfordFormatButton = new JRadioButton("Oxford format"); 97 | oxfordFormatButton.addActionListener(this); 98 | formatPanel.add(oxfordFormatButton); 99 | bg.add(oxfordFormatButton); 100 | 101 | ukBioBankFormatButton = new JRadioButton("UK BioBank v2 format"); 102 | ukBioBankFormatButton.addActionListener(this); 103 | formatPanel.add(ukBioBankFormatButton); 104 | bg.add(ukBioBankFormatButton); 105 | bottomPanel.add(formatPanel); 106 | contents.add(bottomPanel); 107 | 108 | famPanel = new JPanel(); 
109 | famPanel.add(new JLabel("Remote FAM file: ")); 110 | famField = new JTextField(30); 111 | famPanel.add(famField); 112 | famPanel.setVisible(false); 113 | 114 | remoteTempDirPanel = new JPanel(); 115 | remoteTempDirPanel.add(new JLabel("Remote temp directory: ")); 116 | remoteTempDirField = new JTextField(30); 117 | remoteTempDirPanel.add(remoteTempDirField); 118 | remoteTempDirPanel.setVisible(false); 119 | 120 | 121 | contents.add(famPanel); 122 | contents.add(remoteTempDirPanel); 123 | 124 | //TODO: should this be reactivated? 125 | emptyIt = new JCheckBox("Clear local cache?"); 126 | //contents.add(emptyIt); 127 | 128 | JPanel butPan = new JPanel(); 129 | JButton okbut = new JButton("OK"); 130 | getRootPane().setDefaultButton(okbut); 131 | okbut.addActionListener(this); 132 | butPan.add(okbut); 133 | JButton cancelbut = new JButton("Cancel"); 134 | cancelbut.addActionListener(this); 135 | butPan.add(cancelbut); 136 | contents.add(butPan); 137 | 138 | this.setContentPane(contents); 139 | this.setPreferredSize(new Dimension(550,350)); 140 | this.setMinimumSize(new Dimension(550,350)); 141 | } 142 | 143 | public void actionPerformed(ActionEvent e) { 144 | String command = e.getActionCommand(); 145 | if (command.equals("UK BioBank v2 format")) { 146 | famPanel.setVisible(true); 147 | remoteTempDirPanel.setVisible(true); 148 | } else if (command.equals("Default format") || command.equals("Oxford format")) { 149 | famPanel.setVisible(false); 150 | remoteTempDirPanel.setVisible(false); 151 | } else if (command.equals("OK")){ 152 | password = pf.getPassword(); 153 | username = userField.getText(); 154 | remoteDir = remdirField.getText(); 155 | localDir = locdirField.getText(); 156 | host = hostField.getText(); 157 | port = Integer.parseInt(portField.getText()); 158 | cancelled = false; 159 | if (defaultFormatButton.isSelected()) { 160 | fileFormat = FileFormat.DEFAULT; 161 | } else if (oxfordFormatButton.isSelected()) { 162 | fileFormat = FileFormat.OXFORD; 163 | 
} else if (ukBioBankFormatButton.isSelected()) { 164 | fileFormat = FileFormat.UKBIOBANK; 165 | fam = famField.getText(); 166 | remoteTempDir = remoteTempDirField.getText(); 167 | 168 | } 169 | this.dispose(); 170 | }else if (command.equals("Browse")){ 171 | JFileChooser jfc = new JFileChooser(System.getProperty("user.dir")); /* resolve the property; the literal "user.dir" is not a path */ 172 | jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); 173 | if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){ 174 | locdirField.setText(jfc.getSelectedFile().getAbsolutePath()); 175 | } 176 | }else if (command.equals("Cancel")){ 177 | cancelled = true; 178 | this.dispose(); 179 | } 180 | } 181 | 182 | public boolean getEmpty(){ 183 | return emptyIt.isSelected(); 184 | } 185 | 186 | public char[] getPassword(){ 187 | return password; 188 | } 189 | 190 | public String getUsername(){ 191 | return username; 192 | } 193 | 194 | public String getRemoteDirectory() { 195 | return remoteDir; 196 | } 197 | 198 | public String getHost() { 199 | return host; 200 | } 201 | 202 | public int getPort() { 203 | return port; 204 | } 205 | 206 | public FileFormat getFileFormat(){ 207 | return fileFormat; 208 | } 209 | 210 | public String getFam() { return fam; } 211 | 212 | public String getRemoteTempDir(){ return remoteTempDir; } 213 | 214 | public boolean isOxformat() { 215 | return getFileFormat() == FileFormat.OXFORD; 216 | } 217 | 218 | public boolean isCancelled() { return cancelled; } 219 | /** Zeroes the stored password characters, if any were captured by OK, and blanks the password field. */ 220 | public void clearPassword(){ 221 | if (password != null){ 222 | java.util.Arrays.fill(password, '\0'); 223 | } 224 | pf.setText(""); 225 | } 226 | 227 | public String getLocalDirectory() { 228 | return localDir; 229 | } 230 | } -------------------------------------------------------------------------------- /src/evoker/PlotPanel.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import org.jfree.data.xy.XYSeriesCollection; 4 | import org.jfree.data.xy.XYDataset; 5 | import org.jfree.chart.JFreeChart; 
6 | import org.jfree.chart.ChartFactory; 7 | import org.jfree.chart.ChartPanel; 8 | import org.jfree.chart.ChartUtilities; 9 | import org.jfree.chart.labels.StandardXYToolTipGenerator; 10 | import org.jfree.chart.renderer.xy.XYItemRenderer; 11 | import org.jfree.chart.title.TextTitle; 12 | import org.jfree.chart.plot.PlotOrientation; 13 | import org.jfree.chart.plot.XYPlot; 14 | 15 | import javax.swing.*; 16 | import java.awt.*; 17 | import java.awt.geom.Ellipse2D; 18 | import java.text.NumberFormat; 19 | import java.text.DecimalFormat; 20 | import java.text.DecimalFormatSymbols; 21 | import java.text.FieldPosition; 22 | import java.util.Locale; 23 | import java.io.File; 24 | import java.io.IOException; 25 | import java.io.OutputStream; 26 | 27 | import evoker.Types.CoordinateSystem; 28 | 29 | public class PlotPanel extends JPanel { 30 | 31 | /** 32 | * Mode, determining the way, the Diagram responses to mouse gestures. 33 | * true means lasso select, false means zooming 34 | * in. 35 | */ 36 | 37 | ChartPanel generatePlot; 38 | 39 | protected Genoplot theGenoplot; 40 | private JFreeChart jfc; 41 | private PlotData data; 42 | private String title, xlab, ylab; 43 | private boolean foundData; 44 | static NumberFormat nf = NumberFormat.getInstance(Locale.US); 45 | boolean longStats; 46 | double totalMaf; 47 | int totalSamples; 48 | String collection; 49 | JPanel statistics = null; 50 | 51 | static { 52 | nf.setMaximumFractionDigits(2); 53 | nf.setMinimumFractionDigits(2); 54 | } 55 | 56 | PlotPanel(Genoplot gp, String title, PlotData pd, int plotHeight, int plotWidth, 57 | boolean longStats, double totalMaf, int totalSamples, String collection) { 58 | this.theGenoplot = gp; 59 | this.title = title; 60 | this.data = pd; 61 | this.longStats = longStats; 62 | this.totalMaf = totalMaf; 63 | this.totalSamples = totalSamples; 64 | this.collection = collection; 65 | 66 | switch (pd.getCoordSystem()) { 67 | case POLAR: 68 | this.xlab = String.valueOf("\u03F4"); 69 | this.ylab 
= String.valueOf("r"); 70 | break; 71 | case UKBIOBANK: 72 | this.xlab = String.valueOf("Contrast: log\u2082(A/B)"); 73 | this.ylab = String.valueOf("Strength: log\u2082(A\u00D7B)/2"); 74 | break; 75 | default: 76 | this.xlab = String.valueOf("X"); 77 | this.ylab = String.valueOf("Y"); 78 | break; 79 | } 80 | 81 | this.setLayout(new BoxLayout(this, BoxLayout.Y_AXIS)); 82 | this.setPreferredSize(new Dimension(plotHeight, plotWidth)); 83 | this.setMaximumSize(new Dimension(plotHeight, plotWidth)); 84 | } 85 | 86 | protected void refresh() { 87 | this.removeAll(); 88 | XYSeriesCollection xysc = data.generatePoints(); 89 | if (xysc != null) { 90 | setFoundData(true); 91 | generatePlot = generatePlot(xysc); 92 | add(generatePlot); 93 | statistics = new JPanel(); 94 | add(generateInfo()); 95 | } else { 96 | setFoundData(false); 97 | this.setBackground(Color.WHITE); 98 | add(Box.createVerticalGlue()); 99 | JLabel l = new JLabel("No data found for " + title); 100 | l.setAlignmentX(Component.CENTER_ALIGNMENT); 101 | add(l); 102 | add(Box.createVerticalGlue()); 103 | } 104 | } 105 | 106 | private void setFoundData(boolean b) { 107 | foundData = b; 108 | } 109 | 110 | void saveToFile(File f) throws IOException { 111 | ChartUtilities.saveChartAsPNG(f, jfc, 400, 400); 112 | } 113 | 114 | public JPanel generateInfo() { 115 | statistics.setBackground(Color.white); 116 | 117 | boolean compressStats = true; 118 | 119 | JLabel mafLabel = new JLabel(); 120 | JLabel gpcLabel = new JLabel(); 121 | JLabel hwpLabel = new JLabel(); 122 | 123 | double maf = data.getMaf(); 124 | double mafAvg = totalMaf / totalSamples; 125 | 126 | 127 | double mafScore = (maf - mafAvg) * Math.sqrt(2 * data.getSampleNum()) / Math.sqrt(mafAvg * (1 - mafAvg)); 128 | 129 | if (longStats) { 130 | if (mafScore > 3) { 131 | mafLabel.setText("MAF: " + nf.format(data.getMaf()) 132 | + ""); 133 | } else { 134 | mafLabel.setText("MAF: " + nf.format(data.getMaf())); 135 | } 136 | 137 | if (1 - data.getGenopc() > 0.02) 
{ 138 | gpcLabel.setText("GPC: " + nf.format(data.getGenopc()) 139 | + ""); 140 | } else { 141 | gpcLabel.setText("GPC: " + nf.format(data.getGenopc())); 142 | } 143 | if (data.getHwpval() < 10e-5) { 144 | hwpLabel.setText("HWE pval: " 145 | + formatPValue(data.getHwpval()) + ""); 146 | } else { 147 | hwpLabel.setText("HWE pval: " + formatPValue(data.getHwpval())); 148 | } 149 | } else { 150 | if (mafScore > 3) { 151 | mafLabel.setText("" + nf.format(data.getMaf()) 152 | + "/"); 153 | } else { 154 | mafLabel.setText(nf.format(data.getMaf()) + "/"); 155 | } 156 | if (1 - data.getGenopc() > 0.02) { 157 | gpcLabel.setText("" + nf.format(data.getGenopc()) 158 | + "/"); 159 | } else { 160 | gpcLabel.setText(nf.format(data.getGenopc()) + "/"); 161 | } 162 | if (data.getHwpval() < 10e-5) { 163 | hwpLabel.setText("" + formatPValue(data.getHwpval()) 164 | + ""); 165 | } else { 166 | hwpLabel.setText(formatPValue(data.getHwpval())); 167 | } 168 | } 169 | 170 | statistics.add(mafLabel); 171 | statistics.add(gpcLabel); 172 | statistics.add(hwpLabel); 173 | 174 | return statistics; 175 | } 176 | 177 | public void updateInfo() { 178 | statistics.removeAll(); 179 | generateInfo(); 180 | statistics.revalidate(); 181 | } 182 | 183 | public String generateInfoStr() { 184 | return "MAF: " + nf.format(data.getMaf()) + "\tGPC: " 185 | + nf.format(data.getGenopc()) + "\tHWE pval: " 186 | + formatPValue(data.getHwpval()); 187 | } 188 | 189 | private ChartPanel generatePlot(XYSeriesCollection xysc) { 190 | 191 | jfc = ChartFactory.createScatterPlot(title, xlab, ylab, xysc, 192 | PlotOrientation.VERTICAL, false, false, false); 193 | jfc.addSubtitle(new TextTitle("(n=" + data.getSampleNum() + ")")); 194 | 195 | XYPlot thePlot = jfc.getXYPlot(); 196 | thePlot.setBackgroundPaint(Color.white); 197 | thePlot.setOutlineVisible(false); 198 | 199 | XYItemRenderer xyd = thePlot.getRenderer(); 200 | Shape dot = new Ellipse2D.Double(-1.5, -1.5, 3, 3); 201 | xyd.setSeriesShape(0, dot); 202 | 
xyd.setSeriesShape(1, dot); 203 | xyd.setSeriesShape(2, dot); 204 | xyd.setSeriesShape(3, dot); 205 | xyd.setSeriesPaint(0, Color.BLUE); 206 | xyd.setSeriesPaint(1, new Color(180, 180, 180)); 207 | xyd.setSeriesPaint(2, Color.GREEN); 208 | xyd.setSeriesPaint(3, Color.RED); 209 | 210 | xyd.setBaseToolTipGenerator(new ZitPlotToolTipGenerator()); 211 | 212 | EvokerChartPanel cp = new EvokerChartPanel(jfc, data, this); 213 | cp.setDisplayToolTips(true); 214 | cp.setDismissDelay(10000); 215 | cp.setInitialDelay(0); 216 | cp.setReshowDelay(0); 217 | 218 | return cp; 219 | } 220 | 221 | public double getMaxDim() { 222 | return data.getMaxDim() + 0.05 * data.getRange(); 223 | } 224 | 225 | public double getMinDim() { 226 | return data.getMinDim() - 0.05 * data.getRange(); 227 | } 228 | 229 | public void setDimensions() { 230 | setDimensions(getMinDim(), getMaxDim()); 231 | } 232 | 233 | public void setDimensionsToData() { 234 | double minX = data.getMinX(); 235 | double maxX = data.getMaxX(); 236 | double minY = data.getMinY(); 237 | double maxY = data.getMaxY(); 238 | 239 | double xPadding = 0.05 * (maxX - minX); 240 | minX -= xPadding; 241 | maxX += xPadding; 242 | 243 | double yPadding = 0.05 * (maxY - minY); 244 | minY -= yPadding; 245 | maxY += yPadding; 246 | 247 | setDimensions(minX, maxX, minY, maxY); 248 | } 249 | 250 | public void setDimensions(double minX, double maxX, double minY, double maxY) { 251 | if (jfc != null) { 252 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, minY, maxY)); 253 | jfc.getXYPlot().getRangeAxis().setRange(minY, maxY); 254 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, minX, maxX)); 255 | jfc.getXYPlot().getDomainAxis().setRange(minX, maxX); 256 | } 257 | } 258 | 259 | public void setDimensions(double min, double max) { 260 | if (jfc != null) { 261 | switch (data.getCoordSystem()) { 262 | case POLAR: 263 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max)); 264 | jfc.getXYPlot().getRangeAxis().setRange(min, max); 265 
| jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, 0, 2)); 266 | jfc.getXYPlot().getDomainAxis().setRange(0, 2); 267 | break; 268 | case UKBIOBANK: 269 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max)); 270 | jfc.getXYPlot().getRangeAxis().setRange(min, max); 271 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, min, max)); 272 | jfc.getXYPlot().getDomainAxis().setRange(min, max); 273 | break; 274 | default: 275 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max)); 276 | jfc.getXYPlot().getRangeAxis().setRange(min, max); 277 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, min, max)); 278 | jfc.getXYPlot().getDomainAxis().setRange(min, max); 279 | break; 280 | } 281 | } 282 | } 283 | 284 | public static String formatPValue(double pval) { 285 | DecimalFormat df; 286 | // java truly sucks for simply restricting the number of sigfigs but 287 | // still 288 | // using scientific notation when appropriate 289 | if (pval < 0.001) { 290 | df = new DecimalFormat("0.0E0", new DecimalFormatSymbols(Locale.US)); 291 | } else { 292 | df = new DecimalFormat("0.000", new DecimalFormatSymbols(Locale.US)); 293 | } 294 | return df.format(pval, new StringBuffer(), 295 | new FieldPosition(NumberFormat.INTEGER_FIELD)).toString(); 296 | } 297 | 298 | class ZitPlotToolTipGenerator extends StandardXYToolTipGenerator { 299 | 300 | public double round3(double n) { 301 | double result = n * 100000; 302 | result = Math.round(result); 303 | result = result / 100000; 304 | return result; 305 | } 306 | 307 | public ZitPlotToolTipGenerator() { 308 | super(); 309 | } 310 | 311 | public String generateToolTip(XYDataset dataset, int series, int item) { 312 | return data.getIndInClass(series, item) + " (" 313 | + round3(dataset.getXValue(series, item)) + ", " 314 | + round3(dataset.getYValue(series, item)) + ")"; 315 | // + dataset.getXValue(series, item) + ", " 316 | // + dataset.getYValue(series, item) + ")"; 317 | } 318 | } 319 | 320 | public JFreeChart getChart() { 
321 | return jfc; 322 | } 323 | 324 | public PlotData getPlotData() { 325 | return data; 326 | } 327 | 328 | public boolean hasData() { 329 | return foundData; 330 | } 331 | 332 | public String getTitle() { return title; } 333 | 334 | public String toString() { return title; } 335 | 336 | public String getCollection() { return collection; } 337 | 338 | } 339 | -------------------------------------------------------------------------------- /src/evoker/BEDFileChanger.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.BufferedInputStream; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.FileOutputStream; 7 | import java.nio.channels.FileChannel; 8 | import java.io.FileNotFoundException; 9 | import java.io.IOException; 10 | import java.util.HashMap; 11 | import java.util.Vector; 12 | 13 | import evoker.DataDirectory.ExtensionFilter; 14 | 15 | /** 16 | * Change the genotype information within a bed file (and write to a new one) 17 | */ 18 | public class BEDFileChanger { 19 | 20 | /** Holds the list of inds (in correct order) */ 21 | Vector inds = null; 22 | /** Holds the changes made to the collection (_not_ in correct order)*/ 23 | HashMap>> changes; // chromosome -> snp -> [ind -> change] 24 | /** */ 25 | HashMap markerTable; 26 | /** Name of the collection to save */ 27 | String collection = null; 28 | /** Path for file to be read */ 29 | String path = null; 30 | /** internal ID of collection */ 31 | int collectionID = -1; 32 | /** Absolute path of output file */ 33 | String toWriteTo = null; 34 | /** Information about conversion from the internal genotype notation to the bed-one. 
*/ 35 | HashMap genotpeCoding = new HashMap(); 36 | /** Number of SNPs contained in this collection */ 37 | int noOfSnps = -1; 38 | //TP CHANGED THIS 39 | /** DATA DIRECTORY, required for looking up the number of SNPs associated with collections and chromosomes, 40 | * stored in a hashmap in MarkerData*/ 41 | DataDirectory db; 42 | /**a boolean to store whether to print .bim and .fam files*/ 43 | boolean printFullFileset; 44 | /** 45 | * Create a bed file from a given one according to the specified changes 46 | * 47 | * @param collectionID 48 | * @param collection 49 | * @param path 50 | * @param sd 51 | * @param noOfSnps 52 | * @param markerTable 53 | * @param changes 54 | * @param toWriteTo 55 | * @throws IOException 56 | */ 57 | BEDFileChanger(int collectionID, String collection, String path, Vector inds, int noOfSnps, 58 | HashMap markerTable, HashMap>> changes, 59 | String toWriteTo, DataDirectory db, boolean printFullFileset) throws IOException { 60 | assert collectionID >= 0 && collection != null && path != null && inds != null && 61 | noOfSnps != 0 && markerTable != null && changes != null && !changes.keySet().isEmpty() && 62 | toWriteTo != null; 63 | 64 | this.inds = inds; 65 | this.changes = (HashMap>>) changes.clone(); 66 | this.markerTable = markerTable; 67 | this.collection = collection; 68 | this.collectionID = collectionID; 69 | this.path = path; 70 | this.toWriteTo = toWriteTo; 71 | this.noOfSnps = noOfSnps; 72 | //TP CHANGED THIS 73 | this.db = db; 74 | this.printFullFileset = printFullFileset; 75 | setGenotypeCoding(); 76 | write(); 77 | } 78 | 79 | /** 80 | * Create a bed file from a given one according to the specified changes. 81 | * Same as the other constructor, but collects the information itself. 
82 | * 83 | * @param containing related information 84 | * @param collection name to save 85 | * @param absolute file(path) to write to 86 | * @throws IOException 87 | */ 88 | //TP CHANGED THIS 89 | BEDFileChanger(DataDirectory db, String collection, String file, boolean printFullFileset) throws IOException { 90 | new BEDFileChanger(db.getMarkerData().collectionIndices.get(collection), collection, db.getDisplayName(), 91 | db.samplesByCollection.get(collection).inds, db.getMarkerData().getNumSNPs(collection), 92 | db.getMarkerData().getMarkerTable(), db.changesByCollection.get(collection), file, db, printFullFileset); 93 | } 94 | 95 | /** 96 | * Sets the connection between the internal genotype coding and the bed-file-one 97 | */ 98 | private void setGenotypeCoding() { 99 | genotpeCoding.put((byte) 0, 0x00); //homo1 100 | genotpeCoding.put((byte) 1, 0x40); //missing 101 | genotpeCoding.put((byte) 2, 0x80); //hetero 102 | genotpeCoding.put((byte) 3, 0xc0); //homo2 103 | } 104 | 105 | /** 106 | * The writing algorithm. 107 | * 108 | * @throws FileNotFoundException 109 | * @throws IOException 110 | */ 111 | private void write() throws FileNotFoundException, IOException { 112 | int bytesPerSnp = (int) Math.ceil(((double) inds.size()) / 4); 113 | 114 | // for all the changed chromosomes. 115 | for (String chromosome : changes.keySet()) { 116 | //read file 117 | File f = new File(path + "/" + collection + "." + chromosome + ".bed"); 118 | BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f), 8192); 119 | 120 | //TP CHANGED THIS 121 | //output a .fam and .bim file for each .bed file 122 | //search through the current directory for files with the right names to copy 123 | if (printFullFileset) {File dir = new File(path); 124 | File dirList[] = dir.listFiles(); 125 | for (File thisFile : dirList) { 126 | String fileName = thisFile.getName(); 127 | String bimFileExt = (collection + "." 
+ chromosome + ".bim"); 128 | String famFileExt = (collection + ".fam"); 129 | 130 | if (fileName.endsWith(bimFileExt)) 131 | { 132 | File destination = new File(path + "/" + collection + "mod." + chromosome + ".bim"); 133 | copyFile(thisFile, destination); 134 | } 135 | 136 | else if (fileName.endsWith(famFileExt)) 137 | { 138 | File destination = new File(path + "/" + collection + "mod." + chromosome + ".fam"); 139 | copyFile(thisFile, destination); 140 | } 141 | } 142 | } 143 | 144 | //write file 145 | File f_write = new File(toWriteTo + "." + chromosome + ".bed"); 146 | if(f_write.exists()) f_write.delete(); 147 | BEDFileWriter bfw = new BEDFileWriter(f_write); 148 | 149 | //skip header 150 | long toskip = 3; 151 | while ((toskip = toskip - bis.skip(toskip)) > 0); 152 | long snpAt = 0; 153 | 154 | byte[] rawSnpData = null; 155 | 156 | // until there is no snp left for this chromosome to change. 157 | while (changes.get(chromosome).keySet().size() > 0) { 158 | String nextSnpToStopAt = null; 159 | long nextSnpIndex = Long.MAX_VALUE; 160 | 161 | // find the first snip to be changed 162 | for (String s : changes.get(chromosome).keySet()) { 163 | Marker m = markerTable.get(s); 164 | int index = m.getIndex(collectionID); 165 | if (index < nextSnpIndex) { 166 | nextSnpIndex = index; 167 | nextSnpToStopAt = s; 168 | } 169 | } 170 | 171 | // read in snp per snp, write it to file until we reach a changed snp 172 | long skip = nextSnpIndex - snpAt; 173 | for (int i = 0; i < skip; i++) { 174 | rawSnpData = new byte[bytesPerSnp]; 175 | bis.read(rawSnpData, 0, bytesPerSnp); 176 | bfw.write(rawSnpData); 177 | snpAt++; 178 | } 179 | 180 | // read that whole snp in 181 | rawSnpData = new byte[bytesPerSnp]; 182 | bis.read(rawSnpData, 0, bytesPerSnp); 183 | 184 | // find changed inds 185 | for (String ind : changes.get(chromosome).get(nextSnpToStopAt).keySet()) { 186 | long indexOfInd = inds.indexOf(ind); 187 | int indexOfIndInArray = (int) (indexOfInd / 4); 188 | int 
posInTheByteFromBeginning = (int) (3 - (indexOfInd % 4)); // still to be used as index, as it is turned around, big endian >.< 189 | rawSnpData[indexOfIndInArray] = changeByte(rawSnpData[indexOfIndInArray], posInTheByteFromBeginning, changes.get(chromosome).get(nextSnpToStopAt).get(ind)); 190 | } 191 | bfw.write(rawSnpData); 192 | snpAt++; 193 | 194 | // remove snp from the todo list 195 | changes.get(chromosome).remove(nextSnpToStopAt); 196 | } 197 | //TP changed this 198 | // there is nothing to change anymore, but there are still snps to copy. 199 | for (; snpAt < db.getMarkerData().getNumSNPs(collection+chromosome); snpAt++) { 200 | rawSnpData = new byte[bytesPerSnp]; 201 | bis.read(rawSnpData, 0, bytesPerSnp); 202 | bfw.write(rawSnpData); 203 | } 204 | 205 | bfw.flush(); 206 | bfw.close(); 207 | } 208 | } 209 | 210 | /** 211 | * Change genotype within a byte 212 | * 213 | * @param byte to change 214 | * @param potition of the double-bit to change 215 | * @param genotype to change to (internal id-notation) 216 | * @return changed bit 217 | */ 218 | private byte changeByte(byte b, int posInTheByteFromBeginning, byte changeTo) { 219 | int byteToChange = b & 0xff; // java seems to convert bytes to ints while processing them, that'd give problems... (no, java does not suck, java is great.) 
220 | int toOrWith = genotpeCoding.get(changeTo); 221 | int toResetTo0 = 0xc0; 222 | 223 | toOrWith = toOrWith >>> (posInTheByteFromBeginning * 2); 224 | toResetTo0 = toResetTo0 >>> (posInTheByteFromBeginning * 2); 225 | 226 | return (byte) ((byteToChange & ~toResetTo0) | toOrWith); 227 | } 228 | 229 | 230 | //a function to copy a file 231 | //source help: http://blog-en.openalfa.com/how-to-rename-move-or-copy-a-file-in-java 232 | @SuppressWarnings("resource") 233 | private static void copyFile(File sourceFile, File destFile) throws IOException { 234 | if(!destFile.exists()) { 235 | destFile.createNewFile(); 236 | } 237 | 238 | FileChannel origin = null; 239 | FileChannel destination = null; 240 | try { 241 | origin = new FileInputStream(sourceFile).getChannel(); 242 | destination = new FileOutputStream(destFile).getChannel(); 243 | 244 | long count = 0; 245 | long size = origin.size(); 246 | while((count += destination.transferFrom(origin, count, size-count)) 0){ 108 | Genoplot.ld.log("Files in the local directory"); 109 | // ask the user if they wants it emptied 110 | int n = JOptionPane.showConfirmDialog( 111 | genoplot.getContentPane(), 112 | "The local directory selected is not empty.\n Would you like to clear all files in this directory?", 113 | "Clear the local directory?", 114 | JOptionPane.YES_NO_OPTION, 115 | JOptionPane.QUESTION_MESSAGE ); 116 | // n 0 = yes 1 = no 117 | if (n == 0) { 118 | for (File localFile : localFiles){ 119 | try { 120 | localFile.delete(); 121 | }catch (SecurityException se){ 122 | JOptionPane.showMessageDialog(null,se.getMessage(), "File delete error", JOptionPane.ERROR_MESSAGE); 123 | } 124 | } 125 | } 126 | } 127 | displayName = dcd.getHost()+":"+remoteDir; 128 | }else{ 129 | throw new IOException("Authentication to host '"+dcd.getHost()+"' failed."); 130 | } 131 | } 132 | } 133 | 134 | public String[] getFilesInRemoteDir() throws IOException{ 135 | if (files != null){ 136 | String[] out = new String[files.size()]; 137 | for 
(int i = 0; i < files.size(); i++){ 138 | out[i] = ((SftpFile)files.get(i)).getFilename(); 139 | } 140 | return out; 141 | }else{ 142 | throw new IOException("Cannot ls files in remote directory"); 143 | } 144 | } 145 | 146 | public boolean getConnectionStatus(){ 147 | return (ssh != null); 148 | } 149 | 150 | public void getSNPFiles(String snp, String chrom, String collection, int index, int numinds, int totNumSNPs) throws IOException{ 151 | String filestem = collection+"."+snp; 152 | if (!(new File(Utils.join(localDir, filestem+".bed")).exists() && 153 | new File(Utils.join(localDir, filestem+".bnt")).exists())){ 154 | long prev = System.currentTimeMillis(); 155 | 156 | SessionChannelClient session = ssh.openSessionChannel(); 157 | session.startShell(); 158 | 159 | // variable to pass to the evoker-helper.pl script 160 | int oxStatus; 161 | if (fileFormat == FileFormat.OXFORD){ 162 | oxStatus = 1; 163 | } else { 164 | oxStatus = 0; 165 | } 166 | 167 | int ukbiobank_v2; 168 | String outpath; 169 | if (fileFormat == FileFormat.UKBIOBANK) { 170 | ukbiobank_v2 = 1; 171 | outpath = remoteTempDir; 172 | filestem = Utils.join(remoteTempDir, filestem); 173 | } else { 174 | ukbiobank_v2 = 0; 175 | outpath = "0"; 176 | } 177 | 178 | 179 | //Fire off the script on the remote server to get the requested slice of data 180 | OutputStream out = session.getOutputStream(); 181 | //String cmd = "cd "+ remoteDir + "\nperl evoker-helper.pl "+ snp + " " + chrom + " " + 182 | String cmd = "cd "+ remoteDir + "\nevoker-helper.pl "+ snp + " " + chrom + " " + 183 | collection + " " + index + " " + numinds + " " + totNumSNPs + " " + oxStatus + " " + 184 | this.getOxPlatform() + " " + ukbiobank_v2 + " " + outpath + "\n"; 185 | out.write(cmd.getBytes()); 186 | 187 | 188 | //monitor the remote server for news that the script has been finished 189 | //this is pretty slow -- is there a better way? 
190 | InputStream in = session.getInputStream(); 191 | byte buffer[] = new byte[1024]; 192 | int read; 193 | long start = System.currentTimeMillis(); 194 | 195 | while((System.currentTimeMillis() - start)/1000 < 120) { 196 | try{ 197 | read = in.read(buffer); 198 | String outstr = new String(buffer, 0, read); 199 | if (outstr.contains(snp)){ 200 | break; 201 | } else if (outstr.contains("write_error")) { 202 | throw new IOException("user does not have write privileges"); 203 | } 204 | }catch (IOException ioe){ 205 | ssh.disconnect(); 206 | throw new IOException("Problem with remote directory permissions:\n"+ioe.getMessage()); 207 | } 208 | 209 | } 210 | 211 | if ((System.currentTimeMillis() - start)/1000 >= 120) { 212 | // if nothing is output from evoker-helper.pl in 2 minutes then die 213 | throw new IOException("evoker-helper.pl is not responsive check the script will run"); 214 | } 215 | 216 | session.close(); 217 | 218 | String[] filetypes = {".bed", ".bnt"}; 219 | for (String filetype : filetypes) { 220 | File remoteFile = new File(filestem + filetype); 221 | String filename = remoteFile.getName(); 222 | String localFilePath = Utils.join(localDir, filename); 223 | // Need UNIX-style paths on the remote machine 224 | String remoteFilePath = remoteFile.getPath().replace("\\", "/"); 225 | ftp.get(remoteFilePath, localFilePath); 226 | ftp.rm(remoteFilePath); 227 | } 228 | 229 | double time = ((double)(System.currentTimeMillis() - prev))/1000; 230 | Genoplot.ld.log(snp +" for "+ collection +" was fetched in "+ time + "s."); 231 | }else{ 232 | Genoplot.ld.log(snp +" for "+ collection +" was cached."); 233 | } 234 | 235 | } 236 | 237 | 238 | public boolean isOxFormat() { 239 | return fileFormat == FileFormat.OXFORD; 240 | } 241 | 242 | public File prepMetaFiles() throws IOException { 243 | files = ftp.ls(); 244 | 245 | String famending = ".fam"; 246 | String bimending = ".bim"; 247 | if (fileFormat == FileFormat.OXFORD) { 248 | famending = ".sample"; 249 | bimending 
= ".snp"; 250 | } 251 | 252 | Iterator i = files.iterator(); 253 | 254 | if (fileFormat == FileFormat.UKBIOBANK) { 255 | // UKB provides a separate fam files 256 | File f = new File(famPath); 257 | String famFile = f.getName(); 258 | 259 | 260 | 261 | if (!new File(Utils.join(localDir, famFile)).exists()) { 262 | try { 263 | ftp.get(famPath); 264 | } catch (IOException e) { 265 | // TODO Auto-generated catch block 266 | e.printStackTrace(); 267 | } 268 | } 269 | 270 | } else { 271 | while (i.hasNext()) { 272 | String filename = ((SftpFile) i.next()).getFilename(); 273 | if (filename.endsWith(famending)) { 274 | 275 | if (!new File(Utils.join(localDir, filename)).exists()) { 276 | try { 277 | ftp.get(filename); 278 | } catch (IOException e) { 279 | // TODO Auto-generated catch block 280 | e.printStackTrace(); 281 | } 282 | } 283 | } 284 | 285 | } 286 | } 287 | 288 | gp.pm.setMaximum(files.size() * 2); 289 | int loopCount = 0; 290 | i = files.iterator(); 291 | while (i.hasNext()) { 292 | gp.pm.setProgress(++loopCount); 293 | if (gp.pm.isCanceled()) { 294 | return null; 295 | } 296 | String filename = ((SftpFile) i.next()).getFilename(); 297 | if (filename.endsWith(bimending)) { 298 | 299 | if (!new File(Utils.join(localDir, filename)).exists()) { 300 | try { 301 | ftp.get(filename); 302 | } catch (IOException e) { 303 | // TODO Auto-generated catch block 304 | e.printStackTrace(); 305 | } 306 | } 307 | } 308 | 309 | } 310 | 311 | i = files.iterator(); 312 | while (i.hasNext()) { 313 | gp.pm.setProgress(++loopCount); 314 | if (gp.pm.isCanceled()) { 315 | return null; 316 | } 317 | String filename = ((SftpFile) i.next()).getFilename(); 318 | if (filename.endsWith(".qc")) { 319 | if (!new File(Utils.join(localDir, filename)).exists()) { 320 | try { 321 | ftp.get(filename); 322 | } catch (IOException e) { 323 | // TODO Auto-generated catch block 324 | e.printStackTrace(); 325 | } 326 | } 327 | } 328 | 329 | } 330 | 331 | return new File(localDir); 332 | } 333 | 334 | 
public void setOxPlatform(String oxPlatform) { 335 | this.oxPlatform = oxPlatform; 336 | } 337 | 338 | public String getOxPlatform() { 339 | return oxPlatform; 340 | } 341 | 342 | public SftpClient getFTP() { 343 | return ftp; 344 | } 345 | 346 | } 347 | -------------------------------------------------------------------------------- /src/evoker/EvokerChartPanel.java: --------------------------------------------------------------------------------
package evoker;

import java.awt.Graphics2D;
import java.awt.Point;
import java.awt.event.ActionEvent;
import java.awt.event.KeyEvent;
import java.awt.event.MouseEvent;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.ButtonGroup;
import javax.swing.JFileChooser;
import javax.swing.JMenu;
import javax.swing.JMenuItem;
import javax.swing.JPopupMenu;
import javax.swing.JRadioButtonMenuItem;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.XYPlot;
import org.jfree.data.xy.XYDataItem;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import org.jfree.ui.ExtensionFileFilter;

import evoker.Genoplot.MouseMode;

/**
 * Chart panel for a single genotype cluster plot.
 * <p>
 * On top of the inherited {@link ChartPanel} behaviour it offers a second
 * mouse mode, "lasso select": the user drags a free-hand outline, and on
 * release a popup asks which genotype class the enclosed points should be
 * moved to. The active mouse mode is stored on the owning Genoplot (reached
 * through {@code plotPanel.theGenoplot}) and can be switched from the two
 * radio items this class inserts at the top of the chart's context menu.
 */
public class EvokerChartPanel extends ChartPanel {

    /** Enable mouse listeners to perform zoom */
    public static final String ZOOM_ENABLE_COMMAND = "ZOOM_ENABLE";
    /** Enable mouse listeners to perform lasso select */
    public static final String LASSO_SELECT_ENABLE_COMMAND = "LASSO_SELECT_ENABLE";

    /** Action commands fired by the genotype selection popup. */
    private static final String GENOTYPE_XX_COMMAND = "GENOTYPE_XX";
    private static final String GENOTYPE_UNKNOWN_COMMAND = "GENOTYPE_UNKNOWN";
    private static final String GENOTYPE_XY_COMMAND = "GENOTYPE_XY";
    private static final String GENOTYPE_YY_COMMAND = "GENOTYPE_YY";

    /** Index of the data series each genotype command moves points into. */
    private static final int SERIES_XX = 0;
    private static final int SERIES_UNKNOWN = 1;
    private static final int SERIES_XY = 2;
    private static final int SERIES_YY = 3;

    /** Holding the last Point, the mouse was dragged to (for drawing a line while selecting) */
    Point lastDragPoint = null;
    /** Will hold the lasso Object, while the selection is being made */
    Lasso lasso = null;
    /** The genotype selection menu */
    private JPopupMenu genotypeSelectPopup;
    /** The genotype data being displayed */
    private PlotData plotData = null;
    /** The panel calling the ChartPanel */
    private PlotPanel plotPanel = null;
    /** True between a lasso-mode mouse press and the matching release. */
    private boolean weAreLassoing = false;
    private JRadioButtonMenuItem jrbZoom;
    private JRadioButtonMenuItem jrbLasso;

    /**
     * Creates the panel and installs the extra menu entries.
     *
     * @param jfc    the chart to display.
     * @param pdata  the genotype data behind the chart.
     * @param ppanel the plot panel that owns this chart panel.
     */
    EvokerChartPanel(JFreeChart jfc, PlotData pdata, PlotPanel ppanel) {
        super(jfc);
        // set up genotype select menu (... :] )
        this.genotypeSelectPopup = createGenotypeSelectPopup();
        this.plotData = pdata;
        this.plotPanel = ppanel;

        // Must run after plotPanel is assigned: it reads the current mouse mode.
        createPopupMenu_();
    }

    /**
     * Converts the x pixel coordinate of a point on this panel into the
     * corresponding value on the plot's domain axis.
     */
    private double rescaleMouseClickX(Point p) {
        Rectangle2D plotArea = this.getScreenDataArea();
        XYPlot plot = (XYPlot) super.getChart().getPlot();
        return plot.getDomainAxis().java2DToValue(p.getX(), plotArea, plot.getDomainAxisEdge());
    }

    /**
     * Converts the y pixel coordinate of a point on this panel into the
     * corresponding value on the plot's range axis.
     */
    private double rescaleMouseClickY(Point p) {
        Rectangle2D plotArea = this.getScreenDataArea();
        XYPlot plot = (XYPlot) super.getChart().getPlot();
        return plot.getRangeAxis().java2DToValue(p.getY(), plotArea, plot.getRangeAxisEdge());
    }

    /**
     * Handles a 'mouse pressed' event.
     * <p>
     * This event is the popup trigger on Unix/Linux. For Windows, the popup
     * trigger is the 'mouse released' event.
     * <p>
     * A non-popup press in lasso mode starts a new lasso at the pressed
     * point; in any other mode the press is handed to the superclass.
     *
     * @param e The mouse event.
     */
    public void mousePressed(MouseEvent e) {
        if (e.isPopupTrigger()) {
            if (popup != null) {
                // Make the radio items reflect the mode currently in force.
                MouseMode mode = plotPanel.theGenoplot.getMouseMode();
                if (mode == MouseMode.ZOOM) {
                    jrbZoom.setSelected(true);
                }
                if (mode == MouseMode.LASSO) {
                    jrbLasso.setSelected(true);
                }
                displayPopupMenu(e.getX(), e.getY());
            }
            return;
        }

        if (plotPanel.theGenoplot.getMouseMode() == MouseMode.LASSO) {
            lastDragPoint = e.getPoint();
            lasso = new Lasso(rescaleMouseClickX(lastDragPoint), rescaleMouseClickY(lastDragPoint));
            weAreLassoing = true;
        } else {
            super.mousePressed(e);
        }
    }

    /**
     * Handles a 'mouse dragged' event. While a lasso is being drawn, a line
     * segment from the previous drag point is painted directly onto the panel
     * and the new point (in data coordinates) is appended to the lasso.
     *
     * @param e the mouse event.
     */
    public void mouseDragged(MouseEvent e) {
        if (!weAreLassoing) {
            super.mouseDragged(e);
            return;
        }

        // draw line from last Point to the current one
        if (lastDragPoint != null) {
            // getGraphics() returns null while the component is not displayable,
            // and a context obtained this way has to be disposed by the caller.
            Graphics2D g2 = (Graphics2D) getGraphics();
            if (g2 != null) {
                try {
                    g2.draw(new Line2D.Double(lastDragPoint.getX(), lastDragPoint.getY(), e.getX(), e.getY()));
                } finally {
                    g2.dispose();
                }
            }
        }
        lastDragPoint = new Point(e.getX(), e.getY());

        lasso.addPoint(rescaleMouseClickX(lastDragPoint), rescaleMouseClickY(lastDragPoint));
    }

    /**
     * Handles a 'mouse released' event. On Windows, we need to check if this
     * is a popup trigger, but only if we haven't already been tracking a zoom
     * rectangle.
     * <p>
     * When a lasso is in progress the release point closes it and the
     * genotype selection popup is shown at the cursor.
     *
     * @param e information about the event.
     */
    public void mouseReleased(MouseEvent e) {
        if (!weAreLassoing) {
            super.mouseReleased(e);
            return;
        }

        // The gesture is over whatever happens below.
        weAreLassoing = false;
        lastDragPoint = null;
        if (lasso == null) {
            return;
        }

        Point releasePoint = e.getPoint();
        lasso.addPoint(rescaleMouseClickX(releasePoint), rescaleMouseClickY(releasePoint));
        lasso.close();
        this.genotypeSelectPopup.show(this, e.getX(), e.getY());
    }

    /**
     * Inserts the "Zoom" / "Lasso Select" radio items at the top of the
     * context menu inherited from {@link ChartPanel}, preselecting the one
     * that matches the current mouse mode. Does nothing to the menu when the
     * superclass was built without one.
     */
    protected void createPopupMenu_() {
        ButtonGroup group = new ButtonGroup();
        MouseMode mode = plotPanel.theGenoplot.getMouseMode();

        jrbZoom = new JRadioButtonMenuItem("Zoom");
        if (mode == MouseMode.ZOOM) {
            jrbZoom.setSelected(true);
        }
        jrbZoom.setMnemonic(KeyEvent.VK_O);
        jrbZoom.setActionCommand(ZOOM_ENABLE_COMMAND);
        jrbZoom.addActionListener(this);
        group.add(jrbZoom);

        jrbLasso = new JRadioButtonMenuItem("Lasso Select");
        if (mode == MouseMode.LASSO) {
            jrbLasso.setSelected(true);
        }
        jrbLasso.setMnemonic(KeyEvent.VK_R);
        jrbLasso.setActionCommand(LASSO_SELECT_ENABLE_COMMAND);
        jrbLasso.addActionListener(this);
        group.add(jrbLasso);

        if (popup != null) {
            popup.add(jrbZoom, 0);
            popup.add(jrbLasso, 1);
        }
    }

    /**
     * Builds the popup shown after a lasso is closed, with one radio item per
     * genotype class.
     *
     * @return The popup menu.
     */
    protected JPopupMenu createGenotypeSelectPopup() {
        JPopupMenu result = new JPopupMenu("Chart:");
        ButtonGroup group = new ButtonGroup();

        // NOTE(review): three of the four items share the VK_R mnemonic;
        // kept as found, but probably worth giving each its own key.
        addGenotypeItem(result, group, "Y Homozygous", KeyEvent.VK_O, GENOTYPE_YY_COMMAND).setSelected(true);
        addGenotypeItem(result, group, "Heterozygous", KeyEvent.VK_R, GENOTYPE_XY_COMMAND);
        addGenotypeItem(result, group, "X Homozygous", KeyEvent.VK_R, GENOTYPE_XX_COMMAND);
        addGenotypeItem(result, group, "Unknown", KeyEvent.VK_R, GENOTYPE_UNKNOWN_COMMAND);

        return result;
    }

    /** Creates one genotype radio item, wires it to this panel and adds it to the menu and group. */
    private JRadioButtonMenuItem addGenotypeItem(JPopupMenu menu, ButtonGroup group, String label,
            int mnemonic, String command) {
        JRadioButtonMenuItem item = new JRadioButtonMenuItem(label);
        item.setMnemonic(mnemonic);
        item.setActionCommand(command);
        item.addActionListener(this);
        group.add(item);
        menu.add(item);
        return item;
    }

    /**
     * Handles action events generated by the popup menus: genotype commands
     * reassign the lassoed points, mode commands switch the mouse mode, and
     * everything else is passed to the superclass.
     *
     * @param event the event.
     */
    public void actionPerformed(ActionEvent event) {
        String command = event.getActionCommand();
        if (command.equals(GENOTYPE_XX_COMMAND)) {
            adjustDataSeries(SERIES_XX);
        } else if (command.equals(GENOTYPE_UNKNOWN_COMMAND)) {
            adjustDataSeries(SERIES_UNKNOWN);
        } else if (command.equals(GENOTYPE_XY_COMMAND)) {
            adjustDataSeries(SERIES_XY);
        } else if (command.equals(GENOTYPE_YY_COMMAND)) {
            adjustDataSeries(SERIES_YY);
        } else if (command.equals(ZOOM_ENABLE_COMMAND)) {
            plotPanel.theGenoplot.setMouseMode(MouseMode.ZOOM);
        } else if (command.equals(LASSO_SELECT_ENABLE_COMMAND)) {
            plotPanel.theGenoplot.setMouseMode(MouseMode.LASSO);
        } else {
            super.actionPerformed(event);
        }
    }

    /**
     * Rounds a value to five decimal places (the name is historical).
     *
     * @param n the value to round.
     * @return {@code n} rounded to the nearest 1e-5.
     */
    public double round3(double n) {
        double result = n * 100000;
        result = Math.round(result);
        result = result / 100000;
        return result;
    }

    /**
     * Moves every point enclosed by the current lasso into the given series,
     * records the move on the plot data, then discards the lasso and
     * refreshes the chart and the statistics line. Does nothing when no lasso
     * is active.
     *
     * @param genotype index of the target series (0 = X homozygous,
     *                 1 = unknown, 2 = heterozygous, 3 = Y homozygous).
     */
    void adjustDataSeries(int genotype) {
        if (lasso == null) {
            // Nothing was selected, so there is nothing to reassign.
            return;
        }

        // NOTE(review): the type arguments below were missing from the text this
        // file was recovered from. The key type follows from the loop over
        // keySet(); the String value type is presumed (sample ids are Strings
        // elsewhere) -- confirm against Lasso.getContainedPointsInd.
        HashMap<EvokerPoint2D, String> containedPointsInd =
                lasso.getContainedPointsInd(super.info.getEntityCollection());
        ArrayList<String> movedInds = new ArrayList<String>();

        XYPlot plot = (XYPlot) super.getChart().getPlot();
        XYSeriesCollection xyseriescoll = (XYSeriesCollection) plot.getDataset();

        ArrayList<XYDataItem> movedItems = new ArrayList<XYDataItem>();

        int seriesCount = xyseriescoll.getSeriesCount();
        for (int a = 0; a < seriesCount; a++) {
            XYSeries series = xyseriescoll.getSeries(a);

            @SuppressWarnings("unchecked")
            List<XYDataItem> items = (List<XYDataItem>) series.getItems();
            int itemsLength = items.size();
            for (int b = 0; b < itemsLength; b++) {
                XYDataItem xydi = items.get(b);
                // Compare at the same precision the lasso points are matched on.
                double x = round3(xydi.getXValue());
                double y = round3(xydi.getYValue());

                for (EvokerPoint2D p_ : containedPointsInd.keySet()) {
                    if (x == p_.getX() && y == p_.getY()) {
                        String ind = containedPointsInd.get(p_);
                        if (movedInds.contains(ind)) {
                            // Already moved via another item with the same
                            // coordinates; try the next lasso point.
                            continue;
                        }
                        plotData.moveIndToClass(ind, a, b, genotype);
                        movedInds.add(ind);
                        movedItems.add(xydi);
                        // Removing shifts the list left, so revisit index b.
                        items.remove(b);
                        b--;
                        itemsLength--;
                        break;
                    }
                }
            }
        }

        XYSeries series = xyseriescoll.getSeries(genotype);
        @SuppressWarnings("unchecked")
        List<XYDataItem> items = (List<XYDataItem>) series.getItems();
        for (XYDataItem xydi : movedItems) {
            items.add(xydi);
        }

        lasso = null;
        this.chart.setNotify(true); // last thing, redraw. Applies changes and gets rid of the line.
        this.plotData.computeSummary();
        this.plotPanel.updateInfo();

        this.plotData.changed = true;
    }

    /**
     * Asks the user for a target file and opens a BED writer on it. No
     * genotype data is written here; the writer is closed again straight
     * away so the file handle is not leaked.
     *
     * @throws IOException if the file cannot be opened or closed.
     */
    private void save() throws IOException {
        JFileChooser fileChooser = new JFileChooser();
        fileChooser.setCurrentDirectory(super.defaultDirectoryForSaveAs);
        ExtensionFileFilter filter = new ExtensionFileFilter("BED Binary Files", ".bed");
        fileChooser.addChoosableFileFilter(filter);

        int option = fileChooser.showSaveDialog(this);
        if (option != JFileChooser.APPROVE_OPTION) {
            return;
        }

        String filename = fileChooser.getSelectedFile().getPath();
        if (isEnforceFileExtensions() && !filename.endsWith(".bed")) {
            filename = filename + ".bed";
        }

        BEDFileWriter bfw = null;
        try {
            bfw = new BEDFileWriter(new File(filename));
        } catch (IOException ex) {
            // Keep the original failure attached for diagnosis.
            throw new IOException("Could not write file.", ex);
        } finally {
            if (bfw != null) {
                bfw.close();
            }
        }
    }
}
-------------------------------------------------------------------------------- /src/evoker/PlotData.java: -------------------------------------------------------------------------------- 1 | package evoker; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.HashMap; 6 | import java.util.Vector; 7 | 8 | import org.jfree.data.xy.XYSeriesCollection; 9 | import org.jfree.data.xy.XYSeries; 10 | 
11 | import evoker.Types.*; 12 | 13 | public class PlotData { 14 | 15 | private ArrayList calledGenotypes; 16 | private ArrayList intensities; 17 | private double maf, genopc, hwpval, minX, maxX, minY, maxY; 18 | private SampleData samples; 19 | private QCFilterData exclude; 20 | private int sampleNum; 21 | private CoordinateSystem coordSystem; 22 | private ArrayList> indsInClasses; 23 | private HashMap indexInArrayListByInd; 24 | private char[] alleles; 25 | private HashMap genotypeChanges = new HashMap(); 26 | public boolean changed = false; 27 | private FileFormat fileFormat; 28 | 29 | PlotData(ArrayList calledGenotypes, ArrayList intensities, SampleData samples, QCFilterData exclude, char[] alleles, CoordinateSystem coordSystem, FileFormat fileFormat) { 30 | this.calledGenotypes = calledGenotypes; 31 | this.intensities = intensities; 32 | this.samples = samples; 33 | this.exclude = exclude; 34 | this.minX = 100000; 35 | this.maxX = -100000; 36 | this.minY = 100000; 37 | this.maxY = -100000; 38 | this.alleles = alleles; 39 | this.fileFormat = fileFormat; 40 | this.setCoordSystem(coordSystem); 41 | } 42 | 43 | public void add(ArrayList calledgenotypes, ArrayList intensities) { 44 | this.calledGenotypes.addAll(calledgenotypes); 45 | this.intensities.addAll(intensities); 46 | } 47 | 48 | public PlotData getSubPlotData(Vector indices, Sex sexToPlot, CoordinateSystem newCoordinateSystem) { 49 | int subLength = indices.size(); 50 | ArrayList subIntensities = new ArrayList(subLength); 51 | ArrayList subCalledGenotypes = new ArrayList(subLength); 52 | Vector subSampleVector = new Vector(subLength); 53 | QCFilterData ukbExclude = samples.getUkbExclude(); 54 | 55 | for (int index: indices) { 56 | if ((sexToPlot != Sex.NOT_SEX) && (samples.getSexByIndex(index) != sexToPlot)) { 57 | continue; 58 | } 59 | subCalledGenotypes.add(calledGenotypes.get(index)); 60 | subIntensities.add(intensities.get(index)); 61 | subSampleVector.add(samples.getInd(index)); 62 | } 63 | SampleData 
subSamples = new SampleData(subSampleVector); 64 | subSamples.setUkbExclude(ukbExclude); 65 | return new PlotData(subCalledGenotypes, subIntensities, subSamples, exclude, 66 | alleles, newCoordinateSystem, fileFormat); 67 | } 68 | 69 | XYSeriesCollection generatePoints() { 70 | if (intensities == null || calledGenotypes == null) { 71 | return null; 72 | } 73 | 74 | computeSummary(); 75 | 76 | 77 | XYSeries intensityDataSeriesHomo1 = new XYSeries(0, false); 78 | XYSeries intensityDataSeriesMissing = new XYSeries(1, false); 79 | XYSeries intensityDataSeriesHetero = new XYSeries(2, false); 80 | XYSeries intensityDataSeriesHomo2 = new XYSeries(3, false); 81 | 82 | indsInClasses = new ArrayList>(); 83 | for (int i = 0; i < 4; i++) { 84 | indsInClasses.add(new ArrayList()); 85 | } 86 | 87 | indexInArrayListByInd = new HashMap(); 88 | 89 | sampleNum = 0; 90 | for (int i = 0; i < intensities.size(); i++) { 91 | float[] intens = intensities.get(i); 92 | 93 | if (coordSystem == CoordinateSystem.POLAR) { 94 | float x = intens[0]; 95 | float y = intens[1]; 96 | 97 | float r = (float) Math.sqrt(Math.pow(y, 2) + Math.pow(x, 2)); 98 | float theta = (float) Math.asin(y / r); 99 | 100 | intens[0] = theta; 101 | intens[1] = r; 102 | } else if (coordSystem == CoordinateSystem.UKBIOBANK) { 103 | float a = intens[0]; 104 | float b = intens[1]; 105 | 106 | // Contrast (x-axis) = log2(A/B) 107 | intens[0] = (float) log2(a/b); 108 | 109 | // Strength (y-axis) = (log2(A*B))/2 110 | intens[1] = (float) log2(a*b)/2; 111 | 112 | if (Float.isNaN(intens[0]) || Float.isNaN(intens[1])) { 113 | System.out.println(intens[0] + ", " + intens[1]); 114 | } 115 | } 116 | 117 | String sampleName = samples.getInd(i); 118 | 119 | // If there is nothing to plot, skip 120 | if (calledGenotypes.get(i) == null) { 121 | continue; 122 | } 123 | 124 | // If the exclude exists and the individual is excluded, skip 125 | if ((exclude != null) && exclude.isExcluded(sampleName)) { 126 | continue; 127 | } 128 | 129 | 
if ((fileFormat == FileFormat.UKBIOBANK) && samples.getUkbExclude().isExcluded(sampleName)) { 130 | continue; 131 | } 132 | 133 | sampleNum++; 134 | switch (calledGenotypes.get(i)) { 135 | case 0: 136 | intensityDataSeriesHomo1.add(intens[0], intens[1]); 137 | indsInClasses.get(0).add(sampleName); 138 | indexInArrayListByInd.put(sampleName, indsInClasses.get(0).size() -1); 139 | break; 140 | case 1: 141 | intensityDataSeriesMissing.add(intens[0], intens[1]); 142 | indsInClasses.get(1).add(sampleName); 143 | indexInArrayListByInd.put(sampleName, indsInClasses.get(1).size() -1); 144 | break; 145 | case 2: 146 | intensityDataSeriesHetero.add(intens[0], intens[1]); 147 | indsInClasses.get(2).add(sampleName); 148 | indexInArrayListByInd.put(sampleName, indsInClasses.get(2).size() -1); 149 | break; 150 | case 3: 151 | intensityDataSeriesHomo2.add(intens[0], intens[1]); 152 | indsInClasses.get(3).add(sampleName); 153 | indexInArrayListByInd.put(sampleName, indsInClasses.get(3).size() -1); 154 | break; 155 | default: 156 | //TODO: this is very bad 157 | break; 158 | } 159 | 160 | 161 | 162 | //illuminus uses [-1,-1] as a flag for missing data. technically we don't want to make it impossible 163 | //for such a datapoint to exist, but we won't let this exact data point adjust the bounds of the plot. 164 | //if it really is intentional, there will almost certainly be other nearby, negative points 165 | //which will resize the bounds appropriately. 
166 | if (!(intens[0] == -1 && intens[1] == -1)) { 167 | if (intens[0] > maxX) { 168 | maxX = intens[0]; 169 | } 170 | if (intens[0] < minX) { 171 | minX = intens[0]; 172 | } 173 | 174 | if (intens[1] > maxY) { 175 | maxY = intens[1]; 176 | } 177 | if (intens[1] < minY) { 178 | minY = intens[1]; 179 | } 180 | } 181 | } 182 | 183 | int heteroCount = intensityDataSeriesHetero.getItemCount(); 184 | int homo1Count = intensityDataSeriesHomo1.getItemCount(); 185 | int homo2Count = intensityDataSeriesHomo2.getItemCount(); 186 | 187 | if (heteroCount + homo1Count + homo2Count == 0) { 188 | return null; 189 | } 190 | 191 | XYSeriesCollection xysc = new XYSeriesCollection(intensityDataSeriesHomo1); 192 | xysc.addSeries(intensityDataSeriesMissing); 193 | xysc.addSeries(intensityDataSeriesHetero); 194 | xysc.addSeries(intensityDataSeriesHomo2); 195 | return xysc; 196 | } 197 | 198 | public String getIndInClass(int cl, int i) { 199 | return indsInClasses.get(cl).get(i); 200 | } 201 | 202 | /** 203 | * Moves an IND to another (internal) genotype class 204 | * 205 | * @param ind name 206 | * @param class it is from 207 | * @param index of the genotype in that class 208 | * @param class it should be in 209 | */ 210 | public void moveIndToClass(String ind, int fromCl, int fromI, int to) { 211 | indsInClasses.get(fromCl).remove(fromI); 212 | indsInClasses.get(to).add(ind); 213 | 214 | int index = samples.getIndex(ind); 215 | calledGenotypes.set(index, (byte) to); 216 | genotypeChanges.put(ind, (byte) to); 217 | } 218 | 219 | protected void computeSummary() { 220 | double hom1 = 0, het = 0, hom2 = 0, missing = 0; 221 | 222 | for (int i = 0; i < calledGenotypes.size(); i++) { 223 | 224 | String sampleName = samples.getInd(i); 225 | 226 | // If the exclude exists and the individual is excluded, skip 227 | if ((exclude != null) && exclude.isExcluded(sampleName)) { 228 | continue; 229 | } 230 | 231 | if ((fileFormat == FileFormat.UKBIOBANK) && 
samples.getUkbExclude().isExcluded(sampleName)) { 232 | continue; 233 | } 234 | 235 | byte geno = calledGenotypes.get(i); 236 | if (geno == 0) { 237 | hom1++; 238 | } else if (geno == 2) { 239 | het++; 240 | } else if (geno == 3) { 241 | hom2++; 242 | } else { 243 | missing++; 244 | } 245 | genopc = 1 - (missing / (missing + hom1 + het + hom2)); 246 | double tmpmaf = ((2 * hom1) + het) / ((2 * het) + (2 * hom1) + (2 * hom2)); 247 | if (tmpmaf < 0.5) { 248 | maf = tmpmaf; 249 | } else { 250 | maf = 1 - tmpmaf; 251 | } 252 | hwpval = hwCalculate((int) hom1, (int) het, (int) hom2); 253 | } 254 | } 255 | 256 | 257 | private double hwCalculate(int obsAA, int obsAB, int obsBB) { 258 | //Calculates exact two-sided hardy-weinberg p-value. Parameters 259 | //are number of genotypes, number of rare alleles observed and 260 | //number of heterozygotes observed. 261 | // 262 | // (c) 2003 Jan Wigginton, Goncalo Abecasis 263 | 264 | int diplotypes = obsAA + obsAB + obsBB; 265 | if (diplotypes == 0) { 266 | return 0; 267 | } 268 | int rare = (obsAA * 2) + obsAB; 269 | int hets = obsAB; 270 | 271 | 272 | //make sure "rare" allele is really the rare allele 273 | if (rare > diplotypes) { 274 | rare = 2 * diplotypes - rare; 275 | } 276 | 277 | double[] tailProbs = new double[rare + 1]; 278 | for (int z = 0; z < tailProbs.length; z++) { 279 | tailProbs[z] = 0; 280 | } 281 | 282 | //start at midpoint 283 | int mid = rare * (2 * diplotypes - rare) / (2 * diplotypes); 284 | 285 | //check to ensure that midpoint and rare alleles have same parity 286 | if (((rare & 1) ^ (mid & 1)) != 0) { 287 | mid++; 288 | } 289 | int het = mid; 290 | int hom_r = (rare - mid) / 2; 291 | int hom_c = diplotypes - het - hom_r; 292 | 293 | //Calculate probability for each possible observed heterozygote 294 | //count up to a scaling constant, to avoid underflow and overflow 295 | tailProbs[mid] = 1.0; 296 | double sum = tailProbs[mid]; 297 | for (het = mid; het > 1; het -= 2) { 298 | tailProbs[het - 2] = 
(tailProbs[het] * het * (het - 1.0)) / (4.0 * (hom_r + 1.0) * (hom_c + 1.0)); 299 | sum += tailProbs[het - 2]; 300 | //2 fewer hets for next iteration -> add one rare and one common homozygote 301 | hom_r++; 302 | hom_c++; 303 | } 304 | 305 | het = mid; 306 | hom_r = (rare - mid) / 2; 307 | hom_c = diplotypes - het - hom_r; 308 | for (het = mid; het <= rare - 2; het += 2) { 309 | tailProbs[het + 2] = (tailProbs[het] * 4.0 * hom_r * hom_c) / ((het + 2.0) * (het + 1.0)); 310 | sum += tailProbs[het + 2]; 311 | //2 more hets for next iteration -> subtract one rare and one common homozygote 312 | hom_r--; 313 | hom_c--; 314 | } 315 | 316 | for (int z = 0; z < tailProbs.length; z++) { 317 | tailProbs[z] /= sum; 318 | } 319 | 320 | double top = tailProbs[hets]; 321 | for (int i = hets + 1; i <= rare; i++) { 322 | top += tailProbs[i]; 323 | } 324 | double otherSide = tailProbs[hets]; 325 | for (int i = hets - 1; i >= 0; i--) { 326 | otherSide += tailProbs[i]; 327 | } 328 | 329 | if (top > 0.5 && otherSide > 0.5) { 330 | return 1.0; 331 | } else { 332 | if (top < otherSide) { 333 | return top * 2; 334 | } else { 335 | return otherSide * 2; 336 | } 337 | } 338 | } 339 | 340 | public HashMap getGenotypeChanges() { 341 | return genotypeChanges; 342 | } 343 | 344 | public double getMaf() { 345 | return maf; 346 | } 347 | 348 | public double getGenopc() { 349 | return genopc; 350 | } 351 | 352 | public double getHwpval() { 353 | return hwpval; 354 | } 355 | 356 | public double getMaxDim() { return Math.max(maxX, maxY); } 357 | 358 | public double getMinDim() { return Math.min(minX, minY); } 359 | 360 | public double getMinX() { 361 | return minX; 362 | } 363 | 364 | public double getMaxX() { 365 | return maxX; 366 | } 367 | 368 | public double getMinY() { 369 | return minY; 370 | } 371 | 372 | public double getMaxY() { 373 | return maxY; 374 | } 375 | 376 | public double getRange() { 377 | return getMaxDim() - getMinDim(); 378 | } 379 | 380 | public char[] getAlleles() { 381 | 
if (alleles != null) { 382 | return alleles; 383 | } else { 384 | return new char[]{' ', ' '}; 385 | } 386 | } 387 | 388 | public int getSampleNum() { 389 | return sampleNum; 390 | } 391 | 392 | private void setCoordSystem(CoordinateSystem coordSystem) { 393 | this.coordSystem = coordSystem; 394 | } 395 | 396 | public CoordinateSystem getCoordSystem() { 397 | return coordSystem; 398 | } 399 | 400 | public byte getCalledGenotype(String ind){ 401 | return calledGenotypes.get(samples.getIndex(ind)); 402 | } 403 | 404 | public int getIndexInArrayList(String ind){ 405 | return indexInArrayListByInd.get(ind); 406 | } 407 | 408 | private double log2(double x) { 409 | return Math.log(x) / Math.log(2); 410 | } 411 | } 412 | -------------------------------------------------------------------------------- /resources/evoker-documentation.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \title{Evoker: a genotype visualization tool} 4 | \author{Jeffrey C.\ Barrett\\\texttt{barrett@sanger.ac.uk}} 5 | \date{\today} 6 | 7 | \begin{document} 8 | 9 | \maketitle 10 | 11 | \section{Introduction} 12 | 13 | Evoker is a tool designed for visualizing genotype cluster plots as part of quality control procedures for genome-wide association studies. It provides a solution to the computational and storage problems related to being able to work with the huge volumes of data generated by such projects. 14 | 15 | \section{Getting started with Evoker} 16 | 17 | Using Evoker requires two important parts: the data, formatted and named in a specific way, can be stored either locally or on a remote server to which you have SSH access and the main Evoker program (\texttt{Evoker.jar}), which is run locally on your PC and is used for displaying cluster plots and assigning them a Pass/Fail verdict. You will need Java 5.0 (also known as version 1.5) or newer on your local machine to run Evoker. 
Details about preparing your data and using the program are presented below. 18 | 19 | \subsection{File formats and conventions} 20 | 21 | Evoker can work with files formatted in two different ways, either using a default file format based around the binary style files of the widely-used \texttt{PLINK} program or an Oxford file format that is based on the files used in the \texttt{WTCCC2} project. 22 | 23 | In both cases the data for your project (whether stored locally or remotely) should all be kept in a single directory to which you have read and write access. Because Evoker understands how to load and parse your data based on which files are in this directory you should not have any other files other than those you wish to load therein. The files in this directory are organised by `collection' (a group of samples, such as cases or controls) and `chromosome' (a group of SNPs on the same chromosome). 24 | 25 | The collection groupings can be any subset of your data which you wish to plot separately, such as samples collected in different ways, or genotyped in different laboratories. Furthermore, `chromosomes' can really be any grouping of SNPs, as this designation is used mostly for bookkeeping by the program. Indeed, you could organise your data with only a single `genome' chromosome, requiring only one set of files per collection, but this will result in a performance loss when using Evoker. 26 | 27 | \subsubsection{Default format} 28 | The default file format works with files formatted in the binary style of the widely-used \texttt{PLINK} program, along with one extension. Information about this format can be found at the \texttt{PLINK} website (see below). In the default format Evoker requires all of the \texttt{PLINK} style files (\texttt{.bed, .bim, .fam}), plus an additional binary intensity (\texttt{.bnt}) file, named according to a particular convention. 
Using the default file format your project should have one \texttt{.fam} file per collection (named \texttt{collection.fam}) and one set of \texttt{.bed/.bim/.bnt} files per collection--chromosome combination (named \texttt{collection.chromosome.bed} etc.). 29 | 30 | If, for instance, you had a case collection and a control collection in the default format, each genotyped on SNPs from chromosomes 20-22, you would have files as follows: 31 | 32 | \begin{verbatim} 33 | case.fam 34 | case.20.bed case.20.bim case.20.bnt 35 | case.21.bed case.21.bim case.21.bnt 36 | case.22.bed case.22.bim case.22.bnt 37 | 38 | control.fam 39 | control.20.bed control.20.bim control.20.bnt 40 | control.21.bed control.21.bim control.21.bnt 41 | control.22.bed control.22.bim control.22.bnt 42 | \end{verbatim} 43 | 44 | \subsubsection{Oxford format} 45 | The Oxford file format works with files formatted in the style of \texttt{WTCCC2} project files. Using the Oxford format Evoker requires \texttt{.gen.bin, .int.bin, .sample, .snp} files; the \texttt{.gen.bin} and the \texttt{.int.bin} files may be compressed (for example \texttt{.gen.bin.gz, .int.bin.gz}). Like the default format, the Oxford format files must be named according to a particular convention: your project should have one \texttt{.sample} file per collection (named \texttt{collection\_platform.sample}) and one set of \texttt{.gen.bin/.snp/.int.bin} files per collection--chromosome combination (named \texttt{collection\_chromosome\_platform.gen.bin} etc.). 
46 | 47 | If, for instance, you had a case collection and a control collection in the Oxford format, each genotyped using an Illumina platform on SNPs from chromosomes 20-22, you would have files as follows: 48 | 49 | \begin{verbatim} 50 | case_illumina.sample 51 | case_20_illumina.gen.bin case_20_illumina.snp case_20_illumina.int.bin 52 | case_21_illumina.gen.bin case_21_illumina.snp case_21_illumina.int.bin 53 | case_22_illumina.gen.bin case_22_illumina.snp case_22_illumina.int.bin 54 | 55 | control_illumina.sample 56 | control_20_illumina.gen.bin control_20_illumina.snp control_20_illumina.int.bin 57 | control_21_illumina.gen.bin control_21_illumina.snp control_21_illumina.int.bin 58 | control_22_illumina.gen.bin control_22_illumina.snp control_22_illumina.int.bin 59 | \end{verbatim} 60 | 61 | \subsection{Trying Evoker with the sample dataset} 62 | 63 | The easiest way to learn how to use Evoker is to test it with the included sample dataset. Put the following files into a clean directory: 64 | 65 | \begin{verbatim} 66 | sample.fam 67 | sample.22.bim 68 | sample.22.bed 69 | sample.22.bnt 70 | \end{verbatim} 71 | 72 | This sample dataset represents 10 SNPs genotyped in 100 individuals. You should be able to launch the program by double-clicking the \texttt{.jar} file if your system is configured in the standard way. Otherwise, you can try running the program from the command line: 73 | 74 | \begin{verbatim} 75 | java -jar Evoker.jar 76 | \end{verbatim} 77 | 78 | \subsubsection{Opening a directory} 79 | Open the data directory by selecting \texttt{Open directory} from the \texttt{File} menu and select the directory where you've put the data. Evoker keeps a log of data sources you've opened, which you can view by selecting \texttt{Show Evoker log} from the \texttt{Log} menu. The current data directory is shown in the title bar of the main window, for easy reference. 
80 | 81 | \subsubsection{Viewing SNPs} 82 | You can plot a SNP by typing its name in the box and clicking \texttt{Go}. Try ``snp0'', one of the SNPs in the sample dataset. Genotypes are coloured red, green and blue for the three genotype classes and grey for uncalled, or missing data. Beneath the plot are some summary statistics: minor allele frequency (MAF), genotyping percentage (GPC), and Hardy-Weinberg equilibrium \emph{p}-value (HWE pval). 83 | 84 | Right clicking on the plot brings up a set of menu options, including saving the plot as a PNG image. You can zoom in and out of the plot either using this menu or by clicking and dragging (down and to the right creates a ``zoom in box'', while up and to the left zooms back out again). Holding the pointer over a single point will show which sample corresponds to that point. 85 | 86 | You can type in more SNP names or use the \texttt{Random} button to view more cluster plots. After you've viewed a few SNPs you can see which you've viewed recently in the \texttt{History} menu. You can click on previous SNPs to show those cluster plots again. 87 | 88 | \subsubsection{Scoring SNPs from a list} 89 | 90 | One of the ways in which Evoker can be used is to load a list of SNPs (say, those showing evidence of association) to verify that they have good clusters. You can do this by selecting \texttt{Load marker list} from the \texttt{File} menu. The sample dataset includes \texttt{sample.list} which you can load now. 91 | 92 | The first SNP on the list, ``snp0'' should be plotted. Choose from the three approval options: \texttt{Yes, Maybe, No}. As you make a decision on each SNP the next SNP in the list is plotted. In addition to the buttons, you can also press \texttt{Y,M,N} to render a verdict. Each time you select an approval option, it is recorded in a file in the same place where the list file came from, with \texttt{.scores} appended (it is important, therefore, that you have write permission in this directory). 
If you now look at \texttt{sample.list.scores} you'll see a line for each SNP with a score of 1, 0 or -1 corresponding to \texttt{Yes}, \texttt{Maybe} or \texttt{No}. 93 | 94 | Because you must render a verdict in order, and only once, for each SNP, if you jump back to another SNP in the history, or type a SNP name in the box and click \texttt{Go} the Approval controls will be disabled until you've returned to the current list position, which can be done via a link in the \texttt{History} menu. 95 | 96 | \subsubsection{Excluding samples by list} 97 | Using Evoker you can view the impact that excluding certain samples (for example samples with a poor quality control score) has on clusters. To achieve this Evoker is able to read in a \texttt{.qc} file, this file is simply a list of the sample identifiers corresponding to the \texttt{.fam} file. Once this file is loaded the selected samples are excluded from all following plots while filtering remains turned on. 98 | 99 | A \texttt{.qc} file can be loaded in two ways, if placed in the same directory as the \texttt{.fam/.bim/.bed/.bnt} files it will be loaded when the directory is opened (note: if there is more than one \texttt{.qc} file in the directory the first found will be loaded), you can also load a \texttt{.qc} after the data has been loaded by selecting \texttt{load exclude list} from the \texttt{file} menu. 100 | 101 | Once loaded the samples being excluded can be viewed from the Evoker log. Each time a new \texttt{.qc} file is loaded the previously loaded list is overwritten. Lastly filtering of samples can be turned on and off at any time by simply selecting \texttt{filter} data from the \texttt{file} menu. 102 | 103 | \subsection{Creating the files} 104 | 105 | When using Evoker with your own data, you can use \texttt{PLINK} to create your \texttt{.bed/.bim/.fam} files, but you'll need a special program, included in this package, to generate the \texttt{.bnt} files for Evoker. 
This is probably the hardest part of preparing your data for Evoker. The \texttt{int2bnt.pl} script will transform text intensity files into the correct binary format used by Evoker, the \texttt{int2bnt.pl} script can accept intensity data in a number of formats which are explained in more detail below. 106 | 107 | All input files should be named in the style \texttt{collection.chromosome.int} and the SNPs must be in the same order as the corresponding \texttt{.bim} file, and each pair of intensities should appear in the same order as the pair of alleles in the relevant entry in the matching \texttt{.bim} file. 108 | 109 | \subsubsection{Default intensity format} 110 | The default input format for \texttt{int2bnt.pl} is straightforward. It is a matrix of intensities with SNPs as rows and individuals as pairs of whitespace--separated columns. The first row of the file is a header with the names of the samples (each repeated twice because there are two intensities per sample), which must be in the same order as the matching \texttt{.fam} file. An example for two individuals and three SNPs might look like this: 111 | 112 | \begin{verbatim} 113 | SNP CASE1 CASE1 CASE2 CASE2 114 | rs123 0.956 0.009 0.999 0.010 115 | rs456 0.502 0.511 0.499 0.520 116 | rs789 0.012 0.026 0.003 0.977 117 | \end{verbatim} 118 | 119 | \subsubsection{Chiamo} 120 | If you use the Chiamo genotype calling program, the \texttt{int2bnt.pl} script is able to generate a \texttt{.bnt} file from the standard Chiamo input file. To use this file format just supply the following option when you run the \texttt{int2bnt.pl} script \texttt{--filetype "chiamo"}. 121 | 122 | \subsubsection{Illuminus} 123 | If you use the Illuminus genotype calling program, the \texttt{int2bnt.pl} script is able to generate a \texttt{.bnt} file from the standard Illuminus input file. To use this file format just supply the following option when you run the \texttt{int2bnt.pl} script \texttt{--filetype "illuminus"}. 
124 | 125 | \subsubsection{Birdsuite} 126 | If you are using the Birdsuite collection of programs, the \texttt{study.allele\_summary} output file is accepted as input by the \texttt{int2bnt.pl} script to generate a \texttt{.bnt} file. To use this file format just supply the following option when you run the \texttt{int2bnt.pl} script \texttt{--filetype "birdsuite"}. 127 | 128 | \subsubsection{BeadStudio} 129 | The Evoker package also contains a script called \texttt{illumina\_parser.pl} for parsing the text output files from the Illumina BeadStudio software. The \texttt{illumina\_parser.pl} script will create the properly formatted \texttt{.bnt} and \texttt{.bed} files required by Evoker. 130 | 131 | \section{Remote data access} 132 | 133 | Even files in the compressed binary formats used by Evoker can be extremely large (many gigabytes) for datasets with thousands of samples and over one million SNPs. Since it is often unfeasible to have the complete dataset available on your PC, Evoker can access a data directory on any computer to which you have SSH access. 134 | 135 | Your data directory should be set up in exactly the same fashion as above, except that it needs the \texttt{evoker-helper.pl} script to be placed in the same spot. This script does the job of slicing out the data for individual SNPs from the data files, so that only a small amount of data is sent over the network, to maximise performance. Before you try to connect as described below, you might want to open the log to monitor the progress of your connection. 136 | 137 | Connect to a remote server by launching Evoker on your PC and selecting \texttt{Connect to remote server} from the \texttt{File} menu. Enter the name of the server you're connecting to, along with the absolute path of the data directory on the remote machine (see below for tips on specifying the correct path). Evoker requires a `scratch' directory on your computer to keep some (small) files it uses. 
This should be a directory you can read and write, but \emph{should not contain any other important files} because Evoker will overwrite files with the same names without warning. This should also be specified as an absolute path, but the \texttt{Browse} button can be used to help find the correct directory and its path. 138 | 139 | Enter your login details and click \texttt{OK} to connect. There might be a long delay the first time you connect to a remote data source because Evoker must download all the \texttt{.bim} and \texttt{.fam} files to the local directory. Evoker should work exactly the same as when the data files are stored locally, with the caveat that performance will be slower since data is being passed across the network. Note that Evoker uses files in the scratch directory to save time if they've already been requested from the remote server. Each time you connect to a remote data source you will be given the option of deleting all the files in your local directory, it's a good idea to clean out this directory if data on the remote server has changed, or if you're connecting to multiple remote data sources. The log will show some detail about the data being sent across the network. 140 | 141 | If you load a SNP list while using a remote data source, Evoker will download the data for all SNPs in the list in the background. This means that, unless you make very rapid Approval decisions, the program should be able to `keep up' with you and appear more responsive. 142 | 143 | \section{Additional Resources and tips} 144 | 145 | \subsection{The binary intensity format} 146 | 147 | The binary intensity format is very simple. First, a `magic number' of two bytes is written to the beginning of the file so that Evoker can recognise it. These bytes for Evoker are: 148 | \begin{verbatim} 149 | 00011010 00110001 150 | \end{verbatim} 151 | 152 | \noindent After these, each point is stored as a pair of 4--byte floats (for the intensity of the two alleles). 
These are simply packed in the same order as the text intensity files described above (a pair for each individual for the first SNP, then the same for the second SNP and so on). 153 | 154 | Evoker can also accept binary intensity data that uses two four byte integer values representing the number of rows of data (SNPs) and the number of columns (samples) as a header. 155 | 156 | \subsection{Tips} 157 | 158 | The perl scripts included are intended to work in most UNIX environments. They assume that the perl executable can be found at \texttt{/usr/bin/perl}, which you can change if for some reason that is not the case on your system. Finally, make sure that the script is `world executable'. You can accomplish this with the following command: 159 | 160 | \begin{verbatim} 161 | chmod +x evoker-helper.pl 162 | \end{verbatim} 163 | 164 | \noindent An easy way to get the correct string to enter in the data connection dialog for the remote directory is to login to that machine, change to that directory and run the \texttt{pwd} command, which will print the absolute path to that directory, which you can then cut and paste into the Evoker window. 165 | 166 | \subsection{Resources} 167 | Evoker.jar requires Java 5.0 or newer to run. You can download the latest version of Java at \texttt{www.java.com} 168 | \\ 169 | \\ 170 | \texttt{PLINK} binary pedfile (\texttt{.bed}) format: 171 | 172 | \indent \texttt{http://pngu.mgh.harvard.edu/purcell/plink/data.shtml\#bed} 173 | \end{document} --------------------------------------------------------------------------------