getUkbBatchMembership() { return ukbBatchMembership; }
130 |
131 | public Sex getSexByIndex(Integer index) { return sexByIndex.get(index); }
132 |
133 | public FileFormat getFileFormat() {
134 | return fileFormat;
135 | }
136 |
137 | public void setUkbExclude(QCFilterData ukbExclude) {
138 | this.ukbExclude = ukbExclude;
139 | }
140 |
141 | public QCFilterData getUkbExclude() {
142 | return ukbExclude;
143 | }
144 | }
145 |
146 |
--------------------------------------------------------------------------------
/src/evoker/EvokerPoint2D.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 1997, 2006, Oracle and/or its affiliates. All rights reserved.
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 | *
5 | * This code is free software; you can redistribute it and/or modify it
6 | * under the terms of the GNU General Public License version 2 only, as
7 | * published by the Free Software Foundation. Oracle designates this
8 | * particular file as subject to the "Classpath" exception as provided
9 | * by Oracle in the LICENSE file that accompanied this code.
10 | *
11 | * This code is distributed in the hope that it will be useful, but WITHOUT
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 | * version 2 for more details (a copy is included in the LICENSE file that
15 | * accompanied this code).
16 | *
17 | * You should have received a copy of the GNU General Public License version
18 | * 2 along with this work; if not, write to the Free Software Foundation,
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 | *
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 | * or visit www.oracle.com if you need additional information or have any
23 | * questions.
24 | */
25 | package evoker;
26 |
27 | import java.io.Serializable;
28 |
29 | /**
30 | * The EvokerPoint2D class defines a point representing a location
31 | * in {@code (x,y)} coordinate space.
32 | *
33 | * This class is only the abstract superclass for all objects that
34 | * store a 2D coordinate.
35 | * The actual storage representation of the coordinates is left to
36 | * the subclass.
37 | *
38 | * @author Jim Graham
39 | * @since 1.2
40 | */
41 | /**
42 | * Modified for use in evoker
43 | */
44 | public class EvokerPoint2D implements Cloneable {
45 |
46 | public static long IDAt = 0;
47 | public long ID;
48 | /**
49 | * The X coordinate of this Point2D.
50 | * @since 1.2
51 | * @serial
52 | */
53 | public double x;
54 | /**
55 | * The Y coordinate of this Point2D.
56 | * @since 1.2
57 | * @serial
58 | */
59 | public double y;
60 |
61 | /**
62 | * Constructs and initializes a Point2D with
63 | * coordinates (0, 0).
64 | * @since 1.2
65 | */
66 | public EvokerPoint2D() {
67 | }
68 |
69 | /**
70 | * Constructs and initializes a Point2D with the
71 | * specified coordinates.
72 | *
73 | * @param x the X coordinate of the newly
74 | * constructed Point2D
75 | * @param y the Y coordinate of the newly
76 | * constructed Point2D
77 | * @since 1.2
78 | */
79 | public EvokerPoint2D(double x, double y) {
80 | this.x = x;
81 | this.y = y;
82 | this.ID = IDAt++;
83 | }
84 |
85 | /**
86 | * {@inheritDoc}
87 | * @since 1.2
88 | */
89 | public double getX() {
90 | return x;
91 | }
92 |
93 | /**
94 | * {@inheritDoc}
95 | * @since 1.2
96 | */
97 | public double getY() {
98 | return y;
99 | }
100 |
101 | public double getID(){
102 | return ID;
103 | }
104 |
105 | /**
106 | * {@inheritDoc}
107 | * @since 1.2
108 | */
109 | public void setLocation(double x, double y) {
110 | this.x = x;
111 | this.y = y;
112 | }
113 |
114 | /**
115 | * Returns a String that represents the value
116 | * of this Point2D.
117 | * @return a string representation of this Point2D.
118 | * @since 1.2
119 | */
120 | public String toString() {
121 | return "Point2D.Double[" + x + ", " + y + "]";
122 | }
123 |
124 | public int hashCode() {
125 | long bits = java.lang.Double.doubleToLongBits(getX());
126 | bits ^= java.lang.Double.doubleToLongBits(getY()) * 31;
127 | return (((int) bits) ^ ((int) (bits >> 32)));
128 | }
129 |
130 | /**
131 | * Determines whether or not two points are equal. Two instances of
132 | * Point2D are equal if the values of their
133 | * x and y member fields, representing
134 | * their position in the coordinate space, are the same.
135 | * @param obj an object to be compared with this Point2D
136 | * @return true if the object to be compared is
137 | * an instance of Point2D and has
138 | * the same values; false otherwise.
139 | * @since 1.2 */
140 | public boolean equals(Object obj) {
141 | if (obj instanceof EvokerPoint2D) {
142 | EvokerPoint2D p2d = (EvokerPoint2D) obj;
143 | return (getX() == p2d.getX()) && (getY() == p2d.getY()) && (getID() == p2d.getID());
144 | }
145 | return false;
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/src/resources/oxford_parser.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl

## Description: This script creates the .bed, .bim, .bnt and .fam files required by Evoker from Oxford format files
## Usage:       ./oxford_parser /dir 'genotype_cutoff'
## Input:       The script needs to be passed the full path to a directory containing the following files:
##                  Study_chr_illumina.gen.bin.gz
##                  Study_chr_illumina.int.bin.gz
##                  study_chr_illumina.snp
##                  Study_affy.sample
## Output:      The script will create the four files required by evoker:
##                  Study_chr.bed
##                  Study_chr.bim
##                  Study_chr.bnt
##                  Study.fam
## Arguments:   genotype probability cutoff (optional, defaults to 0.9)
## Note:        The *int.bin.gz file is already in a format that Evoker can understand and simply
##              needs unzipping and renaming in the format 'study_chr.bnt'
##
## Author: jm20@sanger.ac.uk


use strict;

my $dir;
my $cutoff;

if ( scalar(@ARGV) == 2 ) {
    $dir    = $ARGV[0];
    $cutoff = $ARGV[1];
}
elsif ( scalar(@ARGV) == 1 ) {
    $dir    = $ARGV[0];
    $cutoff = 0.9;
}
else {
    die "Incorrect Number of Arguments\n";
}

## Every path below is built as "$dir$file", so the directory must end with a /
$dir .= '/' unless $dir =~ m{/$};

## TODO: convert chromosomes X,Y,XY,MT to numbers?

opendir( my $dh, $dir ) or die "Can't open '$dir': $!";

while ( defined( my $file = readdir($dh) ) ) {

    if ( $file =~ /\.gen\.bin\.gz$/ ) {
        ## genotype file
        my ( $study, $chr ) = study_and_chr($file) or next;

        ## list-form pipe open: the file name never passes through a shell
        open( my $gen, '-|', 'zcat', "$dir$file" )
          or die "Can't open gen file '$file': $!";
        binmode($gen);

        open( my $bed, '>', "$dir$study.$chr.bed" )
          or die "Can't open output '$study.$chr.bed': $!";
        binmode($bed);

        # magic number and SNP-major mode.
        print {$bed} pack( 'B*', "011011000001101100000001" );

        ## header: number of SNPs, then number of probability columns
        ## (three per individual, hence the division below)
        my $bsnp_num;
        read( $gen, $bsnp_num, 4 );
        my $snp_num = unpack( 'i*', $bsnp_num );
        my $bind_num;
        read( $gen, $bind_num, 4 );
        my $ind_num = unpack( 'i*', $bind_num );
        $ind_num = $ind_num / 3;

        ## for each snp
        for ( my $i = 0 ; $i < $snp_num ; $i++ ) {
            my $bytecounter = 0;
            my $byte        = "";

            ## for all the inds in a snp work out the genotypes
            for ( my $j = 0 ; $j < $ind_num ; $j++ ) {
                ## the next three float values (12 bytes): AA, AB and BB probabilities
                my ( $b_aa, $b_ab, $b_bb );
                read( $gen, $b_aa, 4 );
                read( $gen, $b_ab, 4 );
                read( $gen, $b_bb, 4 );
                my $aa = unpack( 'f*', $b_aa );
                my $ab = unpack( 'f*', $b_ab );
                my $bb = unpack( 'f*', $b_bb );

                ## NOTE(review): evoker-helper.pl writes "10" for AB and "11"
                ## for BB, the reverse of the codes used here. Confirm which
                ## one Evoker's reader expects before changing either.
                my $individual;
                if ( $aa > $cutoff ) {
                    $individual = "00";
                }
                elsif ( $ab > $cutoff ) {
                    $individual = "11";
                }
                elsif ( $bb > $cutoff ) {
                    $individual = "10";
                }
                else {
                    ## missing
                    $individual = "01";
                }

                ## later individuals go in front: the first individual of each
                ## group of four ends up in the lowest-order bits of the byte
                $byte = $individual . $byte;
                $bytecounter++;

                if ( $bytecounter == 4 ) {
                    # we've completed a byte, so write it.
                    print {$bed} pack( 'B*', $byte );
                    $bytecounter = 0;
                    $byte        = "";
                }
            }

            ## pad the last, partially filled byte of this SNP with zero bits
            if ( $bytecounter != 0 ) {
                for ( my $k = 0 ; $k < 4 - $bytecounter ; $k++ ) {
                    $byte = "00" . $byte;
                }
                print {$bed} pack( 'B*', $byte );
            }
        }

        close($gen);
        close($bed) or die "Can't write output '$study.$chr.bed': $!";
    }
    elsif ( $file =~ /\.snp$/ ) {
        my ( $study, $chr ) = study_and_chr($file) or next;

        open( my $snp, '<', "$dir$file" )
          or die "Can't open snp file '$file': $!";
        open( my $bim, '>', "$dir$study.$chr.bim" )
          or die "Can't open output '$study.$chr.bim': $!";

        while ( defined( my $line = readline($snp) ) ) {
            chomp($line);
            my @values   = split( /\s+/, $line );
            my $snp_id   = $values[1];
            my $pos      = $values[2];
            my $allele_a = $values[3];
            my $allele_b = $values[4];
            ## 'chromosome' 'snp identifier' 'Genetic distance (morgans)' 'Base-pair position (bp units)' 'Allele A' 'Allele B'
            print {$bim} "$chr $snp_id 0 $pos $allele_a $allele_b\n";
        }
        close($snp);
        close($bim) or die "Can't write output '$study.$chr.bim': $!";
    }
    elsif ( $file =~ /\.sample$/ ) {
        unless ( $file =~ /^(\w+)_/ ) {
            warn "Skipping '$file': name does not start with 'study_'\n";
            next;
        }
        my $study = $1;

        open( my $sam, '<', "$dir$file" )
          or die "Can't open Sample file '$file': $!";
        open( my $fam, '>', "$dir$study.fam" )
          or die "Can't open output '$study.fam': $!";

        ## the first two lines of a sample file are headers; discard them
        my $header  = readline($sam);
        my $header2 = readline($sam);
        while ( defined( my $line = readline($sam) ) ) {
            chomp($line);
            my @values    = split( /\s+/, $line );
            my $sample_id = $values[1];
            my $sex       = $values[4];
            ## 'Family ID' 'Individual ID' 'Paternal ID' 'Maternal ID' 'Sex' 'Phenotype'
            print {$fam} "$sample_id $sample_id 0 0 $sex 0\n";
        }
        close($sam);
        close($fam) or die "Can't write output '$study.fam': $!";
    }
    elsif ( $file =~ /\.int\.bin\.gz$/ ) {
        my ( $study, $chr ) = study_and_chr($file) or next;

        ## unzip the intensity file and create a new file using the naming scheme Evoker expects
        open( my $zin, '-|', 'zcat', "$dir$file" )
          or die "Can't open int file '$file': $!";
        binmode($zin);
        open( my $bnt, '>', "$dir$study.$chr.bnt" )
          or die "Can't open output '$study.$chr.bnt': $!";
        binmode($bnt);

        my $chunk;
        while ( read( $zin, $chunk, 65536 ) ) {
            print {$bnt} $chunk;
        }
        close($zin);
        close($bnt) or die "Can't write output '$study.$chr.bnt': $!";
    }
}

closedir($dh);

## Split a 'study_chr...' file name into (study, chromosome number).
## Warns and returns an empty list when the name does not fit, so the caller
## can skip the file instead of reusing captures from an earlier match.
sub study_and_chr {
    my ($name) = @_;
    if ( $name =~ /^(\w+)_(\d+)/ ) {
        return ( $1, $2 );
    }
    warn "Skipping '$name': name does not look like 'study_chr...'\n";
    return;
}
162 |
--------------------------------------------------------------------------------
/src/evoker/PDFDialog.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import javax.swing.*;
4 |
5 | import java.awt.event.ActionListener;
6 | import java.awt.event.ActionEvent;
7 |
/**
 * Modal dialog that collects the inputs for generating PDFs from a scores
 * file: the scores file, which categories of plots to save, and the
 * destination directory.
 *
 * <p>After the dialog is disposed, {@link #success()} tells whether it was
 * closed with OK, and the getters return the values entered at that time.
 */
public class PDFDialog extends JDialog implements ActionListener {

    private boolean success;
    private String scoresFile;
    private String pdfDir;
    private JTextField scoresFileField;
    private JTextField pdfDirField;
    private JButton pdfBrowseButton;
    private JLabel pdfDirLabel;
    private JCheckBox allPlotsButton;
    private JCheckBox yesPlotsButton;
    private JCheckBox maybePlotsButton;
    private JCheckBox noPlotsButton;

    private JFileChooser jfc;

    /**
     * Builds the dialog contents. The dialog is neither sized nor shown here.
     *
     * @param parent the frame this modal dialog is attached to
     */
    public PDFDialog(JFrame parent){
        super(parent,"Generate PDF from Scores",true);

        // Start browsing in the working directory. The property has to be
        // looked up: passing the literal "user.dir" would be treated as a
        // (normally non-existent) path.
        jfc = new JFileChooser(System.getProperty("user.dir"));

        JPanel contents = new JPanel();
        contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS));

        // Row 1: scores file text field plus its "Browse" button.
        JPanel scoresFilePanel = new JPanel();
        scoresFilePanel.add(new JLabel("Scores file: "));
        scoresFileField = new JTextField(20);
        scoresFilePanel.add(scoresFileField);
        JButton scoresBrowseButton = new JButton("Browse");
        scoresBrowseButton.addActionListener(this);
        scoresFilePanel.add(scoresBrowseButton);
        contents.add(scoresFilePanel);

        // Empty panel used as a spacer.
        contents.add(new JPanel());

        // Row 2: one check box per plot category, all initially unselected.
        JPanel savePlotsPanel = new JPanel();
        savePlotsPanel.setLayout(new BoxLayout(savePlotsPanel,BoxLayout.PAGE_AXIS));
        allPlotsButton = new JCheckBox("Save all plots");
        allPlotsButton.setSelected(false);
        savePlotsPanel.add(allPlotsButton);
        yesPlotsButton = new JCheckBox("Save all Yes plots");
        yesPlotsButton.setSelected(false);
        savePlotsPanel.add(yesPlotsButton);
        maybePlotsButton = new JCheckBox("Save all Maybe plots");
        maybePlotsButton.setSelected(false);
        savePlotsPanel.add(maybePlotsButton);
        noPlotsButton = new JCheckBox("Save all No plots");
        noPlotsButton.setSelected(false);
        savePlotsPanel.add(noPlotsButton);

        contents.add(savePlotsPanel);

        // Row 3: destination directory text field plus its "Save to" button.
        JPanel pdfDirPanel = new JPanel();
        pdfDirLabel = new JLabel("Destination directory for PDFs: ");
        pdfDirPanel.add(pdfDirLabel);
        pdfDirField = new JTextField(20);
        pdfDirPanel.add(pdfDirField);
        pdfBrowseButton = new JButton("Save to");
        pdfBrowseButton.addActionListener(this);
        pdfDirPanel.add(pdfBrowseButton);
        contents.add(pdfDirPanel);

        contents.add(new JPanel());

        // Row 4: OK (default button) and Cancel.
        JPanel butPan = new JPanel();
        JButton okbut = new JButton("OK");
        getRootPane().setDefaultButton(okbut);
        okbut.addActionListener(this);
        butPan.add(okbut);
        JButton cancelbut = new JButton("Cancel");
        cancelbut.addActionListener(this);
        butPan.add(cancelbut);
        contents.add(butPan);

        this.setContentPane(contents);
    }

    /**
     * Handles all four buttons, told apart by their action command (which is
     * the button text).
     *
     * <ul>
     * <li>OK: stores both text field values, marks success, closes.</li>
     * <li>Browse: picks a file and copies its absolute path into the scores field.</li>
     * <li>Save to: picks a directory and copies its absolute path into the
     *     destination field.</li>
     * <li>Cancel: marks failure and closes; the stored values are not updated.</li>
     * </ul>
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        String command = e.getActionCommand();
        if ("OK".equals(command)){
            scoresFile = scoresFileField.getText();
            pdfDir = pdfDirField.getText();
            setSuccess(true);
            this.dispose();
        }else if ("Browse".equals(command)){
            jfc.setFileSelectionMode(JFileChooser.FILES_ONLY);
            if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){
                scoresFileField.setText(jfc.getSelectedFile().getAbsolutePath());
            }
        }else if ("Save to".equals(command)) {
            jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){
                pdfDirField.setText(jfc.getSelectedFile().getAbsolutePath());
            }
        }else if ("Cancel".equals(command)){
            setSuccess(false);
            this.dispose();
        }
    }

    /** @return true if the dialog was last closed with OK, false otherwise */
    public boolean success() {
        return success;
    }

    /** @return the scores file path captured when OK was pressed; null before that */
    public String getscoresFile() {
        return scoresFile;
    }

    /** @return the destination directory captured when OK was pressed; null before that */
    public String getPdfDir(){
        return pdfDir;
    }

    /** @return whether "Save all plots" is currently ticked */
    public boolean allPlots() {
        return allPlotsButton.isSelected();
    }

    /** @return whether "Save all Yes plots" is currently ticked */
    public boolean yesPlots() {
        return yesPlotsButton.isSelected();
    }

    /** @return whether "Save all Maybe plots" is currently ticked */
    public boolean maybePlots() {
        return maybePlotsButton.isSelected();
    }

    /** @return whether "Save all No plots" is currently ticked */
    public boolean noPlots() {
        return noPlotsButton.isSelected();
    }

    /**
     * Overrides the success flag reported by {@link #success()}.
     *
     * @param b the new flag value
     */
    public void setSuccess(boolean b) {
        success = b;
    }
}
--------------------------------------------------------------------------------
/src/resources/evoker-helper.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
#
## generate the .bnt and .bed files for just one SNP.
##
## Positional arguments:
##   0 snp           SNP name, used in the output file names and echoed on success
##   1 chr           chromosome, used in the input file names
##   2 collection    collection name, used in input and output file names
##   3 index         zero-based record number of the SNP within the input files
##   4 numinds       number of individuals per record
##   5 tot_snps      accepted but not used by this script
##   6 oxford        true => read Oxford-format *.gen.bin / *.int.bin (optionally gzipped)
##   7 platform      platform part of the Oxford file names
##   8 ukbiobank_v2  true => read ukb_cal_chr*_v2.bed / ukb_int_chr*_v2.bin
##   9 outpath       optional directory for the output files
##
## Input files are opened relative to the current working directory.
## Calling the script with --version as the only argument prints the version.

use strict;
use POSIX qw(ceil floor);
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use File::Spec::Functions;

my $VERSION = "2.4";

if (defined $ARGV[0] && $ARGV[0] eq "--version") {
    print "version: $VERSION\n";
    exit 1;
}


my $snp          = $ARGV[0];
my $chr          = $ARGV[1];
my $collection   = $ARGV[2];
my $index        = $ARGV[3];
my $numinds      = $ARGV[4];
my $tot_snps     = $ARGV[5];
my $oxford       = $ARGV[6];
my $platform     = $ARGV[7];
my $ukbiobank_v2 = $ARGV[8];
my $outpath      = $ARGV[9];
my $cutoff       = 0.9;
my $magic_num;
my $bytesPerRecord;
my $buf;

my $bntpath = "$collection.$snp.bnt";
my $bedpath = "$collection.$snp.bed";
if ($outpath) {
    $bntpath = catfile($outpath, $bntpath);
    $bedpath = catfile($outpath, $bedpath);
}

## three-argument open keeps odd characters in the path from being read as a mode
open (my $bnt_out, '>', $bntpath) or die "Can't open output '$bntpath': $!";
open (my $bed_out, '>', $bedpath) or die "Can't open output '$bedpath': $!";
binmode($bnt_out);
binmode($bed_out);

if ($oxford) {

    ## .bed file
    my $gen_file = open_maybe_gzipped("${collection}_${chr}_${platform}.gen.bin");

    #magic number
    read ($gen_file, $magic_num, 8);
    print {$bed_out} $magic_num;

    #jump to position: three 4-byte floats per individual, after the 8-byte header
    $bytesPerRecord = $numinds*12;
    seek ($gen_file, ($index*$bytesPerRecord)+8, 0);

    my $bytecounter = 0;
    my $byte = "";
    my $individual;
    for ( my $i = 0 ; $i < $numinds ; $i++ ) {
        my $b_aa;
        read( $gen_file, $b_aa, 4 );
        my $aa = unpack( 'f*', $b_aa );

        my $b_ab;
        read( $gen_file, $b_ab, 4 );
        my $ab = unpack( 'f*', $b_ab );

        my $b_bb;
        read( $gen_file, $b_bb, 4 );
        my $bb = unpack( 'f*', $b_bb );

        if ( $aa > $cutoff ) {
            $individual = "00";
        }
        elsif ( $ab > $cutoff ) {
            $individual = "10";
        }
        elsif ( $bb > $cutoff ) {
            $individual = "11";
        }
        else {
            ## missing genotype
            $individual = "01";
        }

        ## later individuals go in front: the first individual of each group
        ## of four ends up in the lowest-order bits of the byte
        $byte = $individual.$byte;
        $bytecounter++;
        if ( $bytecounter == 4 ) {
            ## completed a byte, so write it.
            print {$bed_out} pack( 'B*', $byte );
            $bytecounter = 0;
            $byte = "";
        }
    }
    ## fill up any empty bytes
    if ( $bytecounter != 0 ) {
        for ( my $k = 0 ; $k < 4 - $bytecounter ; $k++ ) {
            $byte = "00" . $byte;
        }
        print {$bed_out} pack( 'B*', $byte );
    }
    close $gen_file;

    ## .bnt file
    my $int_file = open_maybe_gzipped("${collection}_${chr}_${platform}.int.bin");

    read ($int_file, $magic_num, 8);
    print {$bnt_out} $magic_num;

    #jump to position: 8 bytes per individual, after the 8-byte header
    $bytesPerRecord = $numinds*8;
    seek ($int_file, ($index*$bytesPerRecord)+8, 0);
    read ($int_file, $buf, $bytesPerRecord);
    print {$bnt_out} $buf;
    close $int_file;
} elsif ($ukbiobank_v2) {
    ## .bed file: 3-byte header (magic number and SNP-major mode), 4 individuals per byte
    copy_record("ukb_cal_chr${chr}_v2.bed", $bed_out, 3, ceil($numinds/4), $index);

    ## .bnt file (==.bin for UKB): UKB doesn't have a header
    copy_record("ukb_int_chr${chr}_v2.bin", $bnt_out, 0, $numinds*8, $index);
} else {
    ## .bed file: 3-byte header (magic number and SNP-major mode), 4 individuals per byte
    copy_record("$collection.$chr.bed", $bed_out, 3, ceil($numinds/4), $index);

    ## .bnt file: 2-byte header, 8 bytes per individual
    copy_record("$collection.$chr.bnt", $bnt_out, 2, $numinds*8, $index);
}

close $bed_out or die "Can't write output '$bedpath': $!";
close $bnt_out or die "Can't write output '$bntpath': $!";

print "$snp\n";


## Open "$base.gz" through IO::Uncompress::Gunzip when that file exists and is
## non-empty, otherwise open the uncompressed "$base". Dies if neither opens.
sub open_maybe_gzipped {
    my ($base) = @_;
    my $fh;
    ## perl 5+ includes this module, so hopefully most users will have it.
    if (-s "$base.gz") {
        $fh = IO::Uncompress::Gunzip->new("$base.gz")
            or die "Can't open '$base.gz': $GunzipError";
    } else {
        open ($fh, "<", $base) or die "Can't open '$base': $!";
        binmode($fh);
    }
    return $fh;
}

## Copy the file header (if any) followed by record number $idx from the
## binary file $path to the already-open handle $out.
sub copy_record {
    my ($path, $out, $header_len, $record_len, $idx) = @_;
    open (my $in, '<', $path) or die "Can't open '$path': $!";
    binmode($in);

    my $chunk;
    if ($header_len > 0) {
        read ($in, $chunk, $header_len);
        print {$out} $chunk;
    }

    #jump to position
    seek ($in, ($idx*$record_len)+$header_len, 0);
    read ($in, $chunk, $record_len);
    print {$out} $chunk;
    close $in;
}
187 |
188 |
189 |
--------------------------------------------------------------------------------
/src/evoker/Lasso.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.awt.Point;
4 | import java.awt.geom.Path2D;
5 | import java.awt.geom.Point2D;
6 | import java.util.ArrayList;
7 | import java.util.Collection;
8 | import java.util.HashMap;
9 | import org.jfree.chart.entity.ChartEntity;
10 | import org.jfree.chart.entity.EntityCollection;
11 | import org.jfree.chart.entity.StandardEntityCollection;
12 |
13 | /**
14 | * Implements lasso select. USE ONLY WITH PLOT DIAGRAMS.
15 | */
16 | public class Lasso {
17 |
18 | /** Representing the enclosed area*/
19 | private Path2D poly;
20 | /** To later hold all ChartEntitys within the polygon after they've been
21 | * calculated once, to not have to search them again. */
22 | private EntityCollection ec = null;
23 |
24 | public Lasso(double x, double y) {
25 | poly = new Path2D.Double();
26 | poly.moveTo(x,y);
27 | }
28 |
29 | /**
30 | *
31 | * @param entityCollection containing all ChartEntity-Objects to be searched through
32 | * @return EntityCollection containing all ChartEntity-Object within the borders of the selection
33 | */
34 | public EntityCollection getContainedEntitys(EntityCollection entityCollection) {
35 | if (ec == null) {
36 | ec = new StandardEntityCollection();
37 | Collection entities = entityCollection.getEntities();
38 | for (int i = 0; i < entities.size(); i++) {
39 | ChartEntity entity = entityCollection.getEntity(i);
40 | if (entity.getToolTipText() != null && "poly".equals(entity.getShapeType())) { // get sure (?) we only get data-points
41 | EvokerPoint2D p = getCoordinatesOfEntity(entity);
42 | if (poly.contains(p.getX(),p.getY())) {
43 | ec.add(entity);
44 | }
45 | }
46 | }
47 | }
48 | return ec;
49 | }
50 |
51 | /**
52 | * Returns the previously calculated entities. If
53 | * getContainedEntitys(EntityCollection entityCollection)
54 | * hasn't been called beforehand, it'll return null
55 | *
56 | * @return EntityCollection
57 | */
58 | public EntityCollection getContainedEntitys() {
59 | return ec;
60 | }
61 |
62 | /**
63 | * Reads out all diagram-coordinates of the points.
64 | *
65 | * @param entityCollection containing all ChartEntity-Objects to be searched through
66 | * @return ArrayList containing
67 | */
68 | public ArrayList getContainedPoints(EntityCollection entityCollection) {
69 | ArrayList al_ret = new ArrayList();
70 | getContainedEntitys(entityCollection);
71 | Collection entities = ec.getEntities();
72 | for (int i = 0; i < entities.size(); i++) {
73 | ChartEntity entity = ec.getEntity(i);
74 | al_ret.add(getCoordinatesOfEntity(entity));
75 | }
76 | return al_ret;
77 | }
78 |
79 | /**
80 | * Reads out all diagram-coordinates of the points.
81 | *
82 | * @param entityCollection containing all ChartEntity-Objects to be searched through
83 | * @return ArrayList containing
84 | */
85 | public HashMap getContainedPointsInd(EntityCollection entityCollection) {
86 | HashMap hm_ret = new HashMap();
87 | getContainedEntitys(entityCollection);
88 | Collection entities = ec.getEntities();
89 | for (int i = 0; i < entities.size(); i++) {
90 | ChartEntity entity = ec.getEntity(i);
91 | hm_ret.put(getCoordinatesOfEntity(entity), getIndOfEntity(entity));
92 | }
93 | return hm_ret;
94 | }
95 |
96 | /**
97 | * Returns the (screen-relative) coordinates of an entity
98 | * @param entity object
99 | * @return Point
100 | */
101 | /** public Point getScreenCoordinatesOfEntity(ChartEntity e) {
102 | String shapeCoords = e.getShapeCoords();
103 | String[] shapeCoords_array = shapeCoords.split(",");
104 |
105 | // I decided that these points are most like the center of the circle-area
106 | return new Point(
107 | Integer.parseInt(shapeCoords_array[2]),
108 | Integer.parseInt(shapeCoords_array[1]));
109 | }*/
110 |
111 | /**
112 | * Returns the (Diagram-relative) coordinates of a point
113 | * @param entity-object
114 | * @return Point (containing the coordinates)
115 | */
116 | public EvokerPoint2D getCoordinatesOfEntity(ChartEntity e) {
117 | String tooltip = e.getToolTipText();
118 | if (tooltip == null) {
119 | return null;
120 | }
121 |
122 | return new EvokerPoint2D(
123 | Double.parseDouble(
124 | tooltip.substring(
125 | tooltip.indexOf('(') + 1, tooltip.indexOf(','))),
126 | Double.parseDouble(
127 | tooltip.substring(
128 | tooltip.indexOf(',') + 1,
129 | tooltip.indexOf(')'))));
130 | }
131 |
132 | /**
133 | * Returns ind of a ChartEntity
134 | * @param entity object
135 | * @return the ind
136 | */
137 | public String getIndOfEntity(ChartEntity e) {
138 | String tooltip = e.getToolTipText();
139 | return tooltip.substring(0, tooltip.indexOf("(") - 1);
140 | }
141 |
142 | /**
143 | * Adds a point to the polygon
144 | * @param x coordinate
145 | * @param y coordinate
146 | */
147 | public void addPoint(double x, double y) {
148 | poly.lineTo(x,y);
149 | }
150 |
151 |
152 |
153 | public void close(){
154 | poly.closePath();
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/src/evoker/WrapLayout.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.awt.*;
4 | import javax.swing.JScrollPane;
5 | import javax.swing.SwingUtilities;
6 |
/**
 * FlowLayout subclass that fully supports wrapping of components.
 *
 * Written by Rob Camick of tips4java.wordpress.com and used with permission as specified:
 * "You are free to use and/or modify any or all code posted on the Java Tips Weblog without restriction."
 *
 * https://tips4java.wordpress.com/2008/11/06/wrap-layout/
 */
public class WrapLayout extends FlowLayout
{
    private Dimension preferredLayoutSize;

    /**
     * Constructs a new WrapLayout with the alignment and gaps that
     * FlowLayout's no-argument constructor provides.
     */
    public WrapLayout()
    {
        super();
    }

    /**
     * Constructs a new WrapLayout with the specified alignment and
     * FlowLayout's default gaps.
     * @param align the alignment value, passed on to FlowLayout
     */
    public WrapLayout(int align)
    {
        super(align);
    }

    /**
     * Constructs a new WrapLayout with the specified alignment and the
     * specified horizontal and vertical gaps.
     * @param align the alignment value, passed on to FlowLayout
     * @param hgap the horizontal gap between components
     * @param vgap the vertical gap between components
     */
    public WrapLayout(int align, int hgap, int vgap)
    {
        super(align, hgap, vgap);
    }

    /**
     * Computes the preferred size of the target from the preferred sizes of
     * its visible components, wrapped into rows.
     * @param target the container being laid out
     * @return the preferred dimensions for the target
     */
    @Override
    public Dimension preferredLayoutSize(Container target)
    {
        return layoutSize(target, true);
    }

    /**
     * Computes the minimum size of the target from the minimum sizes of its
     * visible components, wrapped into rows. The width is then reduced by
     * the horizontal gap plus one.
     * @param target the container being laid out
     * @return the minimum dimensions for the target
     */
    @Override
    public Dimension minimumLayoutSize(Container target)
    {
        Dimension result = layoutSize(target, false);
        result.width = result.width - (getHgap() + 1);
        return result;
    }

    /**
     * Shared implementation behind the preferred and minimum size queries.
     *
     * @param target container whose components are measured
     * @param preferred true to use preferred sizes, false to use minimum sizes
     * @return the size needed to show every visible component in wrapped rows
     */
    private Dimension layoutSize(Container target, boolean preferred)
    {
        synchronized (target.getTreeLock())
        {
            final int hgap = getHgap();
            final int vgap = getVgap();
            final Insets insets = target.getInsets();
            final int horizontalInsetsAndGap = insets.left + insets.right + (hgap * 2);
            final int maxWidth = widthToFit(target) - horizontalInsetsAndGap;

            // Pack the visible components into rows no wider than maxWidth.

            final Dimension total = new Dimension(0, 0);
            int rowWidth = 0;
            int rowHeight = 0;

            final int count = target.getComponentCount();
            for (int index = 0; index < count; index++)
            {
                final Component child = target.getComponent(index);
                if (!child.isVisible())
                {
                    continue;
                }

                final Dimension size = preferred ? child.getPreferredSize() : child.getMinimumSize();

                // The component does not fit: finish the current row first.
                if (rowWidth + size.width > maxWidth)
                {
                    addRow(total, rowWidth, rowHeight);
                    rowWidth = 0;
                    rowHeight = 0;
                }

                // Every component except the first in a row is preceded by a gap.
                rowWidth += (rowWidth != 0) ? hgap + size.width : size.width;
                rowHeight = Math.max(rowHeight, size.height);
            }

            // Account for the last (possibly only) row.
            addRow(total, rowWidth, rowHeight);

            total.width += horizontalInsetsAndGap;
            total.height += insets.top + insets.bottom + vgap * 2;

            // When using a scroll pane or the DecoratedLookAndFeel the
            // preferred size has to stay below the size of the target
            // container so that shrinking the container works correctly.
            // Removing the horizontal gap is an easy way to do this.
            final Container scrollPane = SwingUtilities.getAncestorOfClass(JScrollPane.class, target);
            final boolean insideValidScrollPane = scrollPane != null && target.isValid();
            if (insideValidScrollPane)
            {
                total.width -= (hgap + 1);
            }

            return total;
        }
    }

    /*
     * Finds the width the rows have to fit into: the width of the target,
     * or of its nearest ancestor whose width is not zero. A width of zero
     * means no size has been assigned yet, so the maximum is used instead.
     */
    private int widthToFit(Container target)
    {
        Container current = target;
        while (current.getSize().width == 0 && current.getParent() != null)
        {
            current = current.getParent();
        }

        final int width = current.getSize().width;
        return (width == 0) ? Integer.MAX_VALUE : width;
    }

    /*
     * Folds one completed row into the running total: the total becomes at
     * least as wide as the row and grows by the row's height, plus a
     * vertical gap when the total already has height.
     *
     * @param dim running total, updated in place
     * @param rowWidth the width of the row to add
     * @param rowHeight the height of the row to add
     */
    private void addRow(Dimension dim, int rowWidth, int rowHeight)
    {
        if (rowWidth > dim.width)
        {
            dim.width = rowWidth;
        }

        final int gapBefore = (dim.height > 0) ? getVgap() : 0;
        dim.height += gapBefore + rowHeight;
    }
}
198 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Evoker
2 | ======
3 |
4 | Description
5 | -----------
6 | Evoker is a graphical tool for plotting genotype intensity data in order to assess quality of genotype calls. It implements a compact, binary format which allows rapid access to data, even with hundreds of thousands of observations.
7 |
8 | Evoker consists of two components:
9 |
10 | * A Java desktop application to be used on a local machine
11 | * A perl script `evoker-helper.pl` which will reside on the server where your intensity and genotype data is stored. This script reads small slices of your intensity and genotype files relevant to the SNP of interest and transfers this to the Java application over SSH.
12 |
13 | UK Biobank v2
14 | -------------
15 | Evoker has been adapted to view UK Biobank v2 data (released July 2017). Evoker expects the UK Biobank files to keep the naming conventions of the original data release.
16 |
17 | For each chromosome of interest, the following files must all sit in the same directory:
18 |
19 | ```
20 | ukb_cal_chr{chromosome}_v2.bed
21 | ukb_snp_chr{chromosome}_v2.bim
22 | ukb_int_chr{chromosome}_v2.bin
23 | ```
24 |
25 | In addition, you must point Evoker to the original fam file (with the batch information in the final column).
26 |
27 | #### UK Biobank v2 steps
28 |
29 | 1. [Install Evoker and remote helper script](https://github.com/wtsi-medical-genomics/evoker#installing)
30 | 2. On local machine, [open Evoker](https://github.com/wtsi-medical-genomics/evoker#running)
31 | 3. `File` > `Connect to remote server`
32 | 4. Select `UK Biobank v2` file format. Then enter:
33 |
34 | * `Host` the remote server hostname
35 | * `Port` port to SSH to (default 22)
36 | * `Remote directory` the absolute path where the UK Biobank files reside.
37 | * `Local directory` a local location where temporary data slices can be stored.
38 | * `Username` your username on the remote host
39 | * `Password` your password on the remote host
40 | * `Remote FAM file` the location on the remote machine of the `fam` file provided to you by UK Biobank (including the final column which lists the batches).
41 | * `Remote temp directory` it is assumed you will not have write access to the release directory (institutions will most likely share a single release), so please specify a directory on the remote machine that you have read/write access to, where temporary subsets of the intensity/plink data can be stored.
42 |
43 | 5. Click OK to start transferring the fam and any bim files in the remote directory to your local machine. The speed of this process will depend on your data connection.
44 | 6. Enter the SNP of interest (rsid from the bim file) to view.
45 | 7. Scroll up and down to view all of the batches. If desired sort on Batch name, MAF, HWE p-value, or GPC from the `View` > `Sort` menu.
46 |
47 | **Note**: at present it is not possible to re-call UK Biobank v2 data (with the lasso select). This feature will be available in the next release.
48 |
49 |
50 | Evoker Lite
51 | -----------
52 | For a python/CLI tool to generate cluster plot PNGs (including UK Biobank v2 data) see [Evoker Lite](https://github.com/dlrice/evoker-lite).
53 |
54 | Maintainer
55 | ----------
56 | Daniel Rice (dr9@sanger.ac.uk)
57 |
58 | Authors
59 | -------
60 | * James Morris
61 | * Jeff Barrett
62 |
63 | Contributors
64 | ------------
65 | * Tim Poterba
66 | * Natalie Wirth
67 | * Daniel Rice
68 |
69 | Requirements
70 | ------------
71 | * Desktop application: Java 8.0 (also known as 1.8) or later.
72 | * Remote helper script `evoker-helper.pl`: Perl 5
73 |
74 | Installing
75 | ----------
76 | #### Desktop application
77 | Download and extract the tarball of the [latest release](https://github.com/wtsi-medical-genomics/evoker/releases) on your local machine.
78 |
79 | #### Remote helper script (`evoker-helper.pl`)
80 | To view data that is on a remote machine (i.e. a UNIX server), download `evoker-helper.pl` and add it to your path so that it is executable from anywhere. If using bash, the following will download it into a folder in your home directory:
81 |
82 | ```bash
83 | mkdir ~/evoker-helper
84 | curl -o ~/evoker-helper/evoker-helper.pl https://raw.githubusercontent.com/wtsi-medical-genomics/evoker/master/src/resources/evoker-helper.pl
85 | chmod 777 ~/evoker-helper/evoker-helper.pl
86 | echo export PATH=\"~/evoker-helper:\$PATH\" >> ~/.bashrc
87 | . ~/.bashrc
88 | ```
89 |
90 | To test that it is working go to some (non-home directory) location:
91 |
92 | ```bash
93 | cd /
94 | evoker-helper.pl --version
95 | ```
96 |
97 | and you should see the version reported to you.
98 |
99 |
100 | Running
101 | -------
102 | Within the untarred release directory you can either double click Evoker.jar contained within or from the command line:
103 |
104 | ```
105 | $ java -jar Evoker.jar
106 | ```
107 |
108 | To run with more memory than the default allocation, the ```-Xmx``` option can be used. For example, to specify 1 GB of memory:
109 |
110 | ```
111 | $ java -Xmx1024m -jar Evoker.jar
112 | ```
113 |
114 | For more information see the documentation included in the release.
115 |
116 | Documentation
117 | -------------
118 | A PDF is included in each release tarball. The latex and image files used to produce this are also available in the docs directory.
119 |
120 | Building
121 | --------
122 | To build a jar file from source:
123 |
124 | 1. Clone the repository.
125 | 2. Copy ```evoker/resources/build.xml``` into ```evoker/```.
126 | 3. Use the command ```ant evoker``` to build the jar file.
127 | 4. Use the command ```ant clean``` to remove temporary build files.
128 |
129 | Todo
130 | ----
131 | - [x] Support Windows
132 | - [x] Handle special characters in password
133 | - [x] Exclude individuals with a negative number as their sample ID.
134 | - [ ] Deal with hidden files (eg `.samples.fam`).
135 | - [ ] Gracefully fail if evoker-helper.pl is not reachable at the remote server.
136 | - [ ] Export BED changes when viewing over a remote connection.
137 | - [ ] Plot SNP Posterior ellipses.
138 | - [ ] Save the plot array to remove the need to re-load all data on sorting.
139 | - [ ] View all batches at once (with ability to filter eg on MAF).
140 |
141 | Citation
142 | --------
143 | James A. Morris, Joshua C. Randall, Julian B. Maller, Jeffrey C. Barrett; Evoker: a visualization tool for genotype intensity data. Bioinformatics 2010; 26 (14): 1786-1787. doi: 10.1093/bioinformatics/btq280
144 |
145 | Website
146 | -------
147 | [http://www.sanger.ac.uk/science/tools/evoker](http://www.sanger.ac.uk/science/tools/evoker)
148 |
149 |
150 | License
151 | -------
152 | MIT License (see LICENSE.md)
153 |
--------------------------------------------------------------------------------
/src/evoker/MarkerListDialog.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import javax.swing.*;
4 |
5 | import java.awt.event.ActionListener;
6 | import java.awt.event.ActionEvent;
7 |
8 | /**
9 |  * Modal dialog that asks for a marker list file and, optionally, for a
10 |  * directory to save plot PDFs in and for the categories of plots to save.
11 |  */
12 | public class MarkerListDialog extends JDialog implements ActionListener {
13 |
14 |     // Button labels double as action commands, so each one is defined once.
15 |     private static final String OK_COMMAND = "OK";
16 |     private static final String CANCEL_COMMAND = "Cancel";
17 |     private static final String BROWSE_COMMAND = "Browse";
18 |     private static final String SAVE_TO_COMMAND = "Save to";
19 |     private static final String SAVE_PLOTS_COMMAND = "Save viewed plots to PDF";
20 |
21 |     private boolean success = false;
22 |     private String markerList;
23 |     private String pdfDir;
24 |
25 |     private JTextField markerlistField;
26 |     private JTextField pdfDirField;
27 |     private JButton pdfBrowseButton;
28 |     private JLabel pdfDirLabel;
29 |     private JRadioButton savePlotsButton;
30 |     private JCheckBox allPlotsButton;
31 |     private JCheckBox yesPlotsButton;
32 |     private JCheckBox maybePlotsButton;
33 |     private JCheckBox noPlotsButton;
34 |
35 |     private JFileChooser jfc;
36 |
37 |     /**
38 |      * Builds the dialog contents; the caller is responsible for showing it.
39 |      *
40 |      * @param parent frame that owns this modal dialog
41 |      */
42 |     public MarkerListDialog(JFrame parent){
43 |         super(parent,"Load Marker List",true);
44 |
45 |         // JFileChooser(String) expects a directory path, so resolve the
46 |         // "user.dir" system property instead of passing its name literally.
47 |         jfc = new JFileChooser(System.getProperty("user.dir"));
48 |
49 |         JPanel contents = new JPanel();
50 |         contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS));
51 |
52 |         JPanel markerlistPanel = new JPanel();
53 |         markerlistPanel.add(new JLabel("Marker list: "));
54 |         markerlistField = new JTextField(20);
55 |         markerlistPanel.add(markerlistField);
56 |         JButton markerBrowseButton = new JButton(BROWSE_COMMAND);
57 |         markerBrowseButton.addActionListener(this);
58 |         markerlistPanel.add(markerBrowseButton);
59 |         contents.add(markerlistPanel);
60 |
61 |         contents.add(new JPanel());
62 |
63 |         JPanel savePlotsPanel = new JPanel();
64 |         savePlotsPanel.setLayout(new BoxLayout(savePlotsPanel,BoxLayout.PAGE_AXIS));
65 |         savePlotsButton = new JRadioButton(SAVE_PLOTS_COMMAND);
66 |         savePlotsButton.setSelected(false);
67 |         savePlotsButton.addActionListener(this);
68 |         savePlotsPanel.add(savePlotsButton);
69 |         allPlotsButton = addCheckBox(savePlotsPanel, "Save all plots");
70 |         yesPlotsButton = addCheckBox(savePlotsPanel, "Save all Yes plots");
71 |         maybePlotsButton = addCheckBox(savePlotsPanel, "Save all Maybe plots");
72 |         noPlotsButton = addCheckBox(savePlotsPanel, "Save all No plots");
73 |         contents.add(savePlotsPanel);
74 |
75 |         JPanel pdfDirPanel = new JPanel();
76 |         pdfDirLabel = new JLabel("Destination directory for PDFs: ");
77 |         pdfDirPanel.add(pdfDirLabel);
78 |         pdfDirField = new JTextField(20);
79 |         pdfDirPanel.add(pdfDirField);
80 |         pdfBrowseButton = new JButton(SAVE_TO_COMMAND);
81 |         pdfBrowseButton.addActionListener(this);
82 |         pdfDirPanel.add(pdfBrowseButton);
83 |         contents.add(pdfDirPanel);
84 |
85 |         // Every PDF option starts disabled until the save button is selected.
86 |         setPdfOptionsEnabled(false);
87 |
88 |         contents.add(new JPanel());
89 |
90 |         JPanel butPan = new JPanel();
91 |         JButton okbut = new JButton(OK_COMMAND);
92 |         getRootPane().setDefaultButton(okbut);
93 |         okbut.addActionListener(this);
94 |         butPan.add(okbut);
95 |         JButton cancelbut = new JButton(CANCEL_COMMAND);
96 |         cancelbut.addActionListener(this);
97 |         butPan.add(cancelbut);
98 |         contents.add(butPan);
99 |
100 |         this.setContentPane(contents);
101 |     }
102 |
103 |     /** Creates an unselected check box with the given label and adds it to the panel. */
104 |     private static JCheckBox addCheckBox(JPanel panel, String label) {
105 |         JCheckBox box = new JCheckBox(label);
106 |         box.setSelected(false);
107 |         panel.add(box);
108 |         return box;
109 |     }
110 |
111 |     /** Enables or disables every control that only matters when plots are saved. */
112 |     private void setPdfOptionsEnabled(boolean enabled) {
113 |         pdfDirLabel.setEnabled(enabled);
114 |         pdfDirField.setEnabled(enabled);
115 |         pdfBrowseButton.setEnabled(enabled);
116 |         allPlotsButton.setEnabled(enabled);
117 |         yesPlotsButton.setEnabled(enabled);
118 |         maybePlotsButton.setEnabled(enabled);
119 |         noPlotsButton.setEnabled(enabled);
120 |     }
121 |
122 |     /**
123 |      * Handles the buttons of this dialog, dispatching on the action command.
124 |      * OK stores the two text fields and closes the dialog; Cancel only closes it.
125 |      */
126 |     public void actionPerformed(ActionEvent e) {
127 |         String command = e.getActionCommand();
128 |         if (OK_COMMAND.equals(command)){
129 |             markerList = markerlistField.getText();
130 |             pdfDir = pdfDirField.getText();
131 |             success = true;
132 |             this.dispose();
133 |         }else if (SAVE_PLOTS_COMMAND.equals(command)) {
134 |             // The PDF options follow the state of the save button.
135 |             setPdfOptionsEnabled(savePlotsButton.isSelected());
136 |         }else if (BROWSE_COMMAND.equals(command)){
137 |             jfc.setFileSelectionMode(JFileChooser.FILES_ONLY);
138 |             if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){
139 |                 markerlistField.setText(jfc.getSelectedFile().getAbsolutePath());
140 |             }
141 |         }else if (SAVE_TO_COMMAND.equals(command)) {
142 |             jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
143 |             if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){
144 |                 pdfDirField.setText(jfc.getSelectedFile().getAbsolutePath());
145 |             }
146 |         }else if (CANCEL_COMMAND.equals(command)){
147 |             this.dispose();
148 |         }
149 |     }
150 |
151 |     /** @return true only after the dialog was closed with OK */
152 |     public boolean success() {
153 |         return success;
154 |     }
155 |
156 |     /** @return text of the marker list field as captured by OK, or null before that */
157 |     public String getMarkerList() {
158 |         return markerList;
159 |     }
160 |
161 |     /** @return text of the PDF directory field as captured by OK, or null before that */
162 |     public String getPdfDir(){
163 |         return pdfDir;
164 |     }
165 |
166 |     /** @return current selection state of the "Save viewed plots to PDF" button */
167 |     public boolean savePlots() {
168 |         return savePlotsButton.isSelected();
169 |     }
170 |
171 |     /** @return current selection state of the "Save all plots" box */
172 |     public boolean allPlots() {
173 |         return allPlotsButton.isSelected();
174 |     }
175 |
176 |     /** @return current selection state of the "Save all Yes plots" box */
177 |     public boolean yesPlots() {
178 |         return yesPlotsButton.isSelected();
179 |     }
180 |
181 |     /** @return current selection state of the "Save all Maybe plots" box */
182 |     public boolean maybePlots() {
183 |         return maybePlotsButton.isSelected();
184 |     }
185 |
186 |     /** @return current selection state of the "Save all No plots" box */
187 |     public boolean noPlots() {
188 |         return noPlotsButton.isSelected();
189 |     }
190 | }
--------------------------------------------------------------------------------
/src/evoker/MarkerData.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.util.Hashtable;
4 | import java.util.Vector;
5 | import java.io.BufferedReader;
6 | import java.io.FileReader;
7 | import java.io.IOException;
8 | import java.util.HashMap;
9 | import java.util.regex.Pattern;
10 |
11 | /**
12 | * Holds all the data about a set of SNPs, usually for one chromosome.
13 | *
14 | */
15 | public class MarkerData {
16 |
17 | //Hashtable snpIndexTable;
18 | //Hashtable snpAlleleATable;
19 | //Hashtable snpAlleleBTable;
20 |
21 | //while this seems ridiculous, it is a considerable memory savings which is now not exposed anywhere
22 | //outside this class. Instead of having a hash keyed on strings of chroms, taking up something like
23 | //20 bytes per key, even though there are only a few possibilities. we do this dance to simultaneously
24 | //avoid the memory overhead for millions of entries while allowing "chrom" to be anything, rather than
25 | //just numbers 1..22 etc.
26 |
27 | HashMap chromosomeLookup; // chromosome -> id
28 | HashMap chromosomeBackLookup;// id -> chromosome
29 |
30 | HashMap markerTable; // SNP_Name -> Marker
31 | HashMap collectionIndices;// Collection -> ID (from collectionIndices after first chromosome file)
32 | HashMap snpsPerCollection;// Collection -> Number of SNPs
33 | private int numCollections; // Number of Collections
34 | private int runningCount; // Number of BimFiles so far
35 |
36 | //HashMap>> snpDB = new HashMap>>();
37 |
38 | public MarkerData(int numCollections){
39 | this.numCollections = numCollections;
40 | markerTable = new HashMap();
41 | collectionIndices = new HashMap();
42 | snpsPerCollection = new HashMap();
43 | chromosomeLookup = new HashMap();
44 | chromosomeBackLookup = new HashMap();
45 | runningCount = -1;
46 | }
47 |
48 |
49 | public int getSampleCollectionIndex(String collection){
50 | return collectionIndices.get(collection);
51 | }
52 |
53 | /**
54 | * Returns a Random SNP ID from markerTable (all IDs as key)
55 | * @return SNP ID
56 | */
57 | public String getRandomSNP(){
58 | Vector v = new Vector(markerTable.keySet());
59 | return (String)v.get((int)(Math.random()*markerTable.keySet().size()));
60 | }
61 |
62 |
63 | /**
64 | * Adds information of a bim file
65 | * @param bimFile
66 | * @param collection
67 | * @param chromosome
68 | * @param isOx
69 | * @throws IOException
70 | */
71 | public void addFile(String bimFile, String collection, String chromosome,
72 | boolean isOx) throws IOException {
73 |
74 | // All of the chromosomes for one collection are loaded at once in DataDirectory
75 | // which is why we can increase runningCount once the collection doesn't exist in
76 | // collectionIndices as a key and not worry about returning to it later.
77 | if (!collectionIndices.containsKey(collection)){
78 | runningCount++;
79 | collectionIndices.put(collection,runningCount);
80 | }
81 |
82 | // We shouldn't get a NullPointerException here because addChromToLookup has been called
83 | // in DataDirectory when the bim or snp file is parsed.
84 | byte chrom = chromosomeLookup.get(chromosome);
85 | String currentLine;
86 | BufferedReader bimReader = new BufferedReader(new FileReader(bimFile));
87 |
88 | //read through bim file to record marker order so we can quickly index
89 | //into binary files
90 | int index = 0;
91 | String[] bits;
92 | boolean missingAlleles = false;
93 | while ((currentLine = bimReader.readLine()) != null){
94 | bits = currentLine.split("\\s+");
95 | StringBuffer snpid = null;
96 | char a = 'A',b = 'B';
97 | // check the size of the bits array
98 | if(bits.length >= 5) {
99 | snpid = new StringBuffer(bits[1]);
100 | if (isOx){
101 | a = bits[3].toCharArray()[0];
102 | b = bits[4].toCharArray()[0];
103 | }else{
104 | a = bits[4].toCharArray()[0];
105 | b = bits[5].toCharArray()[0];
106 | }
107 | } else if (bits.length == 1){
108 | // if there is just 1 column assume the file contains only a SNP id
109 | missingAlleles = true;
110 | snpid = new StringBuffer(bits[0]);
111 | }
112 |
113 | // not sure if these files exist
114 | // else if(bits.length == 3) {
115 | // // if there are 3 columns assume the file contains name, id and position
116 | // missingAlleles = true;
117 | // snpid = new StringBuffer(bits[1]);
118 | // }
119 | String stringSnpid = snpid.toString();
120 | if (!markerTable.containsKey(stringSnpid))
121 | markerTable.put(stringSnpid, new Marker(numCollections,a,b,chrom));
122 |
123 | //TP: only the first two args are used in addSampleCollection
124 | markerTable.get(stringSnpid).addSampleCollection(runningCount,index++,a,b,stringSnpid);
125 |
126 | // if(! snpDB.containsKey(collection)) snpDB.put(collection, new HashMap>());
127 | // if(! snpDB.get(collection).containsKey(chromosome)) snpDB.get(collection).put(chromosome, new Vector());
128 | // snpDB.get(collection).get(chromosome).add(snpid.toString());
129 |
130 | }
131 | //TP CHANGED THIS
132 | int snpsSoFar = 0;
133 | if (snpsPerCollection.containsKey(collection))
134 | snpsSoFar = snpsPerCollection.get(collection);
135 |
136 | snpsPerCollection.put(collection,index + snpsSoFar);
137 | snpsPerCollection.put(collection+chromosome,index);
138 |
139 | if (missingAlleles) {
140 | Genoplot.ld.log("WARNING: SNP file does not contain allele information");
141 | }
142 | }
143 |
144 | // public HashMap> getSnpInfo(String collection){
145 | // return snpDB.get(collection);
146 | // }
147 |
148 | public char[] getAlleles(String snp){
149 | return markerTable.get(snp).getAlleles();
150 | }
151 |
152 | public String getChrom(String name){
153 | if (markerTable.get(name) != null){
154 | return chromosomeBackLookup.get(markerTable.get(name).getChrom());
155 | }else{
156 | return null;
157 | }
158 | }
159 |
160 | public HashMap getMarkerTable(){
161 | return markerTable;
162 | }
163 |
164 | /**
165 | * Returns the number of SNPs contained by a Collection
166 | * @param collectionName
167 | * @return SNP number
168 | */
169 | public int getNumSNPs(String collection) {
170 | return snpsPerCollection.get(collection);
171 | }
172 |
173 | public Integer getIndex(String markerName, int sampleIndex){
174 | if (markerTable.get(markerName) != null){
175 | return markerTable.get(markerName).getIndex(sampleIndex);
176 | }else{
177 | return -1;
178 | }
179 | }
180 |
181 | public void addChromToLookup(String chrom, byte counter) {
182 | chromosomeLookup.put(chrom,counter);
183 | chromosomeBackLookup.put(counter,chrom);
184 | }
185 |
186 | public boolean exists(String name) {
187 | return markerTable.containsKey(name);
188 | }
189 |
190 | public Boolean isSexSnp(String name) {
191 | return isSexChrom(getChrom(name));
192 | }
193 |
194 | public Boolean isSexChrom(String chrom) {
195 | return chrom.toLowerCase().contains("x") || chrom.toLowerCase().contains("y");
196 | }
197 | }
198 |
--------------------------------------------------------------------------------
/src/evoker/DataConnectionDialog.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import javax.swing.*;
4 | import java.awt.*;
5 | import java.awt.event.ActionListener;
6 | import java.awt.event.ActionEvent;
7 | import evoker.Types.FileFormat;
8 |
9 | /**
10 |  * Modal dialog that collects the settings for a remote data connection:
11 |  * host, port, credentials, remote and local directories and the file format.
12 |  * Selecting the UK BioBank v2 format reveals the FAM file and remote temp
13 |  * directory fields.
14 |  */
15 | public class DataConnectionDialog extends JDialog implements ActionListener {
16 |
17 |     private FileFormat fileFormat;
18 |     private JPasswordField pf;
19 |     private char[] password;
20 |     private String username;
21 |     private String remoteDir;
22 |     private String localDir;
23 |     private String remoteTempDir;
24 |     private String host;
25 |     private int port;
26 |     private String fam;
27 |     private JTextField userField;
28 |     private JTextField remdirField;
29 |     private JTextField remoteTempDirField;
30 |     private JTextField famField;
31 |     private JTextField locdirField;
32 |     private JTextField hostField;
33 |     private JTextField portField;
34 |     private JPanel famPanel;
35 |     private JPanel remoteTempDirPanel;
36 |     private JCheckBox emptyIt;
37 |     private JRadioButton defaultFormatButton;
38 |     private JRadioButton oxfordFormatButton;
39 |     private JRadioButton ukBioBankFormatButton;
40 |     private boolean cancelled;
41 |
42 |     /**
43 |      * Builds the dialog contents; the caller is responsible for showing it.
44 |      *
45 |      * @param parent frame that owns this modal dialog
46 |      */
47 |     public DataConnectionDialog(JFrame parent){
48 |         super(parent,"Data Connection",true);
49 |         fam = "";
50 |         // Closing the window without pressing OK counts as a cancel.
51 |         cancelled = true;
52 |
53 |         JPanel contents = new JPanel();
54 |         contents.setLayout(new BoxLayout(contents,BoxLayout.Y_AXIS));
55 |
56 |         JPanel hostPanel = new JPanel();
57 |         hostPanel.add(new JLabel("Host: "));
58 |         hostField = new JTextField(20);
59 |         hostPanel.add(hostField);
60 |
61 |         hostPanel.add(new JLabel("Port: "));
62 |         portField = new JTextField("22", 3);
63 |         hostPanel.add(portField);
64 |         contents.add(hostPanel);
65 |
66 |         JPanel remdirPanel = new JPanel();
67 |         remdirPanel.add(new JLabel("Remote directory: "));
68 |         remdirField = new JTextField(30);
69 |         remdirPanel.add(remdirField);
70 |         contents.add(remdirPanel);
71 |
72 |         JPanel localdirPanel = new JPanel();
73 |         localdirPanel.add(new JLabel("Local directory: "));
74 |         locdirField = new JTextField(20);
75 |         localdirPanel.add(locdirField);
76 |         JButton browseButton = new JButton("Browse");
77 |         browseButton.addActionListener(this);
78 |         localdirPanel.add(browseButton);
79 |         contents.add(localdirPanel);
80 |
81 |         JPanel bottomPanel = new JPanel();
82 |
83 |         JPanel loginPanel = new JPanel();
84 |         loginPanel.setLayout(new BoxLayout(loginPanel,BoxLayout.Y_AXIS));
85 |
86 |         JPanel userPanel = new JPanel();
87 |         userPanel.add(new JLabel("Username: "));
88 |         userField = new JTextField(10);
89 |         userPanel.add(userField);
90 |         loginPanel.add(userPanel);
91 |
92 |         JPanel passPanel = new JPanel();
93 |         passPanel.add(new JLabel("Password: "));
94 |         pf = new JPasswordField(8);
95 |         passPanel.add(pf);
96 |         loginPanel.add(passPanel);
97 |
98 |         bottomPanel.add(loginPanel);
99 |
100 |         // The three format buttons are mutually exclusive; default is preselected.
101 |         ButtonGroup bg = new ButtonGroup();
102 |         JPanel formatPanel = new JPanel();
103 |         formatPanel.setLayout(new BoxLayout(formatPanel,BoxLayout.Y_AXIS));
104 |
105 |         defaultFormatButton = new JRadioButton("Default format");
106 |         defaultFormatButton.addActionListener(this);
107 |         formatPanel.add(defaultFormatButton);
108 |         defaultFormatButton.setSelected(true);
109 |         bg.add(defaultFormatButton);
110 |
111 |         oxfordFormatButton = new JRadioButton("Oxford format");
112 |         oxfordFormatButton.addActionListener(this);
113 |         formatPanel.add(oxfordFormatButton);
114 |         bg.add(oxfordFormatButton);
115 |
116 |         ukBioBankFormatButton = new JRadioButton("UK BioBank v2 format");
117 |         ukBioBankFormatButton.addActionListener(this);
118 |         formatPanel.add(ukBioBankFormatButton);
119 |         bg.add(ukBioBankFormatButton);
120 |         bottomPanel.add(formatPanel);
121 |         contents.add(bottomPanel);
122 |
123 |         // These two panels stay hidden until the UK BioBank format is selected.
124 |         famPanel = new JPanel();
125 |         famPanel.add(new JLabel("Remote FAM file: "));
126 |         famField = new JTextField(30);
127 |         famPanel.add(famField);
128 |         famPanel.setVisible(false);
129 |
130 |         remoteTempDirPanel = new JPanel();
131 |         remoteTempDirPanel.add(new JLabel("Remote temp directory: "));
132 |         remoteTempDirField = new JTextField(30);
133 |         remoteTempDirPanel.add(remoteTempDirField);
134 |         remoteTempDirPanel.setVisible(false);
135 |
136 |         contents.add(famPanel);
137 |         contents.add(remoteTempDirPanel);
138 |
139 |         //TODO: should this be reactivated?
140 |         emptyIt = new JCheckBox("Clear local cache?");
141 |         //contents.add(emptyIt);
142 |
143 |         JPanel butPan = new JPanel();
144 |         JButton okbut = new JButton("OK");
145 |         getRootPane().setDefaultButton(okbut);
146 |         okbut.addActionListener(this);
147 |         butPan.add(okbut);
148 |         JButton cancelbut = new JButton("Cancel");
149 |         cancelbut.addActionListener(this);
150 |         butPan.add(cancelbut);
151 |         contents.add(butPan);
152 |
153 |         this.setContentPane(contents);
154 |         this.setPreferredSize(new Dimension(550,350));
155 |         this.setMinimumSize(new Dimension(550,350));
156 |     }
157 |
158 |     /**
159 |      * Handles the buttons of this dialog, dispatching on the action command.
160 |      * OK validates the port, stores all field values and closes the dialog;
161 |      * with an invalid port an error is shown and the dialog stays open.
162 |      */
163 |     public void actionPerformed(ActionEvent e) {
164 |         String command = e.getActionCommand();
165 |         if (command.equals("UK BioBank v2 format")) {
166 |             famPanel.setVisible(true);
167 |             remoteTempDirPanel.setVisible(true);
168 |         } else if (command.equals("Default format") || command.equals("Oxford format")) {
169 |             famPanel.setVisible(false);
170 |             remoteTempDirPanel.setVisible(false);
171 |         } else if (command.equals("OK")){
172 |             // Validate before storing anything so a typo cannot leave the
173 |             // dialog half filled in or throw on the event thread.
174 |             int parsedPort = parsePort(portField.getText());
175 |             if (parsedPort < 0) {
176 |                 JOptionPane.showMessageDialog(this,
177 |                         "Port must be a whole number between 1 and 65535.",
178 |                         "Invalid port", JOptionPane.ERROR_MESSAGE);
179 |                 return;
180 |             }
181 |             port = parsedPort;
182 |             password = pf.getPassword();
183 |             username = userField.getText();
184 |             remoteDir = remdirField.getText();
185 |             localDir = locdirField.getText();
186 |             host = hostField.getText();
187 |             cancelled = false;
188 |             if (defaultFormatButton.isSelected()) {
189 |                 fileFormat = FileFormat.DEFAULT;
190 |             } else if (oxfordFormatButton.isSelected()) {
191 |                 fileFormat = FileFormat.OXFORD;
192 |             } else if (ukBioBankFormatButton.isSelected()) {
193 |                 fileFormat = FileFormat.UKBIOBANK;
194 |                 fam = famField.getText();
195 |                 remoteTempDir = remoteTempDirField.getText();
196 |             }
197 |             this.dispose();
198 |         }else if (command.equals("Browse")){
199 |             // JFileChooser(String) expects a directory path, so resolve the
200 |             // "user.dir" system property instead of passing its name literally.
201 |             JFileChooser jfc = new JFileChooser(System.getProperty("user.dir"));
202 |             jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
203 |             if (jfc.showOpenDialog(this) == JFileChooser.APPROVE_OPTION){
204 |                 locdirField.setText(jfc.getSelectedFile().getAbsolutePath());
205 |             }
206 |         }else if (command.equals("Cancel")){
207 |             cancelled = true;
208 |             this.dispose();
209 |         }
210 |     }
211 |
212 |     /** Returns the port in 1..65535 held by the text, or -1 when it is not such a number. */
213 |     private static int parsePort(String text) {
214 |         try {
215 |             int value = Integer.parseInt(text.trim());
216 |             return (value >= 1 && value <= 65535) ? value : -1;
217 |         } catch (NumberFormatException ignored) {
218 |             // not a number: the caller reports this to the user
219 |             return -1;
220 |         }
221 |     }
222 |
223 |     /** @return selection state of the "Clear local cache?" box, which is currently not shown */
224 |     public boolean getEmpty(){
225 |         return emptyIt.isSelected();
226 |     }
227 |
228 |     /** @return the password captured by OK, or null before that */
229 |     public char[] getPassword(){
230 |         return password;
231 |     }
232 |
233 |     public String getUsername(){
234 |         return username;
235 |     }
236 |
237 |     public String getRemoteDirectory() {
238 |         return remoteDir;
239 |     }
240 |
241 |     public String getHost() {
242 |         return host;
243 |     }
244 |
245 |     public int getPort() {
246 |         return port;
247 |     }
248 |
249 |     public FileFormat getFileFormat(){
250 |         return fileFormat;
251 |     }
252 |
253 |     /** @return the FAM file path; empty unless the UK BioBank format was confirmed */
254 |     public String getFam() { return fam; }
255 |
256 |     /** @return the remote temp directory; null unless the UK BioBank format was confirmed */
257 |     public String getRemoteTempDir(){ return remoteTempDir; }
258 |
259 |     public boolean isOxformat() {
260 |         return getFileFormat() == FileFormat.OXFORD;
261 |     }
262 |
263 |     /** @return true unless the dialog was closed with OK */
264 |     public boolean isCancelled() { return cancelled; }
265 |
266 |     /** Overwrites the stored password characters and empties the password field. */
267 |     public void clearPassword(){
268 |         // password is still null when OK was never pressed
269 |         if (password != null) {
270 |             java.util.Arrays.fill(password, (char) 0);
271 |         }
272 |         pf.setText("");
273 |     }
274 |
275 |     public String getLocalDirectory() {
276 |         return localDir;
277 |     }
278 | }
--------------------------------------------------------------------------------
/src/evoker/PlotPanel.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import org.jfree.data.xy.XYSeriesCollection;
4 | import org.jfree.data.xy.XYDataset;
5 | import org.jfree.chart.JFreeChart;
6 | import org.jfree.chart.ChartFactory;
7 | import org.jfree.chart.ChartPanel;
8 | import org.jfree.chart.ChartUtilities;
9 | import org.jfree.chart.labels.StandardXYToolTipGenerator;
10 | import org.jfree.chart.renderer.xy.XYItemRenderer;
11 | import org.jfree.chart.title.TextTitle;
12 | import org.jfree.chart.plot.PlotOrientation;
13 | import org.jfree.chart.plot.XYPlot;
14 |
15 | import javax.swing.*;
16 | import java.awt.*;
17 | import java.awt.geom.Ellipse2D;
18 | import java.text.NumberFormat;
19 | import java.text.DecimalFormat;
20 | import java.text.DecimalFormatSymbols;
21 | import java.text.FieldPosition;
22 | import java.util.Locale;
23 | import java.io.File;
24 | import java.io.IOException;
25 | import java.io.OutputStream;
26 |
27 | import evoker.Types.CoordinateSystem;
28 |
29 | public class PlotPanel extends JPanel {
30 |
31 | /**
32 | * Mode, determining the way, the Diagram responses to mouse gestures.
33 | * true means lasso select, false means zooming
34 | * in.
35 | */
36 |
37 | ChartPanel generatePlot;
38 |
39 | protected Genoplot theGenoplot;
40 | private JFreeChart jfc;
41 | private PlotData data;
42 | private String title, xlab, ylab;
43 | private boolean foundData;
44 | static NumberFormat nf = NumberFormat.getInstance(Locale.US);
45 | boolean longStats;
46 | double totalMaf;
47 | int totalSamples;
48 | String collection;
49 | JPanel statistics = null;
50 |
51 | static {
52 | nf.setMaximumFractionDigits(2);
53 | nf.setMinimumFractionDigits(2);
54 | }
55 |
56 | PlotPanel(Genoplot gp, String title, PlotData pd, int plotHeight, int plotWidth,
57 |         boolean longStats, double totalMaf, int totalSamples, String collection) {
58 |     theGenoplot = gp;
59 |     data = pd;
60 |     this.title = title;
61 |     this.collection = collection;
62 |     this.longStats = longStats;
63 |     this.totalMaf = totalMaf;
64 |     this.totalSamples = totalSamples;
65 |     // Axis labels follow the coordinate system reported by the plot data.
66 |     switch (pd.getCoordSystem()) {
67 |         case POLAR:
68 |             xlab = "\u03F4";
69 |             ylab = "r";
70 |             break;
71 |         case UKBIOBANK:
72 |             xlab = "Contrast: log\u2082(A/B)";
73 |             ylab = "Strength: log\u2082(A\u00D7B)/2";
74 |             break;
75 |         default:
76 |             xlab = "X";
77 |             ylab = "Y";
78 |             break;
79 |     }
80 |     // NOTE(review): Dimension takes (width, height) but plotHeight is passed first - confirm intent.
81 |     setLayout(new BoxLayout(this, BoxLayout.Y_AXIS));
82 |     setPreferredSize(new Dimension(plotHeight, plotWidth));
83 |     setMaximumSize(new Dimension(plotHeight, plotWidth));
84 | }
85 |
86 | protected void refresh() {
87 |     removeAll();
88 |     XYSeriesCollection points = data.generatePoints();
89 |     setFoundData(points != null);
90 |     if (points == null) {
91 |         // Nothing to plot: show a centred message between two glue components.
92 |         setBackground(Color.WHITE);
93 |         add(Box.createVerticalGlue());
94 |         JLabel message = new JLabel("No data found for " + title);
95 |         message.setAlignmentX(Component.CENTER_ALIGNMENT);
96 |         add(message);
97 |         add(Box.createVerticalGlue());
98 |         return;
99 |     }
100 |     generatePlot = generatePlot(points);
101 |     add(generatePlot);
102 |     statistics = new JPanel();
103 |     add(generateInfo());
104 | }
105 |
106 | private void setFoundData(boolean b) {
107 | foundData = b; // remembers whether plottable data was found
108 | }
109 |
110 | void saveToFile(File f) throws IOException {
111 | ChartUtilities.saveChartAsPNG(f, jfc, 400, 400); // writes the current chart to f as a 400x400 PNG
112 | }
113 |
114 | public JPanel generateInfo() {
115 | statistics.setBackground(Color.white);
116 | // Adds MAF, GPC and HWE p-value labels to the existing "statistics" panel and returns that panel.
117 | boolean compressStats = true;
118 | // NOTE(review): compressStats is never read in this method.
119 | JLabel mafLabel = new JLabel();
120 | JLabel gpcLabel = new JLabel();
121 | JLabel hwpLabel = new JLabel();
122 | // MAF of this plot's data versus the average totalMaf / totalSamples.
123 | double maf = data.getMaf();
124 | double mafAvg = totalMaf / totalSamples;
125 | // mafScore: difference between maf and mafAvg, scaled by
126 | // sqrt(2 * sample count) / sqrt(mafAvg * (1 - mafAvg)).
127 | double mafScore = (maf - mafAvg) * Math.sqrt(2 * data.getSampleNum()) / Math.sqrt(mafAvg * (1 - mafAvg));
128 | // NOTE(review): the flagged branches below only append or prepend empty literals, so both branches give the same text - presumably markup was lost; confirm upstream.
129 | if (longStats) {
130 | if (mafScore > 3) {
131 | mafLabel.setText("MAF: " + nf.format(data.getMaf())
132 | + "");
133 | } else {
134 | mafLabel.setText("MAF: " + nf.format(data.getMaf()));
135 | }
136 |
137 | if (1 - data.getGenopc() > 0.02) {
138 | gpcLabel.setText("GPC: " + nf.format(data.getGenopc())
139 | + "");
140 | } else {
141 | gpcLabel.setText("GPC: " + nf.format(data.getGenopc()));
142 | }
143 | if (data.getHwpval() < 10e-5) { // NOTE(review): 10e-5 equals 1e-4 - confirm the intended threshold
144 | hwpLabel.setText("HWE pval: "
145 | + formatPValue(data.getHwpval()) + "");
146 | } else {
147 | hwpLabel.setText("HWE pval: " + formatPValue(data.getHwpval()));
148 | }
149 | } else {
150 | if (mafScore > 3) {
151 | mafLabel.setText("" + nf.format(data.getMaf())
152 | + "/");
153 | } else {
154 | mafLabel.setText(nf.format(data.getMaf()) + "/");
155 | }
156 | if (1 - data.getGenopc() > 0.02) {
157 | gpcLabel.setText("" + nf.format(data.getGenopc())
158 | + "/");
159 | } else {
160 | gpcLabel.setText(nf.format(data.getGenopc()) + "/");
161 | }
162 | if (data.getHwpval() < 10e-5) { // NOTE(review): same 10e-5 (1e-4) threshold as in the long form
163 | hwpLabel.setText("" + formatPValue(data.getHwpval())
164 | + "");
165 | } else {
166 | hwpLabel.setText(formatPValue(data.getHwpval()));
167 | }
168 | }
169 | // The labels are added in the order MAF, GPC, HWE.
170 | statistics.add(mafLabel);
171 | statistics.add(gpcLabel);
172 | statistics.add(hwpLabel);
173 |
174 | return statistics;
175 | }
176 |
177 | public void updateInfo() { // rebuilds the labels inside the existing statistics panel
178 | statistics.removeAll();
179 | generateInfo(); // adds freshly computed labels to "statistics"
180 | statistics.revalidate();
181 | }
182 |
183 | public String generateInfoStr() {
184 |     // Tab-separated one-line summary: MAF, GPC and HWE p-value.
185 |     String maf = nf.format(data.getMaf()), gpc = nf.format(data.getGenopc());
186 |     return "MAF: " + maf + "\tGPC: " + gpc + "\tHWE pval: " + formatPValue(data.getHwpval());
187 | }
188 |
189 | private ChartPanel generatePlot(XYSeriesCollection xysc) {
190 |     // Scatter plot without legend, tooltips or URLs; the sample count goes in a subtitle.
191 |     jfc = ChartFactory.createScatterPlot(title, xlab, ylab, xysc,
192 |             PlotOrientation.VERTICAL, false, false, false);
193 |     jfc.addSubtitle(new TextTitle("(n=" + data.getSampleNum() + ")"));
194 |
195 |     XYPlot plot = jfc.getXYPlot();
196 |     plot.setBackgroundPaint(Color.white);
197 |     plot.setOutlineVisible(false);
198 |
199 |     // Series 0-3 share one small circular marker and differ only in colour.
200 |     Color[] seriesColours = {Color.BLUE, new Color(180, 180, 180), Color.GREEN, Color.RED};
201 |     Shape marker = new Ellipse2D.Double(-1.5, -1.5, 3, 3);
202 |     XYItemRenderer renderer = plot.getRenderer();
203 |     for (int series = 0; series < seriesColours.length; series++) {
204 |         renderer.setSeriesShape(series, marker);
205 |     }
206 |     for (int series = 0; series < seriesColours.length; series++) {
207 |         renderer.setSeriesPaint(series, seriesColours[series]);
208 |     }
209 |     renderer.setBaseToolTipGenerator(new ZitPlotToolTipGenerator());
210 |
211 |     // Wrap the chart in the project's panel with tooltips switched on.
212 |     EvokerChartPanel panel = new EvokerChartPanel(jfc, data, this);
213 |     panel.setDisplayToolTips(true);
214 |     panel.setDismissDelay(10000);
215 |     panel.setInitialDelay(0);
216 |     panel.setReshowDelay(0);
217 |
218 |     return panel;
219 | }
220 |
221 | public double getMaxDim() {
222 | return data.getMaxDim() + 0.05 * data.getRange();
223 | }
224 |
225 | public double getMinDim() {
226 | return data.getMinDim() - 0.05 * data.getRange();
227 | }
228 |
229 | public void setDimensions() {
230 | setDimensions(getMinDim(), getMaxDim());
231 | }
232 |
233 | public void setDimensionsToData() {
234 | double minX = data.getMinX();
235 | double maxX = data.getMaxX();
236 | double minY = data.getMinY();
237 | double maxY = data.getMaxY();
238 |
239 | double xPadding = 0.05 * (maxX - minX);
240 | minX -= xPadding;
241 | maxX += xPadding;
242 |
243 | double yPadding = 0.05 * (maxY - minY);
244 | minY -= yPadding;
245 | maxY += yPadding;
246 |
247 | setDimensions(minX, maxX, minY, maxY);
248 | }
249 |
250 | public void setDimensions(double minX, double maxX, double minY, double maxY) {
251 | if (jfc != null) {
252 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, minY, maxY));
253 | jfc.getXYPlot().getRangeAxis().setRange(minY, maxY);
254 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, minX, maxX));
255 | jfc.getXYPlot().getDomainAxis().setRange(minX, maxX);
256 | }
257 | }
258 |
259 | public void setDimensions(double min, double max) {
260 | if (jfc != null) {
261 | switch (data.getCoordSystem()) {
262 | case POLAR:
263 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max));
264 | jfc.getXYPlot().getRangeAxis().setRange(min, max);
265 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, 0, 2));
266 | jfc.getXYPlot().getDomainAxis().setRange(0, 2);
267 | break;
268 | case UKBIOBANK:
269 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max));
270 | jfc.getXYPlot().getRangeAxis().setRange(min, max);
271 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, min, max));
272 | jfc.getXYPlot().getDomainAxis().setRange(min, max);
273 | break;
274 | default:
275 | jfc.getXYPlot().setRangeAxis(new LinkedAxis(ylab, min, max));
276 | jfc.getXYPlot().getRangeAxis().setRange(min, max);
277 | jfc.getXYPlot().setDomainAxis(new LinkedAxis(xlab, min, max));
278 | jfc.getXYPlot().getDomainAxis().setRange(min, max);
279 | break;
280 | }
281 | }
282 | }
283 |
284 | public static String formatPValue(double pval) {
285 | DecimalFormat df;
286 | // java truly sucks for simply restricting the number of sigfigs but
287 | // still
288 | // using scientific notation when appropriate
289 | if (pval < 0.001) {
290 | df = new DecimalFormat("0.0E0", new DecimalFormatSymbols(Locale.US));
291 | } else {
292 | df = new DecimalFormat("0.000", new DecimalFormatSymbols(Locale.US));
293 | }
294 | return df.format(pval, new StringBuffer(),
295 | new FieldPosition(NumberFormat.INTEGER_FIELD)).toString();
296 | }
297 |
298 | class ZitPlotToolTipGenerator extends StandardXYToolTipGenerator {
299 |
300 | public double round3(double n) {
301 | double result = n * 100000;
302 | result = Math.round(result);
303 | result = result / 100000;
304 | return result;
305 | }
306 |
307 | public ZitPlotToolTipGenerator() {
308 | super();
309 | }
310 |
311 | public String generateToolTip(XYDataset dataset, int series, int item) {
312 | return data.getIndInClass(series, item) + " ("
313 | + round3(dataset.getXValue(series, item)) + ", "
314 | + round3(dataset.getYValue(series, item)) + ")";
315 | // + dataset.getXValue(series, item) + ", "
316 | // + dataset.getYValue(series, item) + ")";
317 | }
318 | }
319 |
320 | public JFreeChart getChart() {
321 | return jfc;
322 | }
323 |
324 | public PlotData getPlotData() {
325 | return data;
326 | }
327 |
328 | public boolean hasData() {
329 | return foundData;
330 | }
331 |
332 | public String getTitle() { return title; }
333 |
334 | public String toString() { return title; }
335 |
336 | public String getCollection() { return collection; }
337 |
338 | }
339 |
--------------------------------------------------------------------------------
/src/evoker/BEDFileChanger.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.io.BufferedInputStream;
4 | import java.io.File;
5 | import java.io.FileInputStream;
6 | import java.io.FileOutputStream;
7 | import java.nio.channels.FileChannel;
8 | import java.io.FileNotFoundException;
9 | import java.io.IOException;
10 | import java.util.HashMap;
11 | import java.util.Vector;
12 |
13 | import evoker.DataDirectory.ExtensionFilter;
14 |
15 | /**
16 | * Change the genotype information within a bed file (and write to a new one)
17 | */
18 | public class BEDFileChanger {
19 |
20 | /** Holds the list of inds (in correct order) */
21 | Vector inds = null;
22 | /** Holds the changes made to the collection (_not_ in correct order)*/
23 | HashMap>> changes; // chromosome -> snp -> [ind -> change]
24 | /** */
25 | HashMap markerTable;
26 | /** Name of the collection to save */
27 | String collection = null;
28 | /** Path for file to be read */
29 | String path = null;
30 | /** internal ID of collection */
31 | int collectionID = -1;
32 | /** Absolute path of output file */
33 | String toWriteTo = null;
34 | /** Information about conversion from the internal genotype notation to the bed-one. */
35 | HashMap genotpeCoding = new HashMap();
36 | /** Number of SNPs contained in this collection */
37 | int noOfSnps = -1;
38 | //TP CHANGED THIS
39 | /** DATA DIRECTORY, required for looking up the number of SNPs associated with collections and chromosomes,
40 | * stored in a hashmap in MarkerData*/
41 | DataDirectory db;
42 | /**a boolean to store whether to print .bim and .fam files*/
43 | boolean printFullFileset;
/**
 * Creates a bed file from an existing one, applying the specified changes.
 * All the work (including writing the output) happens in this constructor.
 *
 * The changes are copied down to the per-chromosome SNP maps, because
 * writing removes each SNP from its chromosome map once it has been
 * applied; without the copy the caller's maps would be emptied and a second
 * save would silently write unmodified data.
 *
 * @param collectionID     internal ID of the collection
 * @param collection       name of the collection to save
 * @param path             directory holding the input files
 * @param inds             individuals, in .bed file order
 * @param noOfSnps         number of SNPs in the collection
 * @param markerTable      marker lookup by SNP name
 * @param changes          chromosome -> snp -> [ind -> new genotype]
 * @param toWriteTo        absolute output path, without chromosome and extension
 * @param db               data directory used to look up SNP counts
 * @param printFullFileset whether to also copy the .bim and .fam files
 * @throws IOException if reading or writing fails
 */
BEDFileChanger(int collectionID, String collection, String path, Vector<String> inds, int noOfSnps,
        HashMap<String, Marker> markerTable,
        HashMap<String, HashMap<String, HashMap<String, Byte>>> changes,
        String toWriteTo, DataDirectory db, boolean printFullFileset) throws IOException {
    assert collectionID >= 0 && collection != null && path != null && inds != null &&
            noOfSnps != 0 && markerTable != null && changes != null && !changes.keySet().isEmpty() &&
            toWriteTo != null;

    this.inds = inds;
    // Copy the two levels that write() iterates and mutates; the innermost
    // ind -> genotype maps are only read and can stay shared.
    this.changes = new HashMap<String, HashMap<String, HashMap<String, Byte>>>();
    for (String chromosome : changes.keySet()) {
        this.changes.put(chromosome,
                new HashMap<String, HashMap<String, Byte>>(changes.get(chromosome)));
    }
    this.markerTable = markerTable;
    this.collection = collection;
    this.collectionID = collectionID;
    this.path = path;
    this.toWriteTo = toWriteTo;
    this.noOfSnps = noOfSnps;
    this.db = db;
    this.printFullFileset = printFullFileset;
    setGenotypeCoding();
    write();
}
78 |
/**
 * Creates a bed file from an existing one, applying the changes recorded
 * for the collection. Same as the other constructor, but gathers its
 * arguments from the data directory.
 *
 * @param db               data directory holding markers, samples and changes
 * @param collection       name of the collection to save
 * @param file             absolute output path, without chromosome and extension
 * @param printFullFileset whether to also copy the .bim and .fam files
 * @throws IOException if reading or writing fails
 */
BEDFileChanger(DataDirectory db, String collection, String file, boolean printFullFileset) throws IOException {
    // Delegate with this(...) so that this instance is the one initialised,
    // instead of building and discarding a second object.
    this(db.getMarkerData().collectionIndices.get(collection), collection, db.getDisplayName(),
            db.samplesByCollection.get(collection).inds, db.getMarkerData().getNumSNPs(collection),
            db.getMarkerData().getMarkerTable(), db.changesByCollection.get(collection), file, db, printFullFileset);
}
94 |
95 | /**
96 | * Sets the connection between the internal genotype coding and the bed-file-one
97 | */
98 | private void setGenotypeCoding() {
99 | genotpeCoding.put((byte) 0, 0x00); //homo1
100 | genotpeCoding.put((byte) 1, 0x40); //missing
101 | genotpeCoding.put((byte) 2, 0x80); //hetero
102 | genotpeCoding.put((byte) 3, 0xc0); //homo2
103 | }
104 |
105 | /**
106 | * The writing algorithm.
107 | *
108 | * @throws FileNotFoundException
109 | * @throws IOException
110 | */
111 | private void write() throws FileNotFoundException, IOException {
112 | int bytesPerSnp = (int) Math.ceil(((double) inds.size()) / 4);
113 |
114 | // for all the changed chromosomes.
115 | for (String chromosome : changes.keySet()) {
116 | //read file
117 | File f = new File(path + "/" + collection + "." + chromosome + ".bed");
118 | BufferedInputStream bis = new BufferedInputStream(new FileInputStream(f), 8192);
119 |
120 | //TP CHANGED THIS
121 | //output a .fam and .bim file for each .bed file
122 | //search through the current directory for files with the right names to copy
123 | if (printFullFileset) {File dir = new File(path);
124 | File dirList[] = dir.listFiles();
125 | for (File thisFile : dirList) {
126 | String fileName = thisFile.getName();
127 | String bimFileExt = (collection + "." + chromosome + ".bim");
128 | String famFileExt = (collection + ".fam");
129 |
130 | if (fileName.endsWith(bimFileExt))
131 | {
132 | File destination = new File(path + "/" + collection + "mod." + chromosome + ".bim");
133 | copyFile(thisFile, destination);
134 | }
135 |
136 | else if (fileName.endsWith(famFileExt))
137 | {
138 | File destination = new File(path + "/" + collection + "mod." + chromosome + ".fam");
139 | copyFile(thisFile, destination);
140 | }
141 | }
142 | }
143 |
144 | //write file
145 | File f_write = new File(toWriteTo + "." + chromosome + ".bed");
146 | if(f_write.exists()) f_write.delete();
147 | BEDFileWriter bfw = new BEDFileWriter(f_write);
148 |
149 | //skip header
150 | long toskip = 3;
151 | while ((toskip = toskip - bis.skip(toskip)) > 0);
152 | long snpAt = 0;
153 |
154 | byte[] rawSnpData = null;
155 |
156 | // until there is no snp left for this chromosome to change.
157 | while (changes.get(chromosome).keySet().size() > 0) {
158 | String nextSnpToStopAt = null;
159 | long nextSnpIndex = Long.MAX_VALUE;
160 |
161 | // find the first snip to be changed
162 | for (String s : changes.get(chromosome).keySet()) {
163 | Marker m = markerTable.get(s);
164 | int index = m.getIndex(collectionID);
165 | if (index < nextSnpIndex) {
166 | nextSnpIndex = index;
167 | nextSnpToStopAt = s;
168 | }
169 | }
170 |
171 | // read in snp per snp, write it to file until we reach a changed snp
172 | long skip = nextSnpIndex - snpAt;
173 | for (int i = 0; i < skip; i++) {
174 | rawSnpData = new byte[bytesPerSnp];
175 | bis.read(rawSnpData, 0, bytesPerSnp);
176 | bfw.write(rawSnpData);
177 | snpAt++;
178 | }
179 |
180 | // read that whole snp in
181 | rawSnpData = new byte[bytesPerSnp];
182 | bis.read(rawSnpData, 0, bytesPerSnp);
183 |
184 | // find changed inds
185 | for (String ind : changes.get(chromosome).get(nextSnpToStopAt).keySet()) {
186 | long indexOfInd = inds.indexOf(ind);
187 | int indexOfIndInArray = (int) (indexOfInd / 4);
188 | int posInTheByteFromBeginning = (int) (3 - (indexOfInd % 4)); // still to be used as index, as it is turned around, big endian >.<
189 | rawSnpData[indexOfIndInArray] = changeByte(rawSnpData[indexOfIndInArray], posInTheByteFromBeginning, changes.get(chromosome).get(nextSnpToStopAt).get(ind));
190 | }
191 | bfw.write(rawSnpData);
192 | snpAt++;
193 |
194 | // remove snp from the todo list
195 | changes.get(chromosome).remove(nextSnpToStopAt);
196 | }
197 | //TP changed this
198 | // there is nothing to change anymore, but there are still snps to copy.
199 | for (; snpAt < db.getMarkerData().getNumSNPs(collection+chromosome); snpAt++) {
200 | rawSnpData = new byte[bytesPerSnp];
201 | bis.read(rawSnpData, 0, bytesPerSnp);
202 | bfw.write(rawSnpData);
203 | }
204 |
205 | bfw.flush();
206 | bfw.close();
207 | }
208 | }
209 |
210 | /**
211 | * Change genotype within a byte
212 | *
213 | * @param byte to change
214 | * @param potition of the double-bit to change
215 | * @param genotype to change to (internal id-notation)
216 | * @return changed bit
217 | */
218 | private byte changeByte(byte b, int posInTheByteFromBeginning, byte changeTo) {
219 | int byteToChange = b & 0xff; // java seems to convert bytes to ints while processing them, that'd give problems... (no, java does not suck, java is great.)
220 | int toOrWith = genotpeCoding.get(changeTo);
221 | int toResetTo0 = 0xc0;
222 |
223 | toOrWith = toOrWith >>> (posInTheByteFromBeginning * 2);
224 | toResetTo0 = toResetTo0 >>> (posInTheByteFromBeginning * 2);
225 |
226 | return (byte) ((byteToChange & ~toResetTo0) | toOrWith);
227 | }
228 |
229 |
230 | //a function to copy a file
231 | //source help: http://blog-en.openalfa.com/how-to-rename-move-or-copy-a-file-in-java
232 | @SuppressWarnings("resource")
233 | private static void copyFile(File sourceFile, File destFile) throws IOException {
234 | if(!destFile.exists()) {
235 | destFile.createNewFile();
236 | }
237 |
238 | FileChannel origin = null;
239 | FileChannel destination = null;
240 | try {
241 | origin = new FileInputStream(sourceFile).getChannel();
242 | destination = new FileOutputStream(destFile).getChannel();
243 |
244 | long count = 0;
245 | long size = origin.size();
246 | while((count += destination.transferFrom(origin, count, size-count)) 0){
108 | Genoplot.ld.log("Files in the local directory");
109 | // ask the user if they wants it emptied
110 | int n = JOptionPane.showConfirmDialog(
111 | genoplot.getContentPane(),
112 | "The local directory selected is not empty.\n Would you like to clear all files in this directory?",
113 | "Clear the local directory?",
114 | JOptionPane.YES_NO_OPTION,
115 | JOptionPane.QUESTION_MESSAGE );
116 | // n 0 = yes 1 = no
117 | if (n == 0) {
118 | for (File localFile : localFiles){
119 | try {
120 | localFile.delete();
121 | }catch (SecurityException se){
122 | JOptionPane.showMessageDialog(null,se.getMessage(), "File delete error", JOptionPane.ERROR_MESSAGE);
123 | }
124 | }
125 | }
126 | }
127 | displayName = dcd.getHost()+":"+remoteDir;
128 | }else{
129 | throw new IOException("Authentication to host '"+dcd.getHost()+"' failed.");
130 | }
131 | }
132 | }
133 |
134 | public String[] getFilesInRemoteDir() throws IOException{
135 | if (files != null){
136 | String[] out = new String[files.size()];
137 | for (int i = 0; i < files.size(); i++){
138 | out[i] = ((SftpFile)files.get(i)).getFilename();
139 | }
140 | return out;
141 | }else{
142 | throw new IOException("Cannot ls files in remote directory");
143 | }
144 | }
145 |
146 | public boolean getConnectionStatus(){
147 | return (ssh != null);
148 | }
149 |
150 | public void getSNPFiles(String snp, String chrom, String collection, int index, int numinds, int totNumSNPs) throws IOException{
151 | String filestem = collection+"."+snp;
152 | if (!(new File(Utils.join(localDir, filestem+".bed")).exists() &&
153 | new File(Utils.join(localDir, filestem+".bnt")).exists())){
154 | long prev = System.currentTimeMillis();
155 |
156 | SessionChannelClient session = ssh.openSessionChannel();
157 | session.startShell();
158 |
159 | // variable to pass to the evoker-helper.pl script
160 | int oxStatus;
161 | if (fileFormat == FileFormat.OXFORD){
162 | oxStatus = 1;
163 | } else {
164 | oxStatus = 0;
165 | }
166 |
167 | int ukbiobank_v2;
168 | String outpath;
169 | if (fileFormat == FileFormat.UKBIOBANK) {
170 | ukbiobank_v2 = 1;
171 | outpath = remoteTempDir;
172 | filestem = Utils.join(remoteTempDir, filestem);
173 | } else {
174 | ukbiobank_v2 = 0;
175 | outpath = "0";
176 | }
177 |
178 |
179 | //Fire off the script on the remote server to get the requested slice of data
180 | OutputStream out = session.getOutputStream();
181 | //String cmd = "cd "+ remoteDir + "\nperl evoker-helper.pl "+ snp + " " + chrom + " " +
182 | String cmd = "cd "+ remoteDir + "\nevoker-helper.pl "+ snp + " " + chrom + " " +
183 | collection + " " + index + " " + numinds + " " + totNumSNPs + " " + oxStatus + " " +
184 | this.getOxPlatform() + " " + ukbiobank_v2 + " " + outpath + "\n";
185 | out.write(cmd.getBytes());
186 |
187 |
188 | //monitor the remote server for news that the script has been finished
189 | //this is pretty slow -- is there a better way?
190 | InputStream in = session.getInputStream();
191 | byte buffer[] = new byte[1024];
192 | int read;
193 | long start = System.currentTimeMillis();
194 |
195 | while((System.currentTimeMillis() - start)/1000 < 120) {
196 | try{
197 | read = in.read(buffer);
198 | String outstr = new String(buffer, 0, read);
199 | if (outstr.contains(snp)){
200 | break;
201 | } else if (outstr.contains("write_error")) {
202 | throw new IOException("user does not have write privileges");
203 | }
204 | }catch (IOException ioe){
205 | ssh.disconnect();
206 | throw new IOException("Problem with remote directory permissions:\n"+ioe.getMessage());
207 | }
208 |
209 | }
210 |
211 | if ((System.currentTimeMillis() - start)/1000 >= 120) {
212 | // if nothing is output from evoker-helper.pl in 2 minutes then die
213 | throw new IOException("evoker-helper.pl is not responsive check the script will run");
214 | }
215 |
216 | session.close();
217 |
218 | String[] filetypes = {".bed", ".bnt"};
219 | for (String filetype : filetypes) {
220 | File remoteFile = new File(filestem + filetype);
221 | String filename = remoteFile.getName();
222 | String localFilePath = Utils.join(localDir, filename);
223 | // Need UNIX-style paths on the remote machine
224 | String remoteFilePath = remoteFile.getPath().replace("\\", "/");
225 | ftp.get(remoteFilePath, localFilePath);
226 | ftp.rm(remoteFilePath);
227 | }
228 |
229 | double time = ((double)(System.currentTimeMillis() - prev))/1000;
230 | Genoplot.ld.log(snp +" for "+ collection +" was fetched in "+ time + "s.");
231 | }else{
232 | Genoplot.ld.log(snp +" for "+ collection +" was cached.");
233 | }
234 |
235 | }
236 |
237 |
238 | public boolean isOxFormat() {
239 | return fileFormat == FileFormat.OXFORD;
240 | }
241 |
242 | public File prepMetaFiles() throws IOException {
243 | files = ftp.ls();
244 |
245 | String famending = ".fam";
246 | String bimending = ".bim";
247 | if (fileFormat == FileFormat.OXFORD) {
248 | famending = ".sample";
249 | bimending = ".snp";
250 | }
251 |
252 | Iterator i = files.iterator();
253 |
254 | if (fileFormat == FileFormat.UKBIOBANK) {
255 | // UKB provides a separate fam files
256 | File f = new File(famPath);
257 | String famFile = f.getName();
258 |
259 |
260 |
261 | if (!new File(Utils.join(localDir, famFile)).exists()) {
262 | try {
263 | ftp.get(famPath);
264 | } catch (IOException e) {
265 | // TODO Auto-generated catch block
266 | e.printStackTrace();
267 | }
268 | }
269 |
270 | } else {
271 | while (i.hasNext()) {
272 | String filename = ((SftpFile) i.next()).getFilename();
273 | if (filename.endsWith(famending)) {
274 |
275 | if (!new File(Utils.join(localDir, filename)).exists()) {
276 | try {
277 | ftp.get(filename);
278 | } catch (IOException e) {
279 | // TODO Auto-generated catch block
280 | e.printStackTrace();
281 | }
282 | }
283 | }
284 |
285 | }
286 | }
287 |
288 | gp.pm.setMaximum(files.size() * 2);
289 | int loopCount = 0;
290 | i = files.iterator();
291 | while (i.hasNext()) {
292 | gp.pm.setProgress(++loopCount);
293 | if (gp.pm.isCanceled()) {
294 | return null;
295 | }
296 | String filename = ((SftpFile) i.next()).getFilename();
297 | if (filename.endsWith(bimending)) {
298 |
299 | if (!new File(Utils.join(localDir, filename)).exists()) {
300 | try {
301 | ftp.get(filename);
302 | } catch (IOException e) {
303 | // TODO Auto-generated catch block
304 | e.printStackTrace();
305 | }
306 | }
307 | }
308 |
309 | }
310 |
311 | i = files.iterator();
312 | while (i.hasNext()) {
313 | gp.pm.setProgress(++loopCount);
314 | if (gp.pm.isCanceled()) {
315 | return null;
316 | }
317 | String filename = ((SftpFile) i.next()).getFilename();
318 | if (filename.endsWith(".qc")) {
319 | if (!new File(Utils.join(localDir, filename)).exists()) {
320 | try {
321 | ftp.get(filename);
322 | } catch (IOException e) {
323 | // TODO Auto-generated catch block
324 | e.printStackTrace();
325 | }
326 | }
327 | }
328 |
329 | }
330 |
331 | return new File(localDir);
332 | }
333 |
334 | public void setOxPlatform(String oxPlatform) {
335 | this.oxPlatform = oxPlatform;
336 | }
337 |
338 | public String getOxPlatform() {
339 | return oxPlatform;
340 | }
341 |
342 | public SftpClient getFTP() {
343 | return ftp;
344 | }
345 |
346 | }
347 |
--------------------------------------------------------------------------------
/src/evoker/EvokerChartPanel.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.awt.Graphics2D;
4 | import java.awt.Point;
5 | import java.awt.event.ActionEvent;
6 | import java.awt.event.KeyEvent;
7 | import java.awt.event.MouseEvent;
8 | import java.awt.geom.Line2D;
9 | import java.awt.geom.Point2D;
10 | import java.awt.geom.Rectangle2D;
11 | import java.io.File;
12 | import java.io.FileNotFoundException;
13 | import java.io.IOException;
14 | import java.text.DecimalFormat;
15 | import java.util.ArrayList;
16 | import java.util.HashMap;
17 | import java.util.List;
18 | import java.util.logging.Level;
19 | import java.util.logging.Logger;
20 | import javax.swing.ButtonGroup;
21 | import javax.swing.JFileChooser;
22 | import javax.swing.JMenu;
23 | import javax.swing.JMenuItem;
24 | import javax.swing.JPopupMenu;
25 | import javax.swing.JRadioButtonMenuItem;
26 | import org.jfree.chart.ChartPanel;
27 | import org.jfree.chart.JFreeChart;
28 | import org.jfree.chart.plot.XYPlot;
29 | import org.jfree.data.xy.XYDataItem;
30 | import org.jfree.data.xy.XYSeries;
31 | import org.jfree.data.xy.XYSeriesCollection;
32 | import org.jfree.ui.ExtensionFileFilter;
33 |
34 | import evoker.Genoplot.MouseMode;
35 |
36 | public class EvokerChartPanel extends ChartPanel {
37 |
38 | /** Holding the last Point, the mouse was dragged to (for drawing a line while selecting) */
39 | Point lastDragPoint = null;
40 | /** Will hold the lasso Object, while the selection is being made*/
41 | Lasso lasso = null;
42 | /** Enable mouse listeners to perform zoom*/
43 | public static final String ZOOM_ENABLE_COMMAND = "ZOOM_ENABLE";
44 | /** Enable mouse listeners to perform lasso select*/
45 | public static final String LASSO_SELECT_ENABLE_COMMAND = "LASSO_SELECT_ENABLE";
46 | /*The genotype selection menu*/
47 | private JPopupMenu genotypeSelectPopup;
48 | /** The genotype data being displayed */
49 | private PlotData plotData = null;
50 | /** The panel calling the ChartPanel*/
51 | private PlotPanel plotPanel = null;
52 | private boolean weAreLassoing = false;
53 | private JRadioButtonMenuItem jrbZoom;
54 | private JRadioButtonMenuItem jrbLasso;
55 |
56 | EvokerChartPanel(JFreeChart jfc, PlotData pdata, PlotPanel ppanel) {
57 | super(jfc);
58 | // set up genotype select menu (... :] )
59 | this.genotypeSelectPopup = createGenotypeSelectPopup();
60 | this.plotData = pdata;
61 | this.plotPanel = ppanel;
62 |
63 | createPopupMenu_();
64 | }
65 |
66 | private double rescaleMouseClickX(Point p){
67 | Rectangle2D plotArea = this.getScreenDataArea();
68 | XYPlot plot = (XYPlot) super.getChart().getPlot();
69 | double x = plot.getDomainAxis().java2DToValue(p.getX(), plotArea, plot.getDomainAxisEdge());
70 | return(x);
71 |
72 | }
73 |
74 | private double rescaleMouseClickY(Point p){
75 | Rectangle2D plotArea = this.getScreenDataArea();
76 | XYPlot plot = (XYPlot) super.getChart().getPlot();
77 | double y = plot.getRangeAxis().java2DToValue(p.getY(), plotArea, plot.getRangeAxisEdge());
78 | return(y);
79 | }
80 |
81 | /**
82 | * Handles a 'mouse pressed' event.
83 | *
84 | * This event is the popup trigger on Unix/Linux. For Windows, the popup
85 | * trigger is the 'mouse released' event.
86 | *
87 | * @param e The mouse event.
88 | */
89 | public void mousePressed(MouseEvent e) {
90 | if(!e.isPopupTrigger()) {
91 | if (plotPanel.theGenoplot.getMouseMode() == MouseMode.LASSO) {
92 | lastDragPoint = e.getPoint();
93 | lasso = new Lasso(rescaleMouseClickX(lastDragPoint),rescaleMouseClickY(lastDragPoint));
94 | weAreLassoing = true;
95 | }
96 | else {
97 | super.mousePressed(e);
98 | }
99 | }else{
100 | if (popup != null) {
101 | if (plotPanel.theGenoplot.getMouseMode() == MouseMode.ZOOM) jrbZoom.setSelected(true);
102 | if (plotPanel.theGenoplot.getMouseMode() == MouseMode.LASSO) jrbLasso.setSelected(true);
103 | displayPopupMenu(e.getX(), e.getY());
104 | }
105 | }
106 | }
107 |
108 | /**
109 | * Handles a 'mouse dragged' event.
110 | *
111 | * @param e the mouse event.
112 | */
113 | public void mouseDragged(MouseEvent e) {
114 | if(weAreLassoing) {
115 | // draw line from last Point to the current one
116 | if (lastDragPoint != null) {
117 | Graphics2D g2 = (Graphics2D) getGraphics();
118 | g2.draw(new Line2D.Double(lastDragPoint.getX(), lastDragPoint.getY(), e.getX(), e.getY()));
119 | }
120 | lastDragPoint = new Point(e.getX(), e.getY());
121 |
122 | lasso.addPoint(rescaleMouseClickX(lastDragPoint),rescaleMouseClickY(lastDragPoint));
123 | }else {
124 | super.mouseDragged(e);
125 | }
126 | }
127 |
128 | /**
129 | * Handles a 'mouse released' event. On Windows, we need to check if this
130 | * is a popup trigger, but only if we haven't already been tracking a zoom
131 | * rectangle.
132 | *
133 | * @param e information about the event.
134 | */
135 | public void mouseReleased(MouseEvent e) {
136 | if (weAreLassoing) {
137 | lastDragPoint = new Point(e.getX(), e.getY());
138 | lasso.addPoint(rescaleMouseClickX(lastDragPoint),rescaleMouseClickY(lastDragPoint));
139 | lastDragPoint = null;
140 | if (lasso != null) {
141 | lasso.close();
142 | weAreLassoing = false;
143 | this.genotypeSelectPopup.show(this, e.getX(), e.getY());
144 | }
145 |
146 | }
147 | else {
148 | super.mouseReleased(e);
149 | }
150 | }
151 |
152 | /**
153 | * Creates a popup menu for the panel.
154 | *
155 | * @param properties include a menu item for the chart property editor.
156 | * @param copy include a menu item for copying to the clipboard.
157 | * @param save include a menu item for saving the chart.
158 | * @param print include a menu item for printing the chart.
159 | * @param zoom include menu items for zooming.
160 | *
161 | * @return The popup menu.
162 | *
163 | * @since 1.0.13
164 | */
165 | protected void createPopupMenu_() {
166 | ButtonGroup group = new ButtonGroup();
167 |
168 | jrbZoom = new JRadioButtonMenuItem("Zoom");
169 | if (plotPanel.theGenoplot.getMouseMode() == MouseMode.ZOOM) jrbZoom.setSelected(true);
170 | jrbZoom.setMnemonic(KeyEvent.VK_O);
171 | jrbZoom.setActionCommand(ZOOM_ENABLE_COMMAND);
172 | jrbZoom.addActionListener(this);
173 | group.add(jrbZoom);
174 | popup.add(jrbZoom, 0);
175 |
176 | jrbLasso = new JRadioButtonMenuItem("Lasso Select");
177 | if (plotPanel.theGenoplot.getMouseMode() == MouseMode.LASSO) jrbLasso.setSelected(true);
178 | jrbLasso.setMnemonic(KeyEvent.VK_R);
179 | jrbLasso.setActionCommand(LASSO_SELECT_ENABLE_COMMAND);
180 | jrbLasso.addActionListener(this);
181 | group.add(jrbLasso);
182 | popup.add(jrbLasso, 1);
183 | }
184 |
185 | protected JPopupMenu createGenotypeSelectPopup() {
186 | JPopupMenu result = new JPopupMenu("Chart:");
187 | ButtonGroup group = new ButtonGroup();
188 |
189 | JRadioButtonMenuItem jrbMenIt = new JRadioButtonMenuItem("Y Homozygous");
190 | jrbMenIt.setSelected(true);
191 | jrbMenIt.setMnemonic(KeyEvent.VK_O);
192 | jrbMenIt.setActionCommand("GENOTYPE_YY");
193 | jrbMenIt.addActionListener(this);
194 | group.add(jrbMenIt);
195 | result.add(jrbMenIt);
196 |
197 | jrbMenIt = new JRadioButtonMenuItem("Heterozygous");
198 | jrbMenIt.setMnemonic(KeyEvent.VK_R);
199 | jrbMenIt.setActionCommand("GENOTYPE_XY");
200 | jrbMenIt.addActionListener(this);
201 | group.add(jrbMenIt);
202 | result.add(jrbMenIt);
203 |
204 | jrbMenIt = new JRadioButtonMenuItem("X Homozygous");
205 | jrbMenIt.setMnemonic(KeyEvent.VK_R);
206 | jrbMenIt.setActionCommand("GENOTYPE_XX");
207 | jrbMenIt.addActionListener(this);
208 | group.add(jrbMenIt);
209 | result.add(jrbMenIt);
210 |
211 | jrbMenIt = new JRadioButtonMenuItem("Unknown");
212 | jrbMenIt.setMnemonic(KeyEvent.VK_R);
213 | jrbMenIt.setActionCommand("GENOTYPE_UNKNOWN");
214 | jrbMenIt.addActionListener(this);
215 | group.add(jrbMenIt);
216 | result.add(jrbMenIt);
217 |
218 | return result;
219 | }
220 |
221 | /**
222 | * Handles action events generated by the popup menu.
223 | *
224 | * @param event the event.
225 | */
226 | public void actionPerformed(ActionEvent event) {
227 |
228 | String command = event.getActionCommand();
229 | if (command.equals("GENOTYPE_XX")) {
230 | adjustDataSeries(0);
231 | }
232 | else if (command.equals("GENOTYPE_UNKNOWN")) {
233 | adjustDataSeries(1);
234 | }
235 | else if (command.equals("GENOTYPE_XY")) {
236 | adjustDataSeries(2);
237 | }
238 | else if (command.equals("GENOTYPE_YY")) {
239 | adjustDataSeries(3);
240 | }
241 | else if (command.equals(ZOOM_ENABLE_COMMAND)) {
242 | plotPanel.theGenoplot.setMouseMode(MouseMode.ZOOM);
243 | }
244 | else if (command.equals(LASSO_SELECT_ENABLE_COMMAND)) {
245 | plotPanel.theGenoplot.setMouseMode(MouseMode.LASSO);
246 | }
247 | else {
248 | super.actionPerformed(event);
249 | }
250 |
251 | }
252 |
253 | public double round3(double n) {
254 | double result = n * 100000;
255 | result = Math.round(result);
256 | result = result / 100000;
257 | return result;
258 | }
259 |
260 | void adjustDataSeries(int genotype) {
261 |     // Moves every plotted point enclosed by the current lasso into series/class `genotype`.
262 |     HashMap<EvokerPoint2D, String> containedPointsInd = lasso.getContainedPointsInd(super.info.getEntityCollection());
263 |     ArrayList<String> al_s = new ArrayList<String>(); // individuals already moved during this call
264 |
265 |     XYPlot plot = (XYPlot) super.getChart().getPlot();
266 |     XYSeriesCollection xyseriescoll = (XYSeriesCollection) plot.getDataset();
267 |
268 |     ArrayList<XYDataItem> l_di = new ArrayList<XYDataItem>(); // items taken out of their old series
269 |
270 |     int seriesCount = xyseriescoll.getSeriesCount();
271 |     for (int a = 0; a < seriesCount; a++) {
272 |         XYSeries series = xyseriescoll.getSeries(a);
273 |         // NOTE(review): items are removed through this list, so getItems() must return a live, modifiable list - confirm.
274 |         List<XYDataItem> items = (List<XYDataItem>) series.getItems();
275 |         int itemsLength = items.size();
276 |         for (int b = 0; b < itemsLength; b++) {
277 |             XYDataItem xydi = items.get(b);
278 |             Point2D p = new Point2D.Double(round3(Double.parseDouble(xydi.getX() + "")),
279 |                     round3(Double.parseDouble(xydi.getY() + "")));
280 |             for (EvokerPoint2D p_ : containedPointsInd.keySet()) { // compare the item's rounded coordinates with each lassoed point
281 |                 if (p.getX() == p_.getX() && p.getY() == p_.getY()) {
282 |                     String ind = containedPointsInd.get(p_);
283 |                     if (al_s.contains(ind)) continue; // already moved: look for another individual at the same coordinates
284 |                     plotData.moveIndToClass(ind, a, b, genotype);
285 |                     al_s.add(ind);
286 |                     l_di.add(xydi);
287 |                     items.remove(b);
288 |                     b--; // the following item has shifted into slot b, so visit that index again
289 |                     itemsLength--;
290 |                     break;
291 |                 }
292 |             }
293 |         }
294 |     }
295 |
296 |     XYSeries series = xyseriescoll.getSeries(genotype);
297 |     List<XYDataItem> items = (List<XYDataItem>) series.getItems();
298 |     for (XYDataItem xydi : l_di) { // append in the same order in which moveIndToClass() appended the names
299 |         items.add(xydi);
300 |     }
301 |
302 |     lasso = null;
303 |     this.chart.setNotify(true); // last thing, redraw. Applies changes and gets rid of the line.
304 |     this.plotData.computeSummary();
305 |     this.plotPanel.updateInfo();
306 |
307 |     this.plotData.changed = true;
308 | }
309 |
310 | private void save() throws IOException {
311 |     // Asks the user for a target file and creates a BEDFileWriter for it; throws IOException if that fails.
312 |     JFileChooser fileChooser = new JFileChooser();
313 |     fileChooser.setCurrentDirectory(super.defaultDirectoryForSaveAs);
314 |     ExtensionFileFilter filter = new ExtensionFileFilter("BED Binary Files", ".bed");
315 |     fileChooser.addChoosableFileFilter(filter);
316 |
317 |     int option = fileChooser.showSaveDialog(this);
318 |     if (option == JFileChooser.APPROVE_OPTION) {
319 |         String filename = fileChooser.getSelectedFile().getPath();
320 |         if (isEnforceFileExtensions() && !filename.endsWith(".bed")) {
321 |             filename = filename + ".bed";
322 |         }
323 |         try {
324 |             // NOTE(review): nothing else is called on the writer, so presumably its constructor does the writing - confirm.
325 |             new BEDFileWriter(new File(filename));
326 |         } catch (IOException ex) {
327 |             // Keep the underlying failure attached as the cause instead of discarding it (initCause also works on Java 5).
328 |             throw (IOException) new IOException("Could not write file.").initCause(ex);
329 |         }
330 |     }
331 | }
332 | }
333 |
--------------------------------------------------------------------------------
/src/evoker/PlotData.java:
--------------------------------------------------------------------------------
1 | package evoker;
2 |
3 | import java.io.File;
4 | import java.util.ArrayList;
5 | import java.util.HashMap;
6 | import java.util.Vector;
7 |
8 | import org.jfree.data.xy.XYSeriesCollection;
9 | import org.jfree.data.xy.XYSeries;
10 |
11 | import evoker.Types.*;
12 |
13 | /**
14 |  * Holds the genotype calls and intensity pairs of one marker for a set of samples and
15 |  * derives from them the plot series and the summary statistics (minor allele frequency,
16 |  * genotyping percentage, Hardy-Weinberg p-value).
17 |  */
18 | public class PlotData {
19 |
20 |     // Genotype codes as consumed by generatePoints() and computeSummary():
21 |     // 0 = first homozygote, 1 = missing, 2 = heterozygote, 3 = second homozygote.
22 |     private ArrayList<Byte> calledGenotypes;
23 |     // One {x, y} pair per sample. generatePoints() overwrites the pair in place when the
24 |     // coordinate system is POLAR or UKBIOBANK.
25 |     private ArrayList<float[]> intensities;
26 |     private double maf, genopc, hwpval, minX, maxX, minY, maxY;
27 |     private SampleData samples;
28 |     private QCFilterData exclude;
29 |     private int sampleNum;
30 |     private CoordinateSystem coordSystem;
31 |     // Sample names per plot series, indexed by genotype code.
32 |     private ArrayList<ArrayList<String>> indsInClasses;
33 |     // Position of each sample inside its class list, as recorded by generatePoints().
34 |     private HashMap<String, Integer> indexInArrayListByInd;
35 |     private char[] alleles;
36 |     // Reassignments made through moveIndToClass(): sample name -> new genotype code.
37 |     private HashMap<String, Byte> genotypeChanges = new HashMap<String, Byte>();
38 |     public boolean changed = false;
39 |     private FileFormat fileFormat;
40 |
41 |     /**
42 |      * Creates plot data over the given calls and intensities. Both lists are kept by
43 |      * reference. The bounds start at +/-100000 and are replaced as generatePoints()
44 |      * sees actual points.
45 |      */
46 |     PlotData(ArrayList<Byte> calledGenotypes, ArrayList<float[]> intensities, SampleData samples, QCFilterData exclude, char[] alleles, CoordinateSystem coordSystem, FileFormat fileFormat) {
47 |         this.calledGenotypes = calledGenotypes;
48 |         this.intensities = intensities;
49 |         this.samples = samples;
50 |         this.exclude = exclude;
51 |         this.minX = 100000;
52 |         this.maxX = -100000;
53 |         this.minY = 100000;
54 |         this.maxY = -100000;
55 |         this.alleles = alleles;
56 |         this.fileFormat = fileFormat;
57 |         this.setCoordSystem(coordSystem);
58 |     }
59 |
60 |     /** Appends further calls and intensity pairs to the lists held by this object. */
61 |     public void add(ArrayList<Byte> calledgenotypes, ArrayList<float[]> intensities) {
62 |         this.calledGenotypes.addAll(calledgenotypes);
63 |         this.intensities.addAll(intensities);
64 |     }
65 |
66 |     /**
67 |      * Builds a new PlotData restricted to the given sample indices.
68 |      *
69 |      * @param indices positions, in this object's sample order, of the samples to keep
70 |      * @param sexToPlot unless this is Sex.NOT_SEX, samples of any other sex are dropped
71 |      * @param newCoordinateSystem coordinate system of the returned object
72 |      * @return the subset; its intensity arrays are the same objects as in this instance
73 |      */
74 |     public PlotData getSubPlotData(Vector<Integer> indices, Sex sexToPlot, CoordinateSystem newCoordinateSystem) {
75 |         int subLength = indices.size();
76 |         ArrayList<float[]> subIntensities = new ArrayList<float[]>(subLength);
77 |         ArrayList<Byte> subCalledGenotypes = new ArrayList<Byte>(subLength);
78 |         Vector<String> subSampleVector = new Vector<String>(subLength);
79 |         QCFilterData ukbExclude = samples.getUkbExclude();
80 |
81 |         for (int index : indices) {
82 |             if ((sexToPlot != Sex.NOT_SEX) && (samples.getSexByIndex(index) != sexToPlot)) {
83 |                 continue;
84 |             }
85 |             subCalledGenotypes.add(calledGenotypes.get(index));
86 |             subIntensities.add(intensities.get(index));
87 |             subSampleVector.add(samples.getInd(index));
88 |         }
89 |         SampleData subSamples = new SampleData(subSampleVector);
90 |         subSamples.setUkbExclude(ukbExclude);
91 |         return new PlotData(subCalledGenotypes, subIntensities, subSamples, exclude,
92 |                 alleles, newCoordinateSystem, fileFormat);
93 |     }
94 |
95 |     /**
96 |      * Recomputes the summary statistics and builds the four plot series, stored in the
97 |      * returned collection in the order homozygote 1, missing, heterozygote, homozygote 2.
98 |      * Excluded samples and samples with a null call are left out.
99 |      *
100 |      * @return the series, or null if the data is absent or no sample has a called genotype
101 |      */
102 |     XYSeriesCollection generatePoints() {
103 |         if (intensities == null || calledGenotypes == null) {
104 |             return null;
105 |         }
106 |
107 |         computeSummary();
108 |
109 |         XYSeries intensityDataSeriesHomo1 = new XYSeries(0, false);
110 |         XYSeries intensityDataSeriesMissing = new XYSeries(1, false);
111 |         XYSeries intensityDataSeriesHetero = new XYSeries(2, false);
112 |         XYSeries intensityDataSeriesHomo2 = new XYSeries(3, false);
113 |         // Indexed by genotype code, in step with indsInClasses.
114 |         XYSeries[] seriesByGenotype = {intensityDataSeriesHomo1, intensityDataSeriesMissing,
115 |                 intensityDataSeriesHetero, intensityDataSeriesHomo2};
116 |
117 |         indsInClasses = new ArrayList<ArrayList<String>>();
118 |         for (int i = 0; i < 4; i++) {
119 |             indsInClasses.add(new ArrayList<String>());
120 |         }
121 |
122 |         indexInArrayListByInd = new HashMap<String, Integer>();
123 |
124 |         sampleNum = 0;
125 |         for (int i = 0; i < intensities.size(); i++) {
126 |             float[] intens = intensities.get(i);
127 |
128 |             // NOTE(review): the conversion below writes back into the stored array, so running
129 |             // generatePoints() twice on the same data would convert twice - confirm callers never do.
130 |             if (coordSystem == CoordinateSystem.POLAR) {
131 |                 float x = intens[0];
132 |                 float y = intens[1];
133 |
134 |                 float r = (float) Math.sqrt(Math.pow(y, 2) + Math.pow(x, 2));
135 |                 float theta = (float) Math.asin(y / r);
136 |
137 |                 intens[0] = theta;
138 |                 intens[1] = r;
139 |             } else if (coordSystem == CoordinateSystem.UKBIOBANK) {
140 |                 float a = intens[0];
141 |                 float b = intens[1];
142 |
143 |                 // Contrast (x-axis) = log2(A/B)
144 |                 intens[0] = (float) log2(a/b);
145 |
146 |                 // Strength (y-axis) = (log2(A*B))/2
147 |                 intens[1] = (float) log2(a*b)/2;
148 |
149 |                 if (Float.isNaN(intens[0]) || Float.isNaN(intens[1])) {
150 |                     System.out.println(intens[0] + ", " + intens[1]);
151 |                 }
152 |             }
153 |
154 |             String sampleName = samples.getInd(i);
155 |
156 |             // If there is nothing to plot, skip
157 |             if (calledGenotypes.get(i) == null) {
158 |                 continue;
159 |             }
160 |
161 |             // If the exclude exists and the individual is excluded, skip
162 |             if ((exclude != null) && exclude.isExcluded(sampleName)) {
163 |                 continue;
164 |             }
165 |
166 |             if ((fileFormat == FileFormat.UKBIOBANK) && samples.getUkbExclude().isExcluded(sampleName)) {
167 |                 continue;
168 |             }
169 |
170 |             sampleNum++;
171 |             byte geno = calledGenotypes.get(i);
172 |             if (geno >= 0 && geno < seriesByGenotype.length) {
173 |                 ArrayList<String> inds = indsInClasses.get(geno);
174 |                 seriesByGenotype[geno].add(intens[0], intens[1]);
175 |                 inds.add(sampleName);
176 |                 indexInArrayListByInd.put(sampleName, inds.size() - 1);
177 |             }
178 |             //TODO: any other genotype code is counted in sampleNum but never plotted; this is very bad
179 |
180 |             //illuminus uses [-1,-1] as a flag for missing data. technically we don't want to make it impossible
181 |             //for such a datapoint to exist, but we won't let this exact data point adjust the bounds of the plot.
182 |             //if it really is intentional, there will almost certainly be other nearby, negative points
183 |             //which will resize the bounds appropriately.
184 |             if (!(intens[0] == -1 && intens[1] == -1)) {
185 |                 if (intens[0] > maxX) {
186 |                     maxX = intens[0];
187 |                 }
188 |                 if (intens[0] < minX) {
189 |                     minX = intens[0];
190 |                 }
191 |
192 |                 if (intens[1] > maxY) {
193 |                     maxY = intens[1];
194 |                 }
195 |                 if (intens[1] < minY) {
196 |                     minY = intens[1];
197 |                 }
198 |             }
199 |         }
200 |
201 |         int heteroCount = intensityDataSeriesHetero.getItemCount();
202 |         int homo1Count = intensityDataSeriesHomo1.getItemCount();
203 |         int homo2Count = intensityDataSeriesHomo2.getItemCount();
204 |
205 |         if (heteroCount + homo1Count + homo2Count == 0) {
206 |             return null;
207 |         }
208 |
209 |         XYSeriesCollection xysc = new XYSeriesCollection(intensityDataSeriesHomo1);
210 |         xysc.addSeries(intensityDataSeriesMissing);
211 |         xysc.addSeries(intensityDataSeriesHetero);
212 |         xysc.addSeries(intensityDataSeriesHomo2);
213 |         return xysc;
214 |     }
215 |
216 |     /** Returns the name of the sample at position i within genotype class cl. */
217 |     public String getIndInClass(int cl, int i) {
218 |         return indsInClasses.get(cl).get(i);
219 |     }
220 |
221 |     /**
222 |      * Moves an IND to another (internal) genotype class and records the change.
223 |      *
224 |      * @param ind name of the individual
225 |      * @param fromCl class it is from
226 |      * @param fromI index of the individual in that class
227 |      * @param to class it should be in
228 |      */
229 |     public void moveIndToClass(String ind, int fromCl, int fromI, int to) {
230 |         indsInClasses.get(fromCl).remove(fromI);
231 |         indsInClasses.get(to).add(ind);
232 |
233 |         int index = samples.getIndex(ind);
234 |         calledGenotypes.set(index, (byte) to);
235 |         genotypeChanges.put(ind, (byte) to);
236 |     }
237 |
238 |     /**
239 |      * Recomputes genotyping percentage, minor allele frequency and Hardy-Weinberg p-value
240 |      * from the current calls, ignoring excluded samples. Any call other than 0, 2 or 3 is
241 |      * counted as missing. When no sample is counted, the previous values are left in place.
242 |      */
243 |     protected void computeSummary() {
244 |         double hom1 = 0, het = 0, hom2 = 0, missing = 0;
245 |
246 |         for (int i = 0; i < calledGenotypes.size(); i++) {
247 |             String sampleName = samples.getInd(i);
248 |
249 |             // If the exclude exists and the individual is excluded, skip
250 |             if ((exclude != null) && exclude.isExcluded(sampleName)) {
251 |                 continue;
252 |             }
253 |
254 |             if ((fileFormat == FileFormat.UKBIOBANK) && samples.getUkbExclude().isExcluded(sampleName)) {
255 |                 continue;
256 |             }
257 |
258 |             byte geno = calledGenotypes.get(i);
259 |             if (geno == 0) {
260 |                 hom1++;
261 |             } else if (geno == 2) {
262 |                 het++;
263 |             } else if (geno == 3) {
264 |                 hom2++;
265 |             } else {
266 |                 missing++;
267 |             }
268 |         }
269 |
270 |         // Nothing was counted (no samples, or all excluded): keep the old statistics.
271 |         if (missing + hom1 + het + hom2 == 0) {
272 |             return;
273 |         }
274 |
275 |         // These depend only on the final counts, so they are derived once after the loop
276 |         // rather than on every iteration (hwCalculate is itself linear in the allele count).
277 |         genopc = 1 - (missing / (missing + hom1 + het + hom2));
278 |         double tmpmaf = ((2 * hom1) + het) / ((2 * het) + (2 * hom1) + (2 * hom2));
279 |         if (tmpmaf < 0.5) {
280 |             maf = tmpmaf;
281 |         } else {
282 |             maf = 1 - tmpmaf;
283 |         }
284 |         hwpval = hwCalculate((int) hom1, (int) het, (int) hom2);
285 |     }
286 |
287 |     /**
288 |      * Calculates the exact two-sided Hardy-Weinberg p-value.
289 |      * (c) 2003 Jan Wigginton, Goncalo Abecasis
290 |      *
291 |      * @param obsAA observed count of the first homozygote
292 |      * @param obsAB observed count of heterozygotes
293 |      * @param obsBB observed count of the second homozygote
294 |      * @return the p-value, or 0 when all three counts are zero
295 |      */
296 |     private double hwCalculate(int obsAA, int obsAB, int obsBB) {
297 |         int diplotypes = obsAA + obsAB + obsBB;
298 |         if (diplotypes == 0) {
299 |             return 0;
300 |         }
301 |         int rare = (obsAA * 2) + obsAB;
302 |         int hets = obsAB;
303 |
304 |         //make sure "rare" allele is really the rare allele
305 |         if (rare > diplotypes) {
306 |             rare = 2 * diplotypes - rare;
307 |         }
308 |
309 |         double[] tailProbs = new double[rare + 1];
310 |         for (int z = 0; z < tailProbs.length; z++) {
311 |             tailProbs[z] = 0;
312 |         }
313 |
314 |         //start at midpoint
315 |         //(computed in long arithmetic: rare * (2 * diplotypes - rare) can exceed the int range for large sample counts)
316 |         int mid = (int) ((long) rare * (2L * diplotypes - rare) / (2L * diplotypes));
317 |
318 |         //check to ensure that midpoint and rare alleles have same parity
319 |         if (((rare & 1) ^ (mid & 1)) != 0) {
320 |             mid++;
321 |         }
322 |         int het = mid;
323 |         int hom_r = (rare - mid) / 2;
324 |         int hom_c = diplotypes - het - hom_r;
325 |
326 |         //Calculate probability for each possible observed heterozygote
327 |         //count up to a scaling constant, to avoid underflow and overflow
328 |         tailProbs[mid] = 1.0;
329 |         double sum = tailProbs[mid];
330 |         for (het = mid; het > 1; het -= 2) {
331 |             tailProbs[het - 2] = (tailProbs[het] * het * (het - 1.0)) / (4.0 * (hom_r + 1.0) * (hom_c + 1.0));
332 |             sum += tailProbs[het - 2];
333 |             //2 fewer hets for next iteration -> add one rare and one common homozygote
334 |             hom_r++;
335 |             hom_c++;
336 |         }
337 |
338 |         het = mid;
339 |         hom_r = (rare - mid) / 2;
340 |         hom_c = diplotypes - het - hom_r;
341 |         for (het = mid; het <= rare - 2; het += 2) {
342 |             tailProbs[het + 2] = (tailProbs[het] * 4.0 * hom_r * hom_c) / ((het + 2.0) * (het + 1.0));
343 |             sum += tailProbs[het + 2];
344 |             //2 more hets for next iteration -> subtract one rare and one common homozygote
345 |             hom_r--;
346 |             hom_c--;
347 |         }
348 |
349 |         for (int z = 0; z < tailProbs.length; z++) {
350 |             tailProbs[z] /= sum;
351 |         }
352 |
353 |         double top = tailProbs[hets];
354 |         for (int i = hets + 1; i <= rare; i++) {
355 |             top += tailProbs[i];
356 |         }
357 |         double otherSide = tailProbs[hets];
358 |         for (int i = hets - 1; i >= 0; i--) {
359 |             otherSide += tailProbs[i];
360 |         }
361 |
362 |         if (top > 0.5 && otherSide > 0.5) {
363 |             return 1.0;
364 |         } else {
365 |             if (top < otherSide) {
366 |                 return top * 2;
367 |             } else {
368 |                 return otherSide * 2;
369 |             }
370 |         }
371 |     }
372 |
373 |     /** Returns the reassignments recorded by moveIndToClass(), keyed by sample name. */
374 |     public HashMap<String, Byte> getGenotypeChanges() {
375 |         return genotypeChanges;
376 |     }
377 |
378 |     public double getMaf() {
379 |         return maf;
380 |     }
381 |
382 |     public double getGenopc() {
383 |         return genopc;
384 |     }
385 |
386 |     public double getHwpval() {
387 |         return hwpval;
388 |     }
389 |
390 |     /** Returns the larger of the two upper plot bounds. */
391 |     public double getMaxDim() { return Math.max(maxX, maxY); }
392 |
393 |     /** Returns the smaller of the two lower plot bounds. */
394 |     public double getMinDim() { return Math.min(minX, minY); }
395 |
396 |     public double getMinX() {
397 |         return minX;
398 |     }
399 |
400 |     public double getMaxX() {
401 |         return maxX;
402 |     }
403 |
404 |     public double getMinY() {
405 |         return minY;
406 |     }
407 |
408 |     public double getMaxY() {
409 |         return maxY;
410 |     }
411 |
412 |     /** Returns the span between getMinDim() and getMaxDim(). */
413 |     public double getRange() {
414 |         return getMaxDim() - getMinDim();
415 |     }
416 |
417 |     /** Returns the marker's alleles, or two blanks when none were supplied. */
418 |     public char[] getAlleles() {
419 |         if (alleles != null) {
420 |             return alleles;
421 |         } else {
422 |             return new char[]{' ', ' '};
423 |         }
424 |     }
425 |
426 |     /** Returns how many samples the last generatePoints() run got past its null-call and exclusion checks. */
427 |     public int getSampleNum() {
428 |         return sampleNum;
429 |     }
430 |
431 |     private void setCoordSystem(CoordinateSystem coordSystem) {
432 |         this.coordSystem = coordSystem;
433 |     }
434 |
435 |     public CoordinateSystem getCoordSystem() {
436 |         return coordSystem;
437 |     }
438 |
439 |     /** Returns the current genotype code of the named sample. */
440 |     public byte getCalledGenotype(String ind){
441 |         return calledGenotypes.get(samples.getIndex(ind));
442 |     }
443 |
444 |     /** Returns the position recorded for the named sample by generatePoints(). */
445 |     public int getIndexInArrayList(String ind){
446 |         return indexInArrayListByInd.get(ind);
447 |     }
448 |
449 |     private double log2(double x) {
450 |         return Math.log(x) / Math.log(2);
451 |     }
452 | }
453 |
--------------------------------------------------------------------------------
/resources/evoker-documentation.tex:
--------------------------------------------------------------------------------
1 | \documentclass{article}
2 |
3 | \title{Evoker: a genotype visualization tool}
4 | \author{Jeffrey C.\ Barrett\\\texttt{barrett@sanger.ac.uk}}
5 | \date{\today}
6 |
7 | \begin{document}
8 |
9 | \maketitle
10 |
11 | \section{Introduction}
12 |
13 | Evoker is a tool designed for visualizing genotype cluster plots as part of quality control procedures for genome-wide association studies. It provides a solution to the computational and storage problems related to being able to work with the huge volumes of data generated by such projects.
14 |
15 | \section{Getting started with Evoker}
16 |
17 | Using Evoker requires two parts: the data, formatted and named in a specific way, which can be stored either locally or on a remote server to which you have SSH access; and the main Evoker program (\texttt{Evoker.jar}), which is run locally on your PC and is used for displaying cluster plots and assigning them a Pass/Fail verdict. You will need Java 5.0 (also known as version 1.5) or newer on your local machine to run Evoker. Details about preparing your data and using the program are presented below.
18 |
19 | \subsection{File formats and conventions}
20 |
21 | Evoker can work with files formatted in two different ways, either using a default file format based around the binary style files of the widely-used \texttt{PLINK} program or an Oxford file format that is based on the files used in the \texttt{WTCCC2} project.
22 |
23 | In both cases the data for your project (whether stored locally or remotely) should all be kept in a single directory to which you have read and write access. Because Evoker decides how to load and parse your data based on which files are in this directory, the directory should contain no files other than those you wish to load. The files in this directory are organised by `collection' (a group of samples, such as cases or controls) and `chromosome' (a group of SNPs on the same chromosome).
24 |
25 | The collection groupings can be any subset of your data which you wish to plot separately, such as samples collected in different ways, or genotyped in different laboratories. Furthermore, `chromosomes' can really be any grouping of SNPs, as this designation is used mostly for bookkeeping by the program. Indeed, you could organise your data with only a single `genome' chromosome, requiring only one set of files per collection, but this will result in a performance loss when using Evoker.
26 |
27 | \subsubsection{Default format}
28 | The default file format works with files formatted in the binary style of the widely-used \texttt{PLINK} program, along with one extension. Information about this format can be found at the \texttt{PLINK} website (see below). In the default format Evoker requires all of the \texttt{PLINK} style files (\texttt{.bed, .bim, .fam}), plus an additional binary intensity (\texttt{.bnt}) file, named according to a particular convention. Using the default file format your project should have one \texttt{.fam} file per collection (named \texttt{collection.fam}) and one set of \texttt{.bed/.bim/.bnt} files per collection--chromosome combination (named \texttt{collection.chromosome.bed} etc.).
29 |
30 | If, for instance, you had a case collection and a control collection in the default format, each genotyped on SNPs from chromosomes 20-22, you would have files as follows:
31 |
32 | \begin{verbatim}
33 | case.fam
34 | case.20.bed case.20.bim case.20.bnt
35 | case.21.bed case.21.bim case.21.bnt
36 | case.22.bed case.22.bim case.22.bnt
37 |
38 | control.fam
39 | control.20.bed control.20.bim control.20.bnt
40 | control.21.bed control.21.bim control.21.bnt
41 | control.22.bed control.22.bim control.22.bnt
42 | \end{verbatim}
43 |
44 | \subsubsection{Oxford format}
45 | The Oxford file format works with files formatted in the style of \texttt{WTCCC2} project files. Using the Oxford format, Evoker requires \texttt{.gen.bin, .int.bin, .sample, .snp} files; the \texttt{.gen.bin} and \texttt{.int.bin} files may be compressed (e.g.\ \texttt{.gen.bin.gz, .int.bin.gz}). As with the default format, the Oxford format files must be named according to a particular convention: your project should have one \texttt{.sample} file per collection (named \texttt{collection\_platform.sample}) and one set of \texttt{.gen.bin/.snp/.int.bin} files per collection--chromosome combination (named \texttt{collection\_chromosome\_platform.gen.bin} etc.).
46 |
47 | If, for instance, you had a case collection and a control collection in the Oxford format, each genotyped using an Illumina platform on SNPs from chromosomes 20-22, you would have files as follows:
48 |
49 | \begin{verbatim}
50 | case_illumina.sample
51 | case_20_illumina.gen.bin case_20_illumina.snp case_20_illumina.int.bin
52 | case_21_illumina.gen.bin case_21_illumina.snp case_21_illumina.int.bin
53 | case_22_illumina.gen.bin case_22_illumina.snp case_22_illumina.int.bin
54 |
55 | control_illumina.sample
56 | control_20_illumina.gen.bin control_20_illumina.snp control_20_illumina.int.bin
57 | control_21_illumina.gen.bin control_21_illumina.snp control_21_illumina.int.bin
58 | control_22_illumina.gen.bin control_22_illumina.snp control_22_illumina.int.bin
59 | \end{verbatim}
60 |
61 | \subsection{Trying Evoker with the sample dataset}
62 |
63 | The easiest way to learn how to use Evoker is to test it with the included sample dataset. Put the following files into a clean directory:
64 |
65 | \begin{verbatim}
66 | sample.fam
67 | sample.22.bim
68 | sample.22.bed
69 | sample.22.bnt
70 | \end{verbatim}
71 |
72 | This sample dataset represents 10 SNPs genotyped in 100 individuals. You should be able to launch the program by double-clicking the \texttt{.jar} file if your system is configured in the standard way. Otherwise, you can try running the program from the command line:
73 |
74 | \begin{verbatim}
75 | java -jar Evoker.jar
76 | \end{verbatim}
77 |
78 | \subsubsection{Opening a directory}
79 | Open the data directory by selecting \texttt{Open directory} from the \texttt{File} menu and select the directory where you've put the data. Evoker keeps a log of data sources you've opened, which you can view by selecting \texttt{Show Evoker log} from the \texttt{Log} menu. The current data directory is shown in the title bar of the main window, for easy reference.
80 |
81 | \subsubsection{Viewing SNPs}
82 | You can plot a SNP by typing its name in the box and clicking \texttt{Go}. Try ``snp0'', one of the SNPs in the sample dataset. Genotypes are coloured red, green and blue for the three genotype classes and grey for uncalled, or missing data. Beneath the plot are some summary statistics: minor allele frequency (MAF), genotyping percentage (GPC), and Hardy-Weinberg equilibrium \emph{p}-value (HWE pval).
83 |
84 | Right clicking on the plot brings up a set of menu options, including saving the plot as a PNG image. You can zoom in and out of the plot either using this menu or by clicking and dragging (down and to the right creates a ``zoom in box'', while up and to the left zooms back out again). Holding the pointer over a single point will show which sample corresponds to that point.
85 |
86 | You can type in more SNP names or use the \texttt{Random} button to view more cluster plots. After you've viewed a few SNPs you can see which you've viewed recently in the \texttt{History} menu. You can click on previous SNPs to show those cluster plots again.
87 |
88 | \subsubsection{Scoring SNPs from a list}
89 |
90 | One of the ways in which Evoker can be used is to load a list of SNPs (say, those showing evidence of association) to verify that they have good clusters. You can do this by selecting \texttt{Load marker list} from the \texttt{File} menu. The sample dataset includes \texttt{sample.list} which you can load now.
91 |
92 | The first SNP on the list, ``snp0'' should be plotted. Choose from the three approval options: \texttt{Yes, Maybe, No}. As you make a decision on each SNP the next SNP in the list is plotted. In addition to the buttons, you can also press \texttt{Y,M,N} to render a verdict. Each time you select an approval option, it is recorded in a file in the same place where the list file came from, with \texttt{.scores} appended (it is important, therefore, that you have write permission in this directory). If you now look at \texttt{sample.list.scores} you'll see a line for each SNP with a score of 1, 0 or -1 corresponding to \texttt{Yes}, \texttt{Maybe} or \texttt{No}.
93 |
94 | Because you must render a verdict in order, and only once, for each SNP, if you jump back to another SNP in the history, or type a SNP name in the box and click \texttt{Go} the Approval controls will be disabled until you've returned to the current list position, which can be done via a link in the \texttt{History} menu.
95 |
96 | \subsubsection{Excluding samples by list}
97 | Using Evoker you can view the impact that excluding certain samples (for example samples with a poor quality control score) has on clusters. To achieve this, Evoker can read in a \texttt{.qc} file, which is simply a list of the sample identifiers corresponding to the \texttt{.fam} file. Once this file is loaded, the listed samples are excluded from all subsequent plots for as long as filtering remains turned on.
98 |
99 | A \texttt{.qc} file can be loaded in two ways. If it is placed in the same directory as the \texttt{.fam/.bim/.bed/.bnt} files, it will be loaded when the directory is opened (note: if there is more than one \texttt{.qc} file in the directory, the first one found will be loaded). You can also load a \texttt{.qc} file after the data has been loaded by selecting \texttt{load exclude list} from the \texttt{file} menu.
100 |
101 | Once loaded the samples being excluded can be viewed from the Evoker log. Each time a new \texttt{.qc} file is loaded the previously loaded list is overwritten. Lastly filtering of samples can be turned on and off at any time by simply selecting \texttt{filter} data from the \texttt{file} menu.
102 |
103 | \subsection{Creating the files}
104 |
105 | When using Evoker with your own data, you can use \texttt{PLINK} to create your \texttt{.bed/.bim/.fam} files, but you'll need a special program, included in this package, to generate the \texttt{.bnt} files for Evoker. This is probably the hardest part of preparing your data for Evoker. The \texttt{int2bnt.pl} script will transform text intensity files into the correct binary format used by Evoker; it can accept intensity data in a number of formats, which are explained in more detail below.
106 |
107 | All input files should be named in the style \texttt{collection.chromosome.int} and the SNPs must be in the same order as the corresponding \texttt{.bim} file, and each pair of intensities should appear in the same order as the pair of alleles in the relevant entry in the matching \texttt{.bim} file.
108 |
109 | \subsubsection{Default intensity format}
110 | The default input format for \texttt{int2bnt.pl} is straightforward. It is a matrix of intensities with SNPs as rows and individuals as pairs of whitespace--separated columns. The first row of the file is a header with the names of the samples (each repeated twice because there are two intensities per sample), which must be in the same order as the matching \texttt{.fam} file. An example for two individuals and three SNPs might look like this:
111 |
112 | \begin{verbatim}
113 | SNP CASE1 CASE1 CASE2 CASE2
114 | rs123 0.956 0.009 0.999 0.010
115 | rs456 0.502 0.511 0.499 0.520
116 | rs789 0.012 0.026 0.003 0.977
117 | \end{verbatim}
118 |
119 | \subsubsection{Chiamo}
120 | If you use the Chiamo genotype calling program, the \texttt{int2bnt.pl} script is able to generate a \texttt{.bnt} file from the standard Chiamo input file. To use this file format just supply the following option when you run the \texttt{int2bnt.pl} script \texttt{--filetype "chiamo"}.
121 |
122 | \subsubsection{Illuminus}
123 | If you use the Illuminus genotype calling program, the \texttt{int2bnt.pl} script is able to generate a \texttt{.bnt} file from the standard Illuminus input file. To use this file format just supply the following option when you run the \texttt{int2bnt.pl} script \texttt{--filetype "illuminus"}.
124 |
125 | \subsubsection{Birdsuite}
126 | If you are using the Birdsuite collection of programs, the \texttt{study.allele\_summary} output file is accepted as input by the \texttt{int2bnt.pl} script to generate a \texttt{.bnt} file. To use this file format, just supply the following option when you run the \texttt{int2bnt.pl} script: \texttt{--filetype "birdsuite"}.
127 |
128 | \subsubsection{BeadStudio}
129 | The Evoker package also contains a script called \texttt{parse\_illumina.pl} for parsing the text output files from the Illumina BeadStudio software. The \texttt{parse\_illumina.pl} script will create the properly formatted \texttt{.bnt} and \texttt{.bed} files required by Evoker.
130 |
131 | \section{Remote data access}
132 |
133 | Even files in the compressed binary formats used by Evoker can be extremely large (many gigabytes) for datasets with thousands of samples and over one million SNPs. Since it is often unfeasible to have the complete dataset available on your PC, Evoker can access a data directory on any computer to which you have SSH access.
134 |
135 | Your data directory should be set up in exactly the same fashion as above, except that it needs the \texttt{evoker-helper.pl} script to be placed in the same spot. This script does the job of slicing out the data for individual SNPs from the data files, so that only a small amount of data is sent over the network, to maximise performance. Before you try to connect as described below, you might want to open the log to monitor the progress of your connection.
136 |
137 | Connect to a remote server by launching Evoker on your PC and selecting \texttt{Connect to remote server} from the \texttt{File} menu. Enter the name of the server you're connecting to, along with the absolute path of the data directory on the remote machine (see below for tips on specifying the correct path). Evoker requires a `scratch' directory on your computer to keep some (small) files it uses. This should be a directory you can read and write, but \emph{should not contain any other important files} because Evoker will overwrite files with the same names without warning. This should also be specified as an absolute path, but the \texttt{Browse} button can be used to help find the correct directory and its path.
138 |
139 | Enter your login details and click \texttt{OK} to connect. There might be a long delay the first time you connect to a remote data source because Evoker must download all the \texttt{.bim} and \texttt{.fam} files to the local directory. Evoker should work exactly the same as when the data files are stored locally, with the caveat that performance will be slower since data is being passed across the network. Note that Evoker uses files in the scratch directory to save time if they've already been requested from the remote server. Each time you connect to a remote data source you will be given the option of deleting all the files in your local directory; it's a good idea to clean out this directory if data on the remote server has changed, or if you're connecting to multiple remote data sources. The log will show some detail about the data being sent across the network.
140 |
141 | If you load a SNP list while using a remote data source, Evoker will download the data for all SNPs in the list in the background. This means that, unless you make very rapid Approval decisions, the program should be able to `keep up' with you and appear more responsive.
142 |
143 | \section{Additional Resources and tips}
144 |
145 | \subsection{The binary intensity format}
146 |
147 | The binary intensity format is very simple. First, a `magic number' of two bytes is written to the beginning of the file so that Evoker can recognise it. These bytes for Evoker are:
148 | \begin{verbatim}
149 | 00011010 00110001
150 | \end{verbatim}
151 |
152 | \noindent After these, each point is stored as a pair of 4--byte floats (for the intensity of the two alleles). These are simply packed in the same order as the text intensity files described above (a pair for each individual for the first SNP, then the same for the second SNP and so on).
153 |
154 | Evoker can also accept binary intensity data that uses two four byte integer values representing the number of rows of data (SNPs) and the number of columns (samples) as a header.
155 |
156 | \subsection{Tips}
157 |
158 | The perl scripts included are intended to work in most UNIX environments. They assume that the perl executable can be found at \texttt{/usr/bin/perl}, which you can change if for some reason that is not the case on your system. Finally, make sure that the script is `world executable'. You can accomplish this with the following command:
159 |
160 | \begin{verbatim}
161 | chmod +x evoker-helper.pl
162 | \end{verbatim}
163 |
164 | \noindent An easy way to get the correct string to enter in the data connection dialog for the remote directory is to login to that machine, change to that directory and run the \texttt{pwd} command, which will print the absolute path to that directory, which you can then cut and paste into the Evoker window.
165 |
166 | \subsection{Resources}
167 | Evoker.jar requires Java 5.0 or newer to run. You can download the latest version of Java at \texttt{www.java.com}
168 | \\
169 | \\
170 | \texttt{PLINK} binary pedfile (\texttt{.bed}) format:
171 |
172 | \indent \texttt{http://pngu.mgh.harvard.edu/purcell/plink/data.shtml\#bed}
173 | \end{document}
--------------------------------------------------------------------------------