├── .gitignore ├── Bio3D ├── count.R ├── distance.R └── parse_pdb.R ├── BioJava ├── README.md ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── jgreener │ │ └── pdb │ │ ├── parse_mmcif.java │ │ ├── parse_mmtf.java │ │ └── parse_pdb.java │ └── test │ └── java │ └── com │ └── jgreener │ └── pdb │ └── AppTest.java ├── BioPerl ├── count.pl ├── distance.pl └── parse_pdb.pl ├── BioRuby ├── count.rb ├── distance.rb └── parse_pdb.rb ├── BioStructures ├── count.jl ├── distance.jl ├── parse_mmcif.jl ├── parse_mmtf.jl ├── parse_pdb.jl └── ramachandran.jl ├── Biopython ├── count.py ├── distance.py ├── parse_mmcif.py ├── parse_mmtf.py ├── parse_pdb.py └── ramachandran.py ├── CITATION.bib ├── ESBTL ├── CMakeLists.txt ├── README.md └── parse_pdb.cc ├── GEMMI ├── Makefile ├── README.md ├── count.cc ├── distance.cc ├── parse_mmcif.cc └── parse_pdb.cc ├── LICENSE ├── MDAnalysis ├── count.py ├── distance.py ├── parse_pdb.py └── ramachandran.py ├── MIToS ├── count.jl ├── distance.jl └── parse_pdb.jl ├── ProDy ├── count.py ├── distance.py ├── parse_pdb.py └── ramachandran.py ├── README.md ├── Rpdb ├── count.R ├── distance.R ├── parse_pdb.R └── ramachandran.R ├── Victor ├── Makefile ├── README.md └── parse_pdb.cc ├── atomium ├── parse_mmcif.py ├── parse_mmtf.py └── parse_pdb.py ├── benchmarks.csv ├── biotite ├── parse_mmcif.py ├── parse_mmtf.py └── parse_pdb.py ├── checkwholepdb ├── checknewpdb.jl ├── checkwholepdb.jl └── checkwholepdb.py ├── chemfiles ├── Makefile ├── README.md ├── count.cpp ├── count.py ├── distance.cpp ├── distance.py ├── parse_mmcif.cpp ├── parse_mmcif.py ├── parse_mmtf.cpp ├── parse_mmtf.py ├── parse_pdb.cpp ├── parse_pdb.py ├── ramachandran.cpp └── ramachandran.py ├── plot ├── plot.jl └── plot.png └── tools ├── download_data.jl ├── mean.py ├── run_benchmarks.sh └── table.jl /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | *.dat 3 | .Rhistory 4 | BioJava/target 5 | 
BioJava/dependency-reduced-pom.xml 6 | GEMMI/gemmi 7 | GEMMI/parse_pdb 8 | GEMMI/parse_mmcif 9 | GEMMI/count 10 | GEMMI/distance 11 | Victor/*.o 12 | Victor/parse_pdb 13 | ESBTL/CMakeFiles 14 | ESBTL/CMakeCache.txt 15 | ESBTL/Makefile 16 | ESBTL/cmake_install.cmake 17 | ESBTL/parse_pdb 18 | chemfiles/parse_pdb 19 | chemfiles/parse_mmcif 20 | chemfiles/parse_mmtf 21 | chemfiles/count 22 | chemfiles/distance 23 | chemfiles/ramachandran 24 | *~ 25 | *.swp 26 | todo.txt 27 | -------------------------------------------------------------------------------- /Bio3D/count.R: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | library(bio3d) 4 | library(microbenchmark) 5 | 6 | pdb_filepath <- "data/1AKE.pdb" 7 | struc <- read.pdb(pdb_filepath, multi=TRUE) 8 | 9 | count <- function() { 10 | resnums <- struc$atom$resno[struc$atom$resid=="ALA"] 11 | chains <- struc$atom$chain[struc$atom$resid=="ALA"] 12 | resids <- paste(resnums, chains, sep="") 13 | return(length(unique(resids))) 14 | } 15 | 16 | bench <- microbenchmark(count(), times=1) 17 | 18 | cat(bench$time / 10^9, "\n", sep="") 19 | -------------------------------------------------------------------------------- /Bio3D/distance.R: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | library(bio3d) 6 | library(microbenchmark) 7 | 8 | pdb_filepath <- "data/1AKE.pdb" 9 | struc <- read.pdb(pdb_filepath) 10 | 11 | distance <- function() { 12 | coords <- matrix(struc$xyz, length(struc$xyz) / 3, 3, byrow=TRUE) 13 | is_res50 <- which(struc$atom$resno == 50 & struc$atom$chain == "A") 14 | is_res60 <- which(struc$atom$resno == 60 & struc$atom$chain == "A") 15 | return(min(dist.xyz(coords[is_res50,], coords[is_res60,]))) 16 | } 17 | 
18 | bench <- microbenchmark(distance(), times=1) 19 | 20 | cat(bench$time / 10^9, "\n", sep="") 21 | -------------------------------------------------------------------------------- /Bio3D/parse_pdb.R: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | library(bio3d) 4 | library(microbenchmark) 5 | 6 | pdb_filepath <- commandArgs(trailingOnly=TRUE)[1] 7 | 8 | bench <- microbenchmark(read.pdb(pdb_filepath, multi=TRUE), times=1) 9 | 10 | cat(bench$time / 10^9, "\n", sep="") 11 | -------------------------------------------------------------------------------- /BioJava/README.md: -------------------------------------------------------------------------------- 1 | Build with `mvn package`. 2 | -------------------------------------------------------------------------------- /BioJava/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.jgreener.pdb 5 | pdb-benchmarks 6 | jar 7 | 1.0-SNAPSHOT 8 | pdb-benchmarks 9 | http://maven.apache.org 10 | 11 | 12 | org.biojava 13 | biojava-structure 14 | 5.3.0 15 | 16 | 17 | junit 18 | junit 19 | 4.13.1 20 | test 21 | 22 | 23 | 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-shade-plugin 28 | 3.2.2 29 | 30 | 31 | package 32 | 33 | shade 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /BioJava/src/main/java/com/jgreener/pdb/parse_mmcif.java: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | package com.jgreener.pdb; 4 | 5 | import org.biojava.nbio.structure.Structure; 6 | import org.biojava.nbio.structure.io.StructureIOFile; 7 | import org.biojava.nbio.structure.io.MMCIFFileReader; 8 | 9 | public class parse_mmcif 10 | { 11 | public static void main( String[] args ) 12 | { 13 | String mmcif_filepath 
= args[0]; 14 | // Run once to trigger illegal reflective access warning 15 | StructureIOFile reader1 = new MMCIFFileReader(); 16 | try { 17 | Structure structure = reader1.getStructure(mmcif_filepath); 18 | } catch (Exception e) { 19 | e.printStackTrace(); 20 | } 21 | long startTime = System.nanoTime(); 22 | StructureIOFile reader2 = new MMCIFFileReader(); 23 | try { 24 | Structure structure = reader2.getStructure(mmcif_filepath); 25 | } catch (Exception e) { 26 | e.printStackTrace(); 27 | } 28 | long endTime = System.nanoTime(); 29 | System.out.println((endTime - startTime) / 1000000000.0); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /BioJava/src/main/java/com/jgreener/pdb/parse_mmtf.java: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a MMTF file given as an argument 2 | 3 | package com.jgreener.pdb; 4 | 5 | import org.biojava.nbio.structure.Structure; 6 | import org.biojava.nbio.structure.io.mmtf.MmtfActions; 7 | 8 | import java.nio.file.Paths; 9 | 10 | public class parse_mmtf 11 | { 12 | public static void main( String[] args ) 13 | { 14 | String mmtf_filepath = args[0]; 15 | // Run once to trigger illegal reflective access warning 16 | try { 17 | Structure structure = MmtfActions.readFromFile(Paths.get(mmtf_filepath)); 18 | } catch (Exception e) { 19 | e.printStackTrace(); 20 | } 21 | long startTime = System.nanoTime(); 22 | try { 23 | Structure structure = MmtfActions.readFromFile(Paths.get(mmtf_filepath)); 24 | } catch (Exception e) { 25 | e.printStackTrace(); 26 | } 27 | long endTime = System.nanoTime(); 28 | System.out.println((endTime - startTime) / 1000000000.0); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /BioJava/src/main/java/com/jgreener/pdb/parse_pdb.java: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a PDB 
file given as an argument 2 | 3 | package com.jgreener.pdb; 4 | 5 | import org.biojava.nbio.structure.Structure; 6 | import org.biojava.nbio.structure.io.PDBFileReader; 7 | 8 | public class parse_pdb 9 | { 10 | public static void main( String[] args ) 11 | { 12 | String pdb_filepath = args[0]; 13 | // Run once to trigger illegal reflective access warning 14 | PDBFileReader pdbreader1 = new PDBFileReader(); 15 | try { 16 | Structure structure = pdbreader1.getStructure(pdb_filepath); 17 | } catch (Exception e) { 18 | e.printStackTrace(); 19 | } 20 | long startTime = System.nanoTime(); 21 | PDBFileReader pdbreader2 = new PDBFileReader(); 22 | try { 23 | Structure structure = pdbreader2.getStructure(pdb_filepath); 24 | } catch (Exception e) { 25 | e.printStackTrace(); 26 | } 27 | long endTime = System.nanoTime(); 28 | System.out.println((endTime - startTime) / 1000000000.0); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /BioJava/src/test/java/com/jgreener/pdb/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.jgreener.pdb; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 
9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /BioPerl/count.pl: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | use Bio::Structure::IO; 4 | use Time::HiRes qw(time); 5 | use strict; 6 | 7 | my $pdb_filepath = "data/1AKE.pdb"; 8 | my $structio = Bio::Structure::IO->new(-file => $pdb_filepath); 9 | my $struc = $structio->next_structure; 10 | 11 | sub count { 12 | my $c = 0; 13 | for my $chain ($struc->get_chains) { 14 | for my $res ($struc->get_residues($chain)) { 15 | if (substr($res->id, 0, 3) eq "ALA") { 16 | $c++; 17 | } 18 | } 19 | } 20 | return $c; 21 | } 22 | 23 | my $start = time(); 24 | count(); 25 | my $end = time(); 26 | 27 | print $end - $start, "\n"; 28 | -------------------------------------------------------------------------------- /BioPerl/distance.pl: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | use Bio::Structure::IO; 6 | use Time::HiRes qw(time); 7 | use strict; 8 | 9 | my $pdb_filepath = "data/1AKE.pdb"; 10 | my $structio = Bio::Structure::IO->new(-file => $pdb_filepath); 11 | my $struc = $structio->next_structure; 12 | 13 | sub distance { 14 | my @coords_50 = (); 15 | my @coords_60 = (); 
16 | for my $chain ($struc->get_chains) { 17 | if ($chain->id eq "A") { 18 | for my $res ($struc->get_residues($chain)) { 19 | if (substr($res->id, -3, 3) eq "-50") { 20 | for my $atom ($struc->get_atoms($res)) { 21 | push @coords_50, [$atom->xyz]; 22 | } 23 | } elsif (substr($res->id, -3, 3) eq "-60") { 24 | for my $atom ($struc->get_atoms($res)) { 25 | push @coords_60, [$atom->xyz]; 26 | } 27 | } 28 | } 29 | } 30 | } 31 | my $min_sq_dist = "Infinity"; 32 | for (my $i = 0; $i < scalar(@coords_50); $i++) { 33 | for (my $j = 0; $j < scalar(@coords_60); $j++) { 34 | my $sq_dist = ($coords_50[$i][0]-$coords_60[$j][0]) ** 2 + ($coords_50[$i][1]-$coords_60[$j][1]) ** 2 + ($coords_50[$i][2]-$coords_60[$j][2]) ** 2; 35 | if ($sq_dist < $min_sq_dist) { 36 | $min_sq_dist = $sq_dist; 37 | } 38 | } 39 | } 40 | return sqrt($min_sq_dist); 41 | } 42 | 43 | my $start = time(); 44 | distance(); 45 | my $end = time(); 46 | 47 | print $end - $start, "\n"; 48 | -------------------------------------------------------------------------------- /BioPerl/parse_pdb.pl: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | use Bio::Structure::IO; 4 | use Time::HiRes qw(time); 5 | use strict; 6 | 7 | my $pdb_filepath = $ARGV[0]; 8 | 9 | sub parse { 10 | my $structio = Bio::Structure::IO->new(-file => $pdb_filepath); 11 | return $structio->next_structure; 12 | } 13 | 14 | my $start = time(); 15 | parse(); 16 | my $end = time(); 17 | 18 | print $end - $start, "\n"; 19 | -------------------------------------------------------------------------------- /BioRuby/count.rb: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | require "bio" 4 | require "benchmark" 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | pdb = Bio::PDB.new(File.read(pdb_filepath)) 8 | 9 | elapsed = Benchmark.realtime { 10 | 
pdb.find_residue { |res| res.resName == "ALA" }.length 11 | } 12 | 13 | print elapsed, "\n" 14 | -------------------------------------------------------------------------------- /BioRuby/distance.rb: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | require "bio" 6 | require "benchmark" 7 | include Bio::PDB::Utils 8 | 9 | pdb_filepath = "data/1AKE.pdb" 10 | pdb = Bio::PDB.new(File.read(pdb_filepath)) 11 | 12 | elapsed = Benchmark.realtime { 13 | res_50 = pdb.find_residue { |res| res.resSeq == 50 and res.chain.id == "A" }[0] 14 | res_60 = pdb.find_residue { |res| res.resSeq == 60 and res.chain.id == "A" }[0] 15 | min_dist = Float::INFINITY 16 | res_50.each_atom do |atom_50| 17 | res_60.each_atom do |atom_60| 18 | if distance(atom_50, atom_60) < min_dist 19 | min_dist = distance(atom_50, atom_60) 20 | end 21 | end 22 | end 23 | min_dist 24 | } 25 | 26 | print elapsed, "\n" 27 | -------------------------------------------------------------------------------- /BioRuby/parse_pdb.rb: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | require "bio" 4 | require "benchmark" 5 | 6 | pdb_filepath = ARGV[0] 7 | 8 | elapsed = Benchmark.realtime { 9 | Bio::PDB.new(File.read(pdb_filepath)) 10 | } 11 | 12 | print elapsed, "\n" 13 | -------------------------------------------------------------------------------- /BioStructures/count.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | using BioStructures 4 | 5 | pdb_filepath = "data/1AKE.pdb" 6 | struc = read(pdb_filepath, PDB) 7 | 8 | function counter() 9 | alanineselector(res::AbstractResidue) = resnameselector(res, ["ALA"]) 
10 | return countresidues(struc, alanineselector) 11 | end 12 | 13 | # Run to JIT compile 14 | counter() 15 | 16 | elapsed = @elapsed counter() 17 | 18 | println(elapsed) 19 | -------------------------------------------------------------------------------- /BioStructures/distance.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | using BioStructures 6 | 7 | pdb_filepath = "data/1AKE.pdb" 8 | struc = read(pdb_filepath, PDB) 9 | 10 | # Run to JIT compile 11 | distance(struc['A'][50], struc['A'][60]) 12 | 13 | elapsed = @elapsed distance(struc['A'][50], struc['A'][60]) 14 | 15 | println(elapsed) 16 | -------------------------------------------------------------------------------- /BioStructures/parse_mmcif.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | using BioStructures 4 | 5 | mmcif_filepath = ARGS[1] 6 | 7 | # Run to JIT compile 8 | read(mmcif_filepath, MMCIF) 9 | 10 | elapsed = @elapsed struc = read(mmcif_filepath, MMCIF) 11 | 12 | println(elapsed) 13 | -------------------------------------------------------------------------------- /BioStructures/parse_mmtf.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a MMTF file given as an argument 2 | 3 | using BioStructures 4 | 5 | mmtf_filepath = ARGS[1] 6 | 7 | # Run to JIT compile 8 | read(mmtf_filepath, MMTF) 9 | 10 | elapsed = @elapsed struc = read(mmtf_filepath, MMTF) 11 | 12 | println(elapsed) 13 | -------------------------------------------------------------------------------- /BioStructures/parse_pdb.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given 
as an argument 2 | 3 | using BioStructures 4 | 5 | pdb_filepath = ARGS[1] 6 | 7 | # Run to JIT compile 8 | read(pdb_filepath, PDB) 9 | 10 | elapsed = @elapsed struc = read(pdb_filepath, PDB) 11 | 12 | println(elapsed) 13 | -------------------------------------------------------------------------------- /BioStructures/ramachandran.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | 3 | using BioStructures 4 | 5 | pdb_filepath = "data/1AKE.pdb" 6 | struc = read(pdb_filepath, PDB) 7 | 8 | # Run to JIT compile 9 | ramachandranangles(struc, standardselector) 10 | 11 | elapsed = @elapsed ramachandranangles(struc, standardselector) 12 | 13 | println(elapsed) 14 | -------------------------------------------------------------------------------- /Biopython/count.py: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | import time 4 | from Bio.PDB import PDBParser 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | parser = PDBParser() 8 | struc = parser.get_structure("", pdb_filepath) 9 | 10 | def count(): 11 | count = 0 12 | for res in struc.get_residues(): 13 | if res.get_resname() == "ALA": 14 | count += 1 15 | return count 16 | 17 | start = time.time() 18 | count() 19 | end = time.time() 20 | 21 | print(end - start) 22 | -------------------------------------------------------------------------------- /Biopython/distance.py: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | import time 6 | from Bio.PDB import PDBParser 7 | 8 | pdb_filepath = "data/1AKE.pdb" 9 | parser = PDBParser() 10 | struc = parser.get_structure("", pdb_filepath) 11 | 12 | def distance(): 13 | 
min_dist = float("inf") 14 | for atom_a in struc[0]['A'][50]: 15 | for atom_b in struc[0]['A'][60]: 16 | if atom_a - atom_b < min_dist: 17 | min_dist = atom_a - atom_b 18 | return min_dist 19 | 20 | start = time.time() 21 | distance() 22 | end = time.time() 23 | 24 | print(end - start) 25 | -------------------------------------------------------------------------------- /Biopython/parse_mmcif.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | import sys 4 | import time 5 | from Bio.PDB import MMCIFParser 6 | 7 | mmcif_filepath = sys.argv[1] 8 | parser = MMCIFParser() 9 | 10 | start = time.time() 11 | parser.get_structure("", mmcif_filepath) 12 | end = time.time() 13 | 14 | print(end - start) 15 | -------------------------------------------------------------------------------- /Biopython/parse_mmtf.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a MMTF file given as an argument 2 | 3 | import sys 4 | import time 5 | from Bio.PDB.mmtf import MMTFParser 6 | 7 | mmtf_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | MMTFParser.get_structure(mmtf_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /Biopython/parse_pdb.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | import sys 4 | import time 5 | from Bio.PDB import PDBParser 6 | 7 | pdb_filepath = sys.argv[1] 8 | parser = PDBParser() 9 | 10 | start = time.time() 11 | parser.get_structure("", pdb_filepath) 12 | end = time.time() 13 | 14 | print(end - start) 15 | -------------------------------------------------------------------------------- /Biopython/ramachandran.py: 
-------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | 3 | import time 4 | from Bio.PDB import PDBParser 5 | from Bio.PDB.vectors import calc_dihedral 6 | 7 | pdb_filepath = "data/1AKE.pdb" 8 | parser = PDBParser() 9 | struc = parser.get_structure("", pdb_filepath) 10 | 11 | def ramachandran(): 12 | phi_angles = [] 13 | psi_angles = [] 14 | residues = list(struc.get_residues()) 15 | for i in range(1, len(residues) - 1): 16 | res = residues[i] 17 | res_prev = residues[i - 1] 18 | res_next = residues[i + 1] 19 | # Check residues have sequential residue numbers 20 | if res.get_id()[1] == res_prev.get_id()[1] + 1 and res_next.get_id()[1] == res.get_id()[1] + 1: 21 | try: 22 | phi_angle = calc_dihedral(res_prev["C"].get_vector(), res["N"].get_vector(), res["CA"].get_vector(), res["C"].get_vector()) 23 | psi_angle = calc_dihedral(res["N"].get_vector(), res["CA"].get_vector(), res["C"].get_vector(), res_next["N"].get_vector()) 24 | phi_angles.append(phi_angle) 25 | psi_angles.append(psi_angle) 26 | except: 27 | pass 28 | return phi_angles, psi_angles 29 | 30 | start = time.time() 31 | ramachandran() 32 | end = time.time() 33 | 34 | print(end - start) 35 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @article{BioStructures.jl-2020, 2 | author="Greener, J G and Selvaraj, J and Ward, B J", 3 | title="{BioStructures.jl: read, write and manipulate macromolecular structures in Julia}", 4 | journal="Bioinformatics", 5 | year="2020", 6 | volume="36", 7 | number="14", 8 | pages="4206--4207", 9 | doi="10.1093/bioinformatics/btaa502", 10 | } 11 | -------------------------------------------------------------------------------- /ESBTL/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # cmake 
file for ESBTL benchmarks 2 | # The ESBTL_DIR environment variable needs to be set to the ESBTL root directory 3 | project( benchmarks ) 4 | CMAKE_MINIMUM_REQUIRED(VERSION 2.4.5) 5 | include_directories($ENV{ESBTL_DIR}/include/) 6 | add_executable(parse_pdb parse_pdb.cc) 7 | -------------------------------------------------------------------------------- /ESBTL/README.md: -------------------------------------------------------------------------------- 1 | ESBTL_DIR needs to be set as the ESBTL root directory. 2 | CPLUS_INCLUDE_PATH needs to include the ESBTL library path. 3 | Compile with `cmake .` then `make`. 4 | -------------------------------------------------------------------------------- /ESBTL/parse_pdb.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a PDB file given as an argument 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main( int argc, char* argv[] ) { 8 | std::string pdb_filepath = argv[1]; 9 | std::cout.setstate(std::ios_base::failbit); 10 | struct timespec tstart, tend; 11 | clock_gettime(CLOCK_REALTIME, &tstart); 12 | ESBTL::PDB_line_selector sel; 13 | std::vector systems; 14 | ESBTL::All_atom_system_builder builder(systems, sel.max_nb_systems()); 15 | ESBTL::read_a_pdb_file(pdb_filepath, sel, builder, ESBTL::Accept_none_occupancy_policy >()); 16 | clock_gettime(CLOCK_REALTIME, &tend); 17 | std::cout.clear(); 18 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec)/1E9); 19 | } 20 | -------------------------------------------------------------------------------- /GEMMI/Makefile: -------------------------------------------------------------------------------- 1 | TARGETS=parse_pdb parse_mmcif count distance 2 | all: ${TARGETS} 3 | 4 | %: %.cc 5 | ${CXX} -std=c++11 -O2 -Igemmi/include $< -o $@ -lrt 6 | 7 | clean: 8 | rm -rf ${TARGETS} 9 | -------------------------------------------------------------------------------- /GEMMI/README.md: 
-------------------------------------------------------------------------------- 1 | Compile with: 2 | ``` 3 | git clone https://github.com/project-gemmi/gemmi.git 4 | make 5 | ``` 6 | or 7 | ``` 8 | c++ -std=c++11 -Igemmi/include -O2 parse_pdb.cc -o parse_pdb 9 | c++ -std=c++11 -Igemmi/include -O2 parse_mmcif.cc -o parse_mmcif 10 | # etc. 11 | ``` 12 | -------------------------------------------------------------------------------- /GEMMI/count.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the counting of alanine residues in a PDB file 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | static int count(const gemmi::Structure& st) { 9 | int counter = 0; 10 | const std::string resname = "ALA"; 11 | for (const gemmi::Chain& chain : st.first_model().chains) 12 | for (const gemmi::Residue& residue : chain.residues) 13 | if (residue.name == resname) 14 | ++counter; 15 | return counter; 16 | } 17 | 18 | int main() { 19 | std::string pdb_filepath = "data/1AKE.pdb"; 20 | gemmi::Structure st = gemmi::read_pdb_file(pdb_filepath); 21 | timespec tstart, tend; 22 | clock_gettime(CLOCK_REALTIME, &tstart); 23 | int n = count(st); 24 | clock_gettime(CLOCK_REALTIME, &tend); 25 | assert(n == 38); 26 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec) / 1e9); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /GEMMI/distance.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the calculation of a distance in a PDB file 2 | // The distance is the closest distance between any atoms of residues 50 and 60 3 | // of chain A in 1AKE 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | static double distance(gemmi::Structure& st) { 13 | gemmi::Chain* a = st.first_model().find_chain("A"); 14 | gemmi::Residue& r50 = 
a->find_residue_group(gemmi::SeqId(50,' '))[0]; 15 | gemmi::Residue& r60 = a->find_residue_group(gemmi::SeqId(60,' '))[0]; 16 | double min_dist_sq = INFINITY; 17 | for (const gemmi::Atom& a: r50.atoms) 18 | for (const gemmi::Atom& b: r60.atoms) { 19 | double d2 = a.pos.dist_sq(b.pos); 20 | if (d2 < min_dist_sq) 21 | min_dist_sq = d2; 22 | } 23 | return std::sqrt(min_dist_sq); 24 | } 25 | 26 | int main() { 27 | std::string pdb_filepath = "data/1AKE.pdb"; 28 | gemmi::Structure st = gemmi::read_pdb_file(pdb_filepath); 29 | timespec tstart, tend; 30 | clock_gettime(CLOCK_REALTIME, &tstart); 31 | double d = distance(st); 32 | clock_gettime(CLOCK_REALTIME, &tend); 33 | assert(std::fabs(d - 9.57605) < 1e-5); 34 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec) / 1e9); 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /GEMMI/parse_mmcif.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main( int argc, char* argv[] ) { 8 | std::string mmcif_filepath = argv[1]; 9 | struct timespec tstart, tend; 10 | clock_gettime(CLOCK_REALTIME, &tstart); 11 | gemmi::Structure st = gemmi::read_structure_file(mmcif_filepath); 12 | clock_gettime(CLOCK_REALTIME, &tend); 13 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec)/1E9); 14 | } 15 | -------------------------------------------------------------------------------- /GEMMI/parse_pdb.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a PDB file given as an argument 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main( int argc, char* argv[] ) { 8 | std::string pdb_filepath = argv[1]; 9 | struct timespec tstart, tend; 10 | clock_gettime(CLOCK_REALTIME, &tstart); 11 | gemmi::Structure st = 
gemmi::read_structure_file(pdb_filepath); 12 | clock_gettime(CLOCK_REALTIME, &tend); 13 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec)/1E9); 14 | } 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Joe Greener 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MDAnalysis/count.py: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | import time 4 | import MDAnalysis as mda 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | u = mda.Universe(pdb_filepath) 8 | 9 | def count(): 10 | return (u.residues.resnames == "ALA").sum() 11 | 12 | start = time.time() 13 | count() 14 | end = time.time() 15 | 16 | print(end - start) 17 | -------------------------------------------------------------------------------- /MDAnalysis/distance.py: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | import time 6 | import MDAnalysis as mda 7 | from MDAnalysis.lib.distances import distance_array 8 | 9 | pdb_filepath = "data/1AKE.pdb" 10 | u = mda.Universe(pdb_filepath) 11 | 12 | def distance(): 13 | segA = u.segments[0] 14 | r50 = segA.atoms.select_atoms("resid 50") 15 | r60 = segA.atoms.select_atoms("resid 60") 16 | da = distance_array(r50.positions, r60.positions) 17 | return da.min() 18 | 19 | start = time.time() 20 | distance() 21 | end = time.time() 22 | 23 | print(end - start) 24 | -------------------------------------------------------------------------------- /MDAnalysis/parse_pdb.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | import sys 4 | import time 5 | import MDAnalysis as mda 6 | 7 | pdb_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | mda.coordinates.PDB.PDBReader(pdb_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /MDAnalysis/ramachandran.py: 
-------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | 3 | import time 4 | import MDAnalysis as mda 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | u = mda.Universe(pdb_filepath) 8 | 9 | def ramachandran(): 10 | phi_angles = [] 11 | psi_angles = [] 12 | for res in u.residues: 13 | try: 14 | phi = res.phi_selection() 15 | except: 16 | pass 17 | else: 18 | if not phi is None: 19 | phi_angles.append(phi.dihedral.value()) 20 | try: 21 | psi = res.psi_selection() 22 | except: 23 | pass 24 | else: 25 | if not psi is None: 26 | psi_angles.append(psi.dihedral.value()) 27 | return phi_angles, psi_angles 28 | 29 | start = time.time() 30 | ramachandran() 31 | end = time.time() 32 | 33 | print(end - start) 34 | -------------------------------------------------------------------------------- /MIToS/count.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | using MIToS.PDB 4 | 5 | pdb_filepath = "data/1AKE.pdb" 6 | struc = read(pdb_filepath, PDBFile) 7 | 8 | counter() = count(res -> res.id.name == "ALA", struc) 9 | 10 | # Run to JIT compile 11 | counter() 12 | 13 | elapsed = @elapsed counter() 14 | 15 | println(elapsed) 16 | -------------------------------------------------------------------------------- /MIToS/distance.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | using MIToS.PDB 6 | 7 | pdb_filepath = "data/1AKE.pdb" 8 | struc = read(pdb_filepath, PDBFile, model="1", chain="A", group="ATOM") 9 | 10 | # Run to JIT compile 11 | distance(struc[50], struc[60]) 12 | 13 | elapsed = @elapsed distance(struc[50], struc[60]) 14 | 15 | println(elapsed) 16 | 
-------------------------------------------------------------------------------- /MIToS/parse_pdb.jl: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | using MIToS.PDB 4 | 5 | pdb_filepath = ARGS[1] 6 | 7 | # Run to JIT compile 8 | read(pdb_filepath, PDBFile) 9 | 10 | elapsed = @elapsed struc = read(pdb_filepath, PDBFile) 11 | 12 | println(elapsed) 13 | -------------------------------------------------------------------------------- /ProDy/count.py: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | import time 4 | from prody import * 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | struc = parsePDB(pdb_filepath) 8 | 9 | def count(): 10 | count = 0 11 | for res in struc.getHierView().iterResidues(): 12 | if res.getResname() == "ALA": 13 | count += 1 14 | return count 15 | 16 | start = time.time() 17 | count() 18 | end = time.time() 19 | 20 | print(end - start) 21 | -------------------------------------------------------------------------------- /ProDy/distance.py: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | import time 6 | from prody import * 7 | 8 | pdb_filepath = "data/1AKE.pdb" 9 | struc = parsePDB(pdb_filepath) 10 | 11 | def distance(): 12 | min_dist = float("inf") 13 | for atom_a in struc['A', 50]: 14 | for atom_b in struc['A', 60]: 15 | if calcDistance(atom_a, atom_b) < min_dist: 16 | min_dist = calcDistance(atom_a, atom_b) 17 | return min_dist 18 | 19 | start = time.time() 20 | distance() 21 | end = time.time() 22 | 23 | print(end - start) 24 | -------------------------------------------------------------------------------- /ProDy/parse_pdb.py: 
-------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | import sys 4 | import time 5 | from prody import * 6 | 7 | pdb_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | parsePDB(pdb_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /ProDy/ramachandran.py: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | 3 | import time 4 | from prody import * 5 | 6 | pdb_filepath = "data/1AKE.pdb" 7 | struc = parsePDB(pdb_filepath) 8 | 9 | def ramachandran(): 10 | phi_angles = [] 11 | psi_angles = [] 12 | for res in struc.getHierView().iterResidues(): 13 | try: 14 | phi_angle = calcPhi(res) 15 | psi_angle = calcPsi(res) 16 | phi_angles.append(phi_angle) 17 | psi_angles.append(psi_angle) 18 | except: 19 | pass 20 | return phi_angles, psi_angles 21 | 22 | start = time.time() 23 | ramachandran() 24 | end = time.time() 25 | 26 | print(end - start) 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDB benchmarks 2 | 3 | Open source software packages to parse files in various formats from the [Protein Data Bank](http://www.rcsb.org/pdb/home/home.do) (PDB) and manipulate protein structures exist in many languages, often as part of Bio* projects. 4 | 5 | This repository aims to collate benchmarks for common tasks across various languages and packages. The collection of scripts may also be useful to get an idea how each package works. 
6 | 7 | Please feel free to contribute scripts from other packages, or submit improvements to the scripts already present - I'm looking for the fastest implementation for each software that makes use of the provided API. 8 | 9 | Disclosure: I contributed the BioStructures.jl package to BioJulia and have made contributions to Biopython. 10 | 11 | ## Tests 12 | 13 | * Parsing 2 PDB entries, taken from the benchmarking in [1], in the PDB, mmCIF and MMTF formats: 14 | * [1CRN](http://www.rcsb.org/pdb/explore/explore.do?structureId=1crn) - hydrophobic protein (327 atoms). 15 | * [1HTQ](http://www.rcsb.org/pdb/explore/explore.do?structureId=1htq) - multicopy glutamine synthetase (10 models of 97,872 atoms). 16 | * Counting the number of alanine residues in adenylate kinase ([1AKE](http://www.rcsb.org/pdb/explore/explore.do?structureId=1ake)). 17 | * Calculating the distance between residues 50 and 60 of chain A in adenylate kinase ([1AKE](http://www.rcsb.org/pdb/explore/explore.do?structureId=1ake)). 18 | * Calculating the Ramachandran phi/psi angles in adenylate kinase ([1AKE](http://www.rcsb.org/pdb/explore/explore.do?structureId=1ake)). 19 | 20 | [1] Gajda MJ, hPDB - Haskell library for processing atomic biomolecular structures in protein data bank format, *BMC Research Notes* 2013, **6**:483 - [link](http://bmcresnotes.biomedcentral.com/articles/10.1186/1756-0500-6-483) 21 | 22 | The PDB files can be downloaded to directory `data` by running `julia tools/download_data.jl` from this directory. If you have all the software installed, and compiled where applicable, you can run `sh tools/run_benchmarks.sh` from this directory to run the benchmarks and store the results in `benchmarks.csv`. The mean over a number of runs is taken for each benchmark to obtain the values below. 23 | 24 | Benchmarks were carried out on an Intel Xeon CPU E5-1620 v3 3.50GHz x 8 processor with 32 GB 2400 MHz DDR4 RAM. The operating system was CentOS v8.1. Time is the elapsed time. 
25 | 26 | ## Software 27 | 28 | Currently, 16 packages across 7 programming languages are included in the benchmarks: 29 | * [BioStructures](https://github.com/BioJulia/BioStructures.jl) v0.10.1 running on Julia v1.3.1; times measured after JIT compilation. 30 | * [MIToS](https://github.com/diegozea/MIToS.jl) v2.4.0 running on Julia v1.3.1; times measured after JIT compilation. 31 | * [Biopython](http://biopython.org/wiki/Biopython) v1.76 running on Python v3.7.6. 32 | * [ProDy](http://prody.csb.pitt.edu) v1.10.11 running on Python v3.7.6. 33 | * [MDAnalysis](http://www.mdanalysis.org) v0.20.1 running on Python v3.7.6. 34 | * [biotite](https://www.biotite-python.org) v0.20.1 running on Python v3.7.6. 35 | * [atomium](https://github.com/samirelanduk/atomium) v1.0.2 running on Python v3.7.6. 36 | * [Bio3D](http://thegrantlab.org/bio3d/index.php) v2.4.1 running on R v3.6.2. 37 | * [Rpdb](https://cran.r-project.org/web/packages/Rpdb/index.html) v2.3 running on R v3.6.2. 38 | * [BioJava](https://biojava.org) v5.3.0 running on Java v1.8.0. 39 | * [BioPerl](http://bioperl.org/index.html) v1.007002 running on Perl v5.26.3. 40 | * [BioRuby](http://bioruby.org) v2.0.1 running on Ruby v2.5.5. 41 | * [GEMMI](https://gemmi.readthedocs.io/en/latest/index.html) v0.3.6 compiled with gcc v8.3.1; there is also a Python interface but benchmarking was done in C++. 42 | * [Victor](http://protein.bio.unipd.it/victor/index.php/Main_Page) v1.0 compiled with gcc v7.3.1. 43 | * [ESBTL](http://esbtl.sourceforge.net/index.html) v1.0-beta01 compiled with gcc v7.3.1. 44 | * [chemfiles](https://chemfiles.org) v0.9.3 compiled with gcc v7.3.0 (C++ version) or running on Python v3.7.6 (Python version). 45 | 46 | ## Results 47 | 48 | Note that direct comparison between these times should be treated with caution, as each package does something slightly different. For example, things that increase parsing time include: 49 | 50 | * Parsing the header information. 
51 | * Accounting for disorder at both the atom and residue (point mutation) level. 52 | * Forming a heirarchical model of the protein that makes access to specific residues, atoms etc. easier and faster after parsing. 53 | * Allowing models in a file to have different atoms present. 54 | * Checking that the file format is adhered to at various levels of strictness. 55 | 56 | Each package supports these to varying degrees. 57 | 58 | | | BioStructures | MIToS | Biopython | ProDy | MDAnalysis | biotite | atomium | Bio3D | Rpdb | BioJava | BioPerl | BioRuby | GEMMI | Victor | ESBTL | chemfiles-python | chemfiles-cxx | 59 | | :-------------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | :--------------- | 60 | | Parse PDB 1CRN / ms | 0.75 | 0.63 | 7.3 | 3.1 | 4.2 | 4.4 | 7.0 | 10.0 | 9.5 | 8.1 | 43.0 | 21.0 | 0.24 | 7.6 | 2.4 | 4.5 | 0.67 | 61 | | Parse PDB 1HTQ / s | 2.6 | 2.8 | 16.0 | 2.1 | 1.5 | 4.8 | 20.0 | 2.9 | 14.0 | 1.3 | 49.0 | 13.0 | 0.36 | 11.0 | - | - | - | 62 | | Parse mmCIF 1CRN / ms | 2.0 | - | 16.0 | - | - | 4.8 | 13.0 | - | - | 40.0 | - | - | 0.97 | - | - | 3.8 | 0.99 | 63 | | Parse mmCIF 1HTQ / s | 8.0 | - | 45.0 | - | - | 9.0 | 36.0 | - | - | 17.0 | - | - | 1.5 | - | - | 2.0 | 2.0 | 64 | | Parse MMTF 1CRN / ms | 1.1 | - | 4.5 | - | - | 1.2 | 4.6 | - | - | 4.1 | - | - | - | - | - | 3.2 | 0.44 | 65 | | Parse MMTF 1HTQ / s | 3.6 | - | 16.0 | - | - | 0.16 | 43.0 | - | - | 0.74 | - | - | - | - | - | - | - | 66 | | Count / ms | 0.17 | 0.017 | 0.21 | 8.8 | 0.068 | - | - | 0.16 | 0.2 | - | 0.42 | 0.073 | 0.004 | - | - | 0.75 | 0.092 | 67 | | Distance / ms | 0.012 | 0.0044 | 0.25 | 50.0 | 0.62 | - | - | 19.0 | 1.3 | - | 0.53 | 0.32 | 0.001 | - | - | 0.55 | 0.19 | 68 | | Ramachandran / ms | 1.4 | - 
| 120.0 | 210.0 | 1200.0 | - | - | - | - | - | - | - | - | - | - | 7.4 | 2.1 | 69 | | Language | Julia | Julia | Python | Python | Python | Python | Python | R | R | Java | Perl | Ruby | C++/Python | C++ | C++ | Python | C++ | 70 | | License | MIT | MIT | Biopython | MIT | GPLv2 | BSD 3-Clause | MIT | GPLv2 | GPLv2/GPLv3 | LGPLv2.1 | GPL/Artistic | Ruby | MPLv2/LGPLv3 | GPLv3 | GPLv3 | BSD 3-Clause | BSD 3-Clause | 71 | | Hierarchical parsing | ✓ | ✗ | ✓ | ✓ | ✓ | ✗ | ✓ | ✗ | ✗ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | 72 | | Supports disorder | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ | ✗ | ✗ | 73 | | Writes PDBs | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ | ✓ | ✓ | ✓ | ✓ | ✓ | 74 | | Parses PDB header | ✗ | ✗ | ✓ | ✓ | ✗ | ✗ | ✓ | ✓ | ✓ | ✓ | ✗ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | 75 | | Superimposition | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | 76 | | PCA | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | 77 | 78 | Benchmarks as a plot, sorted by increasing time to parse PDB 1CRN: 79 | 80 | ![benchmarks](plot/plot.png "benchmarks") 81 | 82 | ## Parsing the whole PDB 83 | 84 | It is instructive to run parsers over the whole PDB to see where errors arise. This approach has led to me submitting corrections for small mistakes (e.g. duplicate atoms, residue number errors) in a few PDB structures. As of July 2018, the PDB entries that error with the Biopython (permissive mode) and BioJulia parsers are: 85 | * 4UDF - mmCIF file errors in Biopython and BioJulia due to duplicate C and O atoms in Lys91 of chains B, F etc. 86 | * 1EJG - mmCIF file errors in Biopython due to blank and non-blank alt loc IDs at residue Pro22/Ser22. 87 | * 5O61 - mmCIF file errors in Biopython due to an incorrect residue number at line 165,223. 88 | 89 | Running Biopython in non-permissive mode picks up more potential problems such as broken chains and mixed blank/non-blank alt loc IDs. 
For further discussion on errors in PDB files see the Biopython [documentation](http://biopython.org/DIST/docs/tutorial/Tutorial.pdf). The scripts to reproduce the whole PDB checking can be found in `checkwholepdb`. There is also a script to check recent PDB changes that can be run as a CRON job. 90 | 91 | ## Opinions 92 | 93 | * For most purposes, particularly work on small numbers of files, the speed of the programs will not hold you back. In this case use the language/package you are most familiar with. 94 | * For fast parsing, use a binary format such as [MMTF](http://mmtf.rcsb.org) or [binaryCIF](https://github.com/dsehnal/BinaryCIF). 95 | * Whilst mmCIF became the standard PDB archive format in 2014, and is a very flexible archive format, that does not mean that it is the best choice for all of bioinformatics. mmCIF files take up a lot of space on disk, are slowest to read and do not yet work with many bioinformatics tools. 96 | * If you are analysing ensembles of proteins then use packages with that functionality, such as ProDy or Bio3D, rather than writing the code yourself. 97 | 98 | ## Citation 99 | 100 | If you use these benchmarks, please cite the [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) paper where they appear: 101 | 102 | Greener JG, Selvaraj J and Ward BJ. BioStructures.jl: read, write and manipulate macromolecular structures in Julia, *Bioinformatics* 36(14):4206-4207 (2020) - [link](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btaa502/5837108?guestAccessKey=aec90643-1d43-4521-9883-4a4a669187da) - [PDF](https://github.com/BioJulia/BioStructures.jl/blob/master/paper.pdf) 103 | 104 | ## Contributing 105 | 106 | If you want to contribute benchmarks for a package, please make a pull request with the script(s) in a directory like the other packages. I will run the benchmarks again and change the README, thanks. 
107 | 108 | ## Resources 109 | 110 | * Information on file formats for [PDB](http://www.wwpdb.org/documentation/file-format), [mmCIF](http://mmcif.wwpdb.org) and [MMTF](https://github.com/rcsb/mmtf). 111 | * Benchmarks for mmCIF parsing can be found [here](https://github.com/project-gemmi/mmcif-benchmark). 112 | * A list of PDB parsing packages, particularly in C/C++, can be found [here](http://bioinf.org.uk/software/bioplib/libraries). 113 | * The Biopython [documentation](http://biopython.org/DIST/docs/tutorial/Tutorial.pdf) has a useful discussion on disorder at the atom and residue level. 114 | * Sets of utility scripts exist including [pdbtools](https://github.com/harmslab/pdbtools), [pdb-tools](https://github.com/JoaoRodrigues/pdb-tools) and [PDBFixer](https://github.com/pandegroup/pdbfixer). 115 | -------------------------------------------------------------------------------- /Rpdb/count.R: -------------------------------------------------------------------------------- 1 | # Benchmark the counting of alanine residues in a PDB file 2 | 3 | library(Rpdb) 4 | library(microbenchmark) 5 | 6 | pdb_filepath <- "data/1AKE.pdb" 7 | struc <- read.pdb(pdb_filepath) 8 | 9 | count <- function() { 10 | resnums <- struc$atoms$resid[struc$atoms$resname=="ALA"] 11 | chains <- struc$atoms$chainid[struc$atoms$resname=="ALA"] 12 | resids <- paste(resnums, chains, sep="") 13 | return(length(unique(resids))) 14 | } 15 | 16 | bench <- microbenchmark(count(), times=1) 17 | 18 | cat(bench$time / 10^9, "\n", sep="") 19 | -------------------------------------------------------------------------------- /Rpdb/distance.R: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of a distance in a PDB file 2 | # The distance is the closest distance between any atoms of residues 50 and 60 3 | # of chain A in 1AKE 4 | 5 | library(Rpdb) 6 | library(microbenchmark) 7 | 8 | pdb_filepath <- "data/1AKE.pdb" 9 | struc <- 
read.pdb(pdb_filepath) 10 | 11 | distance <- function() { 12 | is.res50 <- struc$atoms$resid == 50 & struc$atoms$chainid == "A" 13 | is.res60 <- struc$atoms$resid == 60 & struc$atoms$chainid == "A" 14 | d <- distances(struc, is.res50, is.res60) 15 | return(min(norm(d, type="xyz"))) 16 | } 17 | 18 | bench <- microbenchmark(distance(), times=1) 19 | 20 | cat(bench$time / 10^9, "\n", sep="") 21 | -------------------------------------------------------------------------------- /Rpdb/parse_pdb.R: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | library(Rpdb) 4 | library(microbenchmark) 5 | 6 | pdb_filepath <- commandArgs(trailingOnly=TRUE)[1] 7 | 8 | bench <- microbenchmark(read.pdb(pdb_filepath, MODEL=NULL), times=1) 9 | 10 | cat(bench$time / 10^9, "\n", sep="") 11 | -------------------------------------------------------------------------------- /Rpdb/ramachandran.R: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | 3 | library(Rpdb) 4 | library(microbenchmark) 5 | 6 | pdb_filepath <- "data/1AKE.pdb" 7 | struc <- read.pdb(pdb_filepath) 8 | 9 | ramachandran <- function() { 10 | is_n <- which(struc$atoms$elename=="N") 11 | is_ca <- which(struc$atoms$elename=="CA") 12 | is_c <- which(struc$atoms$elename=="C") 13 | res_count <- length(is_ca) 14 | phi_angles <- dihedral(struc, is_c[1:res_count-2], is_n[2:res_count-1], is_ca[2:res_count-1], is_c[2:res_count-1]) 15 | psi_angles <- dihedral(struc, is_n[2:res_count-1], is_ca[2:res_count-1], is_c[2:res_count-1], is_n[3:res_count]) 16 | return(phi_angles, psi_angles) 17 | } 18 | 19 | bench <- microbenchmark(ramachandran(), times=1) 20 | 21 | cat(bench$time / 10^9, "\n", sep="") 22 | -------------------------------------------------------------------------------- /Victor/Makefile: 
-------------------------------------------------------------------------------- 1 | # Makefile for Victor benchmarks 2 | # VICTOR_ROOT needs to be set as the Victor root directory 3 | 4 | BINPATH = $(VICTOR_ROOT)/bin 5 | 6 | LIBS = -lBiopool -ltools 7 | LIB_PATH = -L. -L$(VICTOR_ROOT)/lib/ 8 | INC_PATH += -I. -I$(VICTOR_ROOT)/tools/ -I$(VICTOR_ROOT)/Biopool/Sources/ 9 | 10 | CC=g++ 11 | CFLAGS=-I. -ansi -pedantic -DNEXCEPTIONS -DLINUX -c -O3 -ffast-math -DNDEBUG -ftemplate-depth-36 -Wno-reorder -Wno-uninitialized -Wno-write-strings -Wno-narrowing 12 | 13 | install: 14 | $(CC) $(CFLAGS) $(INC_PATH) -c parse_pdb.cc -o parse_pdb.o 15 | $(CC) parse_pdb.o -o parse_pdb $(LIB_PATH) $(LIBS) 16 | 17 | clean: 18 | rm parse_pdb.o parse_pdb 19 | -------------------------------------------------------------------------------- /Victor/README.md: -------------------------------------------------------------------------------- 1 | VICTOR_ROOT needs to be set as the Victor root directory. 2 | Compile with `make clean` then `make`. 
3 | -------------------------------------------------------------------------------- /Victor/parse_pdb.cc: -------------------------------------------------------------------------------- 1 | // Benchmark the parsing of a PDB file given as an argument 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace Victor::Biopool; 9 | using namespace Victor; 10 | 11 | int main( int argc, char* argv[] ) { 12 | string pdb_filepath = argv[1]; 13 | struct timespec tstart, tend; 14 | clock_gettime(CLOCK_REALTIME, &tstart); 15 | ifstream inFile( pdb_filepath.c_str() ); 16 | // See options at 17 | // http://protein.bio.unipd.it/victor_doxygen/classVictor_1_1Biopool_1_1PdbLoader.html 18 | PdbLoader pl(inFile, true, true, false, true, false, false, false, true); 19 | Protein prot; 20 | prot.load( pl ); 21 | clock_gettime(CLOCK_REALTIME, &tend); 22 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec)/1E9); 23 | } 24 | -------------------------------------------------------------------------------- /atomium/parse_mmcif.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | import sys 4 | import time 5 | import atomium 6 | 7 | mmcif_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | atomium.open(mmcif_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /atomium/parse_mmtf.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a MMTF file given as an argument 2 | 3 | import sys 4 | import time 5 | import atomium 6 | 7 | mmtf_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | atomium.open(mmtf_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /atomium/parse_pdb.py: 
-------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an argument 2 | 3 | import sys 4 | import time 5 | import atomium 6 | 7 | pdb_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | atomium.open(pdb_filepath) 11 | end = time.time() 12 | 13 | print(end - start) 14 | -------------------------------------------------------------------------------- /benchmarks.csv: -------------------------------------------------------------------------------- 1 | Package,Benchmark,Runtime 2 | BioStructures,Parse PDB 1CRN,0.0007521262 3 | BioStructures,Parse PDB 1HTQ,2.552547826 4 | BioStructures,Parse mmCIF 1CRN,0.001960067 5 | BioStructures,Parse mmCIF 1HTQ,7.958684753666667 6 | BioStructures,Parse MMTF 1CRN,0.001124614 7 | BioStructures,Parse MMTF 1HTQ,3.5960987396666666 8 | BioStructures,Count,0.00017121870000000002 9 | BioStructures,Distance,1.20023e-05 10 | BioStructures,Ramachandran,0.0014263943000000002 11 | MIToS,Parse PDB 1CRN,0.0006277443 12 | MIToS,Parse PDB 1HTQ,2.8049395730000004 13 | MIToS,Count,1.71974e-05 14 | MIToS,Distance,4.4442e-06 15 | Biopython,Parse PDB 1CRN,0.007256412506103515 16 | Biopython,Parse PDB 1HTQ,16.372705459594727 17 | Biopython,Parse mmCIF 1CRN,0.016134476661682128 18 | Biopython,Parse mmCIF 1HTQ,45.07611576716105 19 | Biopython,Parse MMTF 1CRN,0.0045304536819458004 20 | Biopython,Parse MMTF 1HTQ,16.356812477111816 21 | Biopython,Count,0.00020694732666015625 22 | Biopython,Distance,0.0002535343170166016 23 | Biopython,Ramachandran,0.12028734683990479 24 | ProDy,Parse PDB 1CRN,0.00309908390045166 25 | ProDy,Parse PDB 1HTQ,2.1210433642069497 26 | ProDy,Count,0.008846926689147949 27 | ProDy,Distance,0.049677252769470215 28 | ProDy,Ramachandran,0.21265184879302979 29 | MDAnalysis,Parse PDB 1CRN,0.00418851375579834 30 | MDAnalysis,Parse PDB 1HTQ,1.4514319896697998 31 | MDAnalysis,Count,6.7901611328125e-05 32 | MDAnalysis,Distance,0.0006227493286132812 33 | 
MDAnalysis,Ramachandran,1.2391331672668457 34 | biotite,Parse PDB 1CRN,0.004447317123413086 35 | biotite,Parse PDB 1HTQ,4.8055440584818525 36 | biotite,Parse mmCIF 1CRN,0.00476081371307373 37 | biotite,Parse mmCIF 1HTQ,8.978858550389608 38 | biotite,Parse MMTF 1CRN,0.0012470483779907227 39 | biotite,Parse MMTF 1HTQ,0.1640939712524414 40 | atomium,Parse PDB 1CRN,0.006968569755554199 41 | atomium,Parse PDB 1HTQ,20.193578879038494 42 | atomium,Parse mmCIF 1CRN,0.01340920925140381 43 | atomium,Parse mmCIF 1HTQ,35.97704792022705 44 | atomium,Parse MMTF 1CRN,0.004566097259521484 45 | atomium,Parse MMTF 1HTQ,43.482786417007446 46 | Bio3D,Parse PDB 1CRN,0.0101017309 47 | Bio3D,Parse PDB 1HTQ,2.893682 48 | Bio3D,Count,0.00016179009999999996 49 | Bio3D,Distance,0.018523388999999998 50 | Rpdb,Parse PDB 1CRN,0.0095353409 51 | Rpdb,Parse PDB 1HTQ,14.096083333333334 52 | Rpdb,Count,0.00019563180000000004 53 | Rpdb,Distance,0.0012988237 54 | BioJava,Parse PDB 1CRN,0.0080779367 55 | BioJava,Parse PDB 1HTQ,1.342112537 56 | BioJava,Parse mmCIF 1CRN,0.0399477593 57 | BioJava,Parse mmCIF 1HTQ,16.915583049333335 58 | BioJava,Parse MMTF 1CRN,0.004121529099999999 59 | BioJava,Parse MMTF 1HTQ,0.7408829063333334 60 | BioPerl,Parse PDB 1CRN,0.043307685852050776 61 | BioPerl,Parse PDB 1HTQ,48.674395720164 62 | BioPerl,Count,0.00042252540588378896 63 | BioPerl,Distance,0.0005311250686645508 64 | BioRuby,Parse PDB 1CRN,0.020962584391236307 65 | BioRuby,Parse PDB 1HTQ,12.833540361995498 66 | BioRuby,Count,7.327683269977569e-05 67 | BioRuby,Distance,0.0003178965300321579 68 | GEMMI,Parse PDB 1CRN,0.0002428 69 | GEMMI,Parse PDB 1HTQ,0.3553026666666667 70 | GEMMI,Parse mmCIF 1CRN,0.0009732999999999999 71 | GEMMI,Parse mmCIF 1HTQ,1.4752656666666668 72 | GEMMI,Count,4.000000000000001e-06 73 | GEMMI,Distance,1.0000000000000002e-06 74 | Victor,Parse PDB 1CRN,0.007647900000000001 75 | Victor,Parse PDB 1HTQ,10.691736999999998 76 | ESBTL,Parse PDB 1CRN,0.0024213000000000004 77 | chemfiles-python,Parse 
PDB 1CRN,0.004539108276367188 78 | chemfiles-python,Parse mmCIF 1CRN,0.003771042823791504 79 | chemfiles-python,Parse mmCIF 1HTQ,2.0077246030171714 80 | chemfiles-python,Parse MMTF 1CRN,0.0031836271286010743 81 | chemfiles-python,Count,0.0007463932037353515 82 | chemfiles-python,Distance,0.0005480289459228515 83 | chemfiles-python,Ramachandran,0.007416057586669922 84 | chemfiles-cxx,Parse PDB 1CRN,0.0006692 85 | chemfiles-cxx,Parse mmCIF 1CRN,0.000986 86 | chemfiles-cxx,Parse mmCIF 1HTQ,1.9856413333333334 87 | chemfiles-cxx,Parse MMTF 1CRN,0.000439 88 | chemfiles-cxx,Count,9.18e-05 89 | chemfiles-cxx,Distance,0.0001881 90 | chemfiles-cxx,Ramachandran,0.0020766 91 | -------------------------------------------------------------------------------- /biotite/parse_mmcif.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a mmCIF file given as an argument 2 | 3 | import sys 4 | import time 5 | import biotite.structure.io.pdbx as pdbx 6 | 7 | mmcif_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | file = pdbx.PDBxFile() 11 | file.read(mmcif_filepath) 12 | pdbx.get_structure(file) 13 | end = time.time() 14 | 15 | print(end - start) 16 | -------------------------------------------------------------------------------- /biotite/parse_mmtf.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a MMTF file given as an argument 2 | 3 | import sys 4 | import time 5 | import biotite.structure.io.mmtf as mmtf 6 | 7 | mmtf_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | file = mmtf.MMTFFile() 11 | file.read(mmtf_filepath) 12 | mmtf.get_structure(file) 13 | end = time.time() 14 | 15 | print(end - start) 16 | -------------------------------------------------------------------------------- /biotite/parse_pdb.py: -------------------------------------------------------------------------------- 1 | # Benchmark the parsing of a PDB file given as an 
argument 2 | 3 | import sys 4 | import time 5 | import biotite.structure.io.pdb as pdb 6 | 7 | pdb_filepath = sys.argv[1] 8 | 9 | start = time.time() 10 | file = pdb.PDBFile() 11 | file.read(pdb_filepath) 12 | file.get_structure() 13 | end = time.time() 14 | 15 | print(end - start) 16 | -------------------------------------------------------------------------------- /checkwholepdb/checknewpdb.jl: -------------------------------------------------------------------------------- 1 | # Test which new/modified PDB entries error on PDB/mmCIF parsers 2 | # Writes output to a file labelled with the week 3 | 4 | using BioStructures 5 | 6 | start = now() 7 | basedir = "." 8 | ad, mo, ob = pdbrecentchanges() 9 | 10 | outstrs = ["Checking new/modified PDB entries at $(now())", 11 | "Checking $(length(ad)) new and $(length(mo)) modified entries"] 12 | 13 | for p in sort(collect(Set([ad..., mo...]))) 14 | try 15 | downloadpdb(p, dir=basedir, format=PDB) 16 | catch 17 | # Not having a PDB file is acceptable, though a failure to download an 18 | # available file may hide an error in parsing 19 | rm("$basedir/$p.pdb", force=true) 20 | end 21 | if isfile("$basedir/$p.pdb") 22 | try 23 | s = read("$basedir/$p.pdb", PDB) 24 | catch 25 | push!(outstrs, "$p - PDB parsing error") 26 | end 27 | rm("$basedir/$p.pdb") 28 | end 29 | try 30 | downloadpdb(p, dir=basedir, format=MMCIF) 31 | catch 32 | rm("$basedir/$p.cif", force=true) 33 | push!(outstrs, "$p - no mmCIF download") 34 | end 35 | if isfile("$basedir/$p.cif") 36 | try 37 | s = read("$basedir/$p.cif", MMCIF) 38 | catch 39 | push!(outstrs, "$p - mmCIF parsing error") 40 | end 41 | rm("$basedir/$p.cif") 42 | end 43 | end 44 | 45 | if length(outstrs) == 2 46 | push!(outstrs, "All entries read fine") 47 | end 48 | 49 | push!(outstrs, "Time taken - $(ceil(now() - start, Dates.Minute))") 50 | 51 | datestr = replace(string(Date(now())), "-", "") 52 | # This overwrites any existing file 53 | open("$basedir/recentpdb_jl_$datestr.txt", "w") 
do f 54 | for l in outstrs 55 | println(f, l) 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /checkwholepdb/checkwholepdb.jl: -------------------------------------------------------------------------------- 1 | # Test which PDB entries error on PDB/mmCIF parsers 2 | # Writes output to a file labelled with the week 3 | 4 | using BioStructures 5 | 6 | start = now() 7 | basedir = "." 8 | pdblist = pdbentrylist() 9 | 10 | outstrs = ["Checking all PDB entries at $(now())", 11 | "Checking $(length(pdblist)) entries"] 12 | 13 | for p in sort(pdblist) 14 | try 15 | downloadpdb(p, dir=basedir, format=PDB) 16 | catch 17 | # Not having a PDB file is acceptable, though a failure to download an 18 | # available file may hide an error in parsing 19 | rm("$basedir/$p.pdb", force=true) 20 | end 21 | if isfile("$basedir/$p.pdb") 22 | try 23 | s = read("$basedir/$p.pdb", PDB) 24 | catch 25 | push!(outstrs, "$p - PDB parsing error") 26 | end 27 | rm("$basedir/$p.pdb") 28 | end 29 | try 30 | downloadpdb(p, dir=basedir, format=MMCIF) 31 | catch 32 | rm("$basedir/$p.cif", force=true) 33 | push!(outstrs, "$p - no mmCIF download") 34 | end 35 | if isfile("$basedir/$p.cif") 36 | try 37 | s = read("$basedir/$p.cif", MMCIF) 38 | catch 39 | push!(outstrs, "$p - mmCIF parsing error") 40 | end 41 | rm("$basedir/$p.cif") 42 | end 43 | end 44 | 45 | if length(outstrs) == 2 46 | push!(outstrs, "All entries read fine") 47 | end 48 | 49 | push!(outstrs, "Time taken - $(ceil(now() - start, Dates.Minute))") 50 | 51 | datestr = replace(string(Date(now())), "-", "") 52 | # This overwrites any existing file 53 | open("$basedir/wholepdb_jl_$datestr.txt", "w") do f 54 | for l in outstrs 55 | println(f, l) 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /checkwholepdb/checkwholepdb.py: -------------------------------------------------------------------------------- 1 | # Test which PDB entries 
# Test which PDB entries error on PDB/mmCIF parsers
# Writes output to a file labelled with the date on which the check finished

import os
from datetime import datetime
from math import ceil
from Bio.PDB import PDBList
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.MMCIFParser import MMCIFParser


def remove_if_present(filepath):
    """Delete filepath, ignoring the error raised if it cannot be removed."""
    try:
        os.remove(filepath)
    except OSError:
        pass


start = datetime.now()
basedir = "."
pdbl = PDBList()
pdblist = pdbl.get_all_entries()

outstrs = ["Checking all PDB entries at {}".format(start.isoformat()),
            "Checking {} entries".format(len(pdblist))]
# Number of header lines; anything appended after these is a reported problem
n_header = len(outstrs)

pdb_parser = PDBParser()
mmcif_parser = MMCIFParser()

for pu in sorted(pdblist):
    p = pu.lower()
    pdb_path = "{}/pdb{}.ent".format(basedir, p)
    cif_path = "{}/{}.cif".format(basedir, p)

    # Exception rather than a bare except is caught throughout so that Ctrl-C
    # and SystemExit can still stop a run over the whole PDB
    try:
        pdbl.retrieve_pdb_file(p, pdir=basedir, file_format="pdb")
    except Exception:
        # Not having a PDB file is acceptable, though a failure to download an
        # available file may hide an error in parsing
        remove_if_present(pdb_path)
    if os.path.isfile(pdb_path):
        try:
            pdb_parser.get_structure("", pdb_path)
        except Exception:
            outstrs.append("{} - PDB parsing error".format(pu))
        os.remove(pdb_path)

    try:
        pdbl.retrieve_pdb_file(p, pdir=basedir, file_format="mmCif")
    except Exception:
        remove_if_present(cif_path)
        outstrs.append("{} - no mmCIF download".format(pu))
    if os.path.isfile(cif_path):
        try:
            mmcif_parser.get_structure("", cif_path)
        except Exception:
            outstrs.append("{} - mmCIF parsing error".format(pu))
        os.remove(cif_path)

if len(outstrs) == n_header:
    outstrs.append("All entries read fine")

end = datetime.now()
# total_seconds() is used because timedelta.seconds excludes whole days, which
# would under-report any run lasting longer than 24 hours
elapsed_minutes = int(ceil((end - start).total_seconds() / 60))
outstrs.append("Time taken - {} minute(s)".format(elapsed_minutes))

datestr = str(end.date()).replace("-", "")
# This overwrites any existing file
with open("{}/wholepdb_py_{}.txt".format(basedir, datestr), "w") as f:
    for l in outstrs:
        f.write(l + "\n")
# Benchmark the counting of alanine residues in a PDB file

import time
from chemfiles import Trajectory, Selection


def count(frame):
    """Return the number of matches of the "resname ALA" selection in frame."""
    # NOTE(review): the selection string has no explicit context, so this
    # presumably yields one match per atom rather than per residue - confirm
    # that this is the count the benchmark intends
    matches = Selection("resname ALA").evaluate(frame)
    return len(matches)


# The file is read outside the timed region so only the counting is measured
pdb_filepath = "data/1AKE.pdb"
frame = Trajectory(pdb_filepath).read()

start = time.time()
count(frame)
elapsed = time.time() - start

print(elapsed)
# Benchmark the calculation of a distance in a PDB file
# The distance is the closest distance between any atoms of residues 50 and 60
# of chain A in 1AKE

import time
from chemfiles import Trajectory, Selection


def distance(frame):
    """Return the smallest distance between any atom matched by the residue 50
    selection and any atom matched by the residue 60 selection.

    Returns infinity if either selection matches nothing.
    """
    # FIXME: this should use Selection("resid 50 and [chainname] A") which will
    # be available in chemfiles 0.10 (the next release)
    atoms_50 = Selection("resid 50 and index < 1000").evaluate(frame)
    atoms_60 = Selection("resid 60 and index < 1000").evaluate(frame)

    closest = float('inf')
    for a in atoms_50:
        for b in atoms_60:
            separation = frame.distance(a, b)
            if separation < closest:
                closest = separation
    return closest


# The file is read outside the timed region so only the distance search is
# measured
pdb_filepath = "data/1AKE.pdb"
frame = Trajectory(pdb_filepath).read()

start = time.time()
distance(frame)
elapsed = time.time() - start

print(elapsed)
struct timespec tstart, tend; 12 | clock_gettime(CLOCK_REALTIME, &tstart); 13 | auto trajectory = chemfiles::Trajectory(pdb_filepath); 14 | for (size_t step=0; step 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | using namespace chemfiles; 12 | 13 | using ramachandran_t = std::pair, std::vector>; 14 | static ramachandran_t ramachandran(Frame& frame) { 15 | auto phi_selection = Selection("dihedrals: name(#1) C and name(#2) N and name(#3) CA and name(#4) C"); 16 | auto phi_list = phi_selection.evaluate(frame); 17 | 18 | auto phi_angles = std::vector(); 19 | phi_angles.reserve(phi_list.size()); 20 | 21 | for (const auto& phi: phi_list) { 22 | // 57.29578 to convert from radians to degrees 23 | phi_angles.push_back(frame.dihedral(phi[0], phi[1], phi[2], phi[3]) * 57.29578); 24 | } 25 | 26 | 27 | auto psi_selection = Selection("dihedrals: name(#1) N and name(#2) CA and name(#3) C and name(#4) N"); 28 | auto psi_list = psi_selection.evaluate(frame); 29 | 30 | auto psi_angles = std::vector(); 31 | psi_angles.reserve(psi_list.size()); 32 | 33 | for (const auto& phi: psi_list) { 34 | // 57.29578 to convert from radians to degrees 35 | phi_angles.push_back(frame.dihedral(phi[0], phi[1], phi[2], phi[3]) * 57.29578); 36 | } 37 | 38 | // FIXME: the sign of the angles is inverted w.r.t. 
the MDAnalysis results 39 | return {phi_angles, psi_angles}; 40 | } 41 | 42 | int main() { 43 | auto pdb_filepath = "data/1AKE.pdb"; 44 | auto frame = Trajectory(pdb_filepath).read(); 45 | 46 | timespec tstart, tend; 47 | clock_gettime(CLOCK_REALTIME, &tstart); 48 | ramachandran(frame); 49 | clock_gettime(CLOCK_REALTIME, &tend); 50 | 51 | printf("%.6f\n", (tend.tv_sec - tstart.tv_sec) + (tend.tv_nsec - tstart.tv_nsec) / 1e9); 52 | return 0; 53 | } 54 | -------------------------------------------------------------------------------- /chemfiles/ramachandran.py: -------------------------------------------------------------------------------- 1 | # Benchmark the calculation of Ramachandran phi/psi angles from a PDB file 2 | import time 3 | from chemfiles import Trajectory, Selection 4 | 5 | 6 | def ramachandran(frame): 7 | phi_selection = Selection("dihedrals: name(#1) C and name(#2) N and name(#3) CA and name(#4) C") 8 | phi_angles = [] 9 | for (i, j, k, m) in phi_selection.evaluate(frame): 10 | # 57.29578 to convert from radians to degrees 11 | phi_angles.append(frame.dihedral(i, j, k, m) * 57.29578) 12 | 13 | psi_selection = Selection("dihedrals: name(#1) N and name(#2) CA and name(#3) C and name(#4) N") 14 | psi_angles = [] 15 | for (i, j, k, m) in psi_selection.evaluate(frame): 16 | psi_angles.append(frame.dihedral(i, j, k, m) * 57.29578) 17 | 18 | # FIXME: the sign of the angles is inverted w.r.t. 
# Plot benchmark results

using CSV
using DataFrames
using Gadfly
using Cairo
using Fontconfig

df = CSV.read("benchmarks.csv")

# Order in which the benchmarks are sorted before plotting
benchmarks = [
    "Parse PDB 1CRN",
    "Parse PDB 1HTQ",
    "Parse mmCIF 1CRN",
    "Parse mmCIF 1HTQ",
    "Parse MMTF 1CRN",
    "Parse MMTF 1HTQ",
    "Count",
    "Distance",
    "Ramachandran",
]

# Position of a benchmark name in the list above, used as the sort key
function benchind(b)
    return findfirst(isequal(b), benchmarks)
end

# Sort by benchmark in the order above, then by runtime within each benchmark
df_sorted = sort(df, (order(:Benchmark, by=benchind), :Runtime))

theme = Theme(
    background_color="white",
    panel_stroke="black",
    major_label_color="black",
    minor_label_color="black",
    highlight_width=0mm,
)

# Runtime on a log scale for each package, one coloured line per benchmark
function make_plot()
    return plot(df_sorted,
        x=:Package,
        y=:Runtime,
        color=:Benchmark,
        Scale.y_log10,
        Guide.xlabel(nothing),
        Guide.ylabel("Runtime / s"),
        Geom.point,
        Geom.line,
    )
end

p = Gadfly.with_theme(make_plot, theme)

draw(PNG("plot/plot.png", dpi=300), p)
# Run a benchmark
# Arguments are number of runs, benchmark command, output data file, csv file columns
# The last line printed by each run is appended to the data file, then the csv
# columns followed by the mean of the data file are appended to benchmarks.csv
function run_benchmark {
    # Locals keep the loop counter and arguments out of the global scope
    local n_runs="$1" cmd="$2" dat_file="$3" csv_columns="$4"
    local i
    for i in $(seq 1 "$n_runs")
    do
        # Quoted so the command string reaches eval exactly as it was given
        eval "$cmd" | tail -n1 >> "$dat_file"
    done
    # printf rather than echo -n, which is not portable and would word-split
    # and glob-expand the unquoted columns text
    printf '%s' "$csv_columns" >> benchmarks.csv
    python tools/mean.py "$dat_file" >> benchmarks.csv
}
1HTQ," 34 | run_benchmark $nb "julia BioStructures/parse_mmcif.jl data/1CRN.cif" "BioStructures/parse_mmcif_1CRN.dat" "BioStructures,Parse mmCIF 1CRN," 35 | run_benchmark $ns "julia BioStructures/parse_mmcif.jl data/1HTQ.cif" "BioStructures/parse_mmcif_1HTQ.dat" "BioStructures,Parse mmCIF 1HTQ," 36 | run_benchmark $nb "julia BioStructures/parse_mmtf.jl data/1CRN.mmtf" "BioStructures/parse_mmtf_1CRN.dat" "BioStructures,Parse MMTF 1CRN," 37 | run_benchmark $ns "julia BioStructures/parse_mmtf.jl data/1HTQ.mmtf" "BioStructures/parse_mmtf_1HTQ.dat" "BioStructures,Parse MMTF 1HTQ," 38 | run_benchmark $nb "julia BioStructures/count.jl" "BioStructures/count.dat" "BioStructures,Count," 39 | run_benchmark $nb "julia BioStructures/distance.jl" "BioStructures/distance.dat" "BioStructures,Distance," 40 | run_benchmark $nb "julia BioStructures/ramachandran.jl" "BioStructures/ramachandran.dat" "BioStructures,Ramachandran," 41 | echo "BioStructures benchmarks done" 42 | 43 | # MIToS 44 | run_benchmark $nb "julia MIToS/parse_pdb.jl data/1CRN.pdb" "MIToS/parse_pdb_1CRN.dat" "MIToS,Parse PDB 1CRN," 45 | run_benchmark $ns "julia MIToS/parse_pdb.jl data/1HTQ.pdb" "MIToS/parse_pdb_1HTQ.dat" "MIToS,Parse PDB 1HTQ," 46 | run_benchmark $nb "julia MIToS/count.jl" "MIToS/count.dat" "MIToS,Count," 47 | run_benchmark $nb "julia MIToS/distance.jl" "MIToS/distance.dat" "MIToS,Distance," 48 | echo "MIToS benchmarks done" 49 | 50 | # Biopython 51 | run_benchmark $nb "python Biopython/parse_pdb.py data/1CRN.pdb" "Biopython/parse_pdb_1CRN.dat" "Biopython,Parse PDB 1CRN," 52 | run_benchmark $ns "python Biopython/parse_pdb.py data/1HTQ.pdb" "Biopython/parse_pdb_1HTQ.dat" "Biopython,Parse PDB 1HTQ," 53 | run_benchmark $nb "python Biopython/parse_mmcif.py data/1CRN.cif" "Biopython/parse_mmcif_1CRN.dat" "Biopython,Parse mmCIF 1CRN," 54 | run_benchmark $ns "python Biopython/parse_mmcif.py data/1HTQ.cif" "Biopython/parse_mmcif_1HTQ.dat" "Biopython,Parse mmCIF 1HTQ," 55 | run_benchmark $nb "python 
Biopython/parse_mmtf.py data/1CRN.mmtf" "Biopython/parse_mmtf_1CRN.dat" "Biopython,Parse MMTF 1CRN," 56 | run_benchmark $ns "python Biopython/parse_mmtf.py data/1HTQ.mmtf" "Biopython/parse_mmtf_1HTQ.dat" "Biopython,Parse MMTF 1HTQ," 57 | run_benchmark $nb "python Biopython/count.py" "Biopython/count.dat" "Biopython,Count," 58 | run_benchmark $nb "python Biopython/distance.py" "Biopython/distance.dat" "Biopython,Distance," 59 | run_benchmark $nb "python Biopython/ramachandran.py" "Biopython/ramachandran.dat" "Biopython,Ramachandran," 60 | echo "Biopython benchmarks done" 61 | 62 | # ProDy 63 | run_benchmark $nb "python ProDy/parse_pdb.py data/1CRN.pdb" "ProDy/parse_pdb_1CRN.dat" "ProDy,Parse PDB 1CRN," 64 | run_benchmark $ns "python ProDy/parse_pdb.py data/1HTQ.pdb" "ProDy/parse_pdb_1HTQ.dat" "ProDy,Parse PDB 1HTQ," 65 | run_benchmark $nb "python ProDy/count.py" "ProDy/count.dat" "ProDy,Count," 66 | run_benchmark $nb "python ProDy/distance.py" "ProDy/distance.dat" "ProDy,Distance," 67 | run_benchmark $nb "python ProDy/ramachandran.py" "ProDy/ramachandran.dat" "ProDy,Ramachandran," 68 | echo "ProDy benchmarks done" 69 | 70 | # MDAnalysis 71 | run_benchmark $nb "python MDAnalysis/parse_pdb.py data/1CRN.pdb" "MDAnalysis/parse_pdb_1CRN.dat" "MDAnalysis,Parse PDB 1CRN," 72 | run_benchmark $ns "python MDAnalysis/parse_pdb.py data/1HTQ.pdb" "MDAnalysis/parse_pdb_1HTQ.dat" "MDAnalysis,Parse PDB 1HTQ," 73 | run_benchmark $nb "python MDAnalysis/count.py" "MDAnalysis/count.dat" "MDAnalysis,Count," 74 | run_benchmark $nb "python MDAnalysis/distance.py" "MDAnalysis/distance.dat" "MDAnalysis,Distance," 75 | run_benchmark $nb "python MDAnalysis/ramachandran.py" "MDAnalysis/ramachandran.dat" "MDAnalysis,Ramachandran," 76 | echo "MDAnalysis benchmarks done" 77 | 78 | # biotite 79 | run_benchmark $nb "python biotite/parse_pdb.py data/1CRN.pdb" "biotite/parse_pdb_1CRN.dat" "biotite,Parse PDB 1CRN," 80 | run_benchmark $ns "python biotite/parse_pdb.py data/1HTQ.pdb" 
"biotite/parse_pdb_1HTQ.dat" "biotite,Parse PDB 1HTQ," 81 | run_benchmark $nb "python biotite/parse_mmcif.py data/1CRN.cif" "biotite/parse_mmcif_1CRN.dat" "biotite,Parse mmCIF 1CRN," 82 | run_benchmark $ns "python biotite/parse_mmcif.py data/1HTQ.cif" "biotite/parse_mmcif_1HTQ.dat" "biotite,Parse mmCIF 1HTQ," 83 | run_benchmark $nb "python biotite/parse_mmtf.py data/1CRN.mmtf" "biotite/parse_mmtf_1CRN.dat" "biotite,Parse MMTF 1CRN," 84 | run_benchmark $ns "python biotite/parse_mmtf.py data/1HTQ.mmtf" "biotite/parse_mmtf_1HTQ.dat" "biotite,Parse MMTF 1HTQ," 85 | echo "biotite benchmarks done" 86 | 87 | # atomium 88 | run_benchmark $nb "python atomium/parse_pdb.py data/1CRN.pdb" "atomium/parse_pdb_1CRN.dat" "atomium,Parse PDB 1CRN," 89 | run_benchmark $ns "python atomium/parse_pdb.py data/1HTQ.pdb" "atomium/parse_pdb_1HTQ.dat" "atomium,Parse PDB 1HTQ," 90 | run_benchmark $nb "python atomium/parse_mmcif.py data/1CRN.cif" "atomium/parse_mmcif_1CRN.dat" "atomium,Parse mmCIF 1CRN," 91 | run_benchmark $ns "python atomium/parse_mmcif.py data/1HTQ.cif" "atomium/parse_mmcif_1HTQ.dat" "atomium,Parse mmCIF 1HTQ," 92 | run_benchmark $nb "python atomium/parse_mmtf.py data/1CRN.mmtf" "atomium/parse_mmtf_1CRN.dat" "atomium,Parse MMTF 1CRN," 93 | run_benchmark $ns "python atomium/parse_mmtf.py data/1HTQ.mmtf" "atomium/parse_mmtf_1HTQ.dat" "atomium,Parse MMTF 1HTQ," 94 | echo "atomium benchmarks done" 95 | 96 | # Bio3D 97 | run_benchmark $nb "Rscript Bio3D/parse_pdb.R data/1CRN.pdb" "Bio3D/parse_pdb_1CRN.dat" "Bio3D,Parse PDB 1CRN," 98 | run_benchmark $ns "Rscript Bio3D/parse_pdb.R data/1HTQ.pdb" "Bio3D/parse_pdb_1HTQ.dat" "Bio3D,Parse PDB 1HTQ," 99 | run_benchmark $nb "Rscript Bio3D/count.R" "Bio3D/count.dat" "Bio3D,Count," 100 | run_benchmark $nb "Rscript Bio3D/distance.R" "Bio3D/distance.dat" "Bio3D,Distance," 101 | echo "Bio3D benchmarks done" 102 | 103 | # Rpdb 104 | run_benchmark $nb "Rscript Rpdb/parse_pdb.R data/1CRN.pdb" "Rpdb/parse_pdb_1CRN.dat" "Rpdb,Parse PDB 1CRN," 105 
| run_benchmark $ns "Rscript Rpdb/parse_pdb.R data/1HTQ.pdb" "Rpdb/parse_pdb_1HTQ.dat" "Rpdb,Parse PDB 1HTQ," 106 | run_benchmark $nb "Rscript Rpdb/count.R" "Rpdb/count.dat" "Rpdb,Count," 107 | run_benchmark $nb "Rscript Rpdb/distance.R" "Rpdb/distance.dat" "Rpdb,Distance," 108 | echo "Rpdb benchmarks done" 109 | 110 | # BioJava 111 | run_benchmark $nb "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_pdb data/1CRN.pdb" "BioJava/parse_pdb_1CRN.dat" "BioJava,Parse PDB 1CRN," 112 | run_benchmark $ns "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_pdb data/1HTQ.pdb" "BioJava/parse_pdb_1HTQ.dat" "BioJava,Parse PDB 1HTQ," 113 | run_benchmark $nb "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_mmcif data/1CRN.cif" "BioJava/parse_mmcif_1CRN.dat" "BioJava,Parse mmCIF 1CRN," 114 | run_benchmark $ns "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_mmcif data/1HTQ.cif" "BioJava/parse_mmcif_1HTQ.dat" "BioJava,Parse mmCIF 1HTQ," 115 | run_benchmark $nb "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_mmtf data/1CRN.mmtf" "BioJava/parse_mmtf_1CRN.dat" "BioJava,Parse MMTF 1CRN," 116 | run_benchmark $ns "java -cp BioJava/target/pdb-benchmarks-1.0-SNAPSHOT.jar com.jgreener.pdb.parse_mmtf data/1HTQ.mmtf" "BioJava/parse_mmtf_1HTQ.dat" "BioJava,Parse MMTF 1HTQ," 117 | echo "BioJava benchmarks done" 118 | 119 | # BioPerl 120 | run_benchmark $nb "perl BioPerl/parse_pdb.pl data/1CRN.pdb" "BioPerl/parse_pdb_1CRN.dat" "BioPerl,Parse PDB 1CRN," 121 | run_benchmark $ns "perl BioPerl/parse_pdb.pl data/1HTQ.pdb" "BioPerl/parse_pdb_1HTQ.dat" "BioPerl,Parse PDB 1HTQ," 122 | run_benchmark $nb "perl BioPerl/count.pl" "BioPerl/count.dat" "BioPerl,Count," 123 | run_benchmark $nb "perl BioPerl/distance.pl" "BioPerl/distance.dat" "BioPerl,Distance," 124 | echo "BioPerl benchmarks done" 125 | 126 | # BioRuby 127 | run_benchmark $nb "ruby 
BioRuby/parse_pdb.rb data/1CRN.pdb" "BioRuby/parse_pdb_1CRN.dat" "BioRuby,Parse PDB 1CRN," 128 | run_benchmark $ns "ruby BioRuby/parse_pdb.rb data/1HTQ.pdb" "BioRuby/parse_pdb_1HTQ.dat" "BioRuby,Parse PDB 1HTQ," 129 | run_benchmark $nb "ruby BioRuby/count.rb" "BioRuby/count.dat" "BioRuby,Count," 130 | run_benchmark $nb "ruby BioRuby/distance.rb" "BioRuby/distance.dat" "BioRuby,Distance," 131 | echo "BioRuby benchmarks done" 132 | 133 | # GEMMI 134 | run_benchmark $nb "GEMMI/parse_pdb data/1CRN.pdb" "GEMMI/parse_pdb_1CRN.dat" "GEMMI,Parse PDB 1CRN," 135 | run_benchmark $ns "GEMMI/parse_pdb data/1HTQ.pdb" "GEMMI/parse_pdb_1HTQ.dat" "GEMMI,Parse PDB 1HTQ," 136 | run_benchmark $nb "GEMMI/parse_mmcif data/1CRN.cif" "GEMMI/parse_mmcif_1CRN.dat" "GEMMI,Parse mmCIF 1CRN," 137 | run_benchmark $ns "GEMMI/parse_mmcif data/1HTQ.cif" "GEMMI/parse_mmcif_1HTQ.dat" "GEMMI,Parse mmCIF 1HTQ," 138 | run_benchmark $nb "GEMMI/count" "GEMMI/count.dat" "GEMMI,Count," 139 | run_benchmark $nb "GEMMI/distance" "GEMMI/distance.dat" "GEMMI,Distance," 140 | echo "GEMMI benchmarks done" 141 | 142 | # Victor 143 | run_benchmark $nb "Victor/parse_pdb data/1CRN.pdb" "Victor/parse_pdb_1CRN.dat" "Victor,Parse PDB 1CRN," 144 | run_benchmark $ns "Victor/parse_pdb data/1HTQ.pdb" "Victor/parse_pdb_1HTQ.dat" "Victor,Parse PDB 1HTQ," 145 | echo "Victor benchmarks done" 146 | 147 | # ESBTL 148 | run_benchmark $nb "ESBTL/parse_pdb data/1CRN.pdb" "ESBTL/parse_pdb_1CRN.dat" "ESBTL,Parse PDB 1CRN," 149 | echo "ESBTL benchmarks done" 150 | 151 | # chemfiles - Python 152 | run_benchmark $nb "python chemfiles/parse_pdb.py data/1CRN.pdb" "chemfiles/parse_pdb_1CRN_py.dat" "chemfiles-python,Parse PDB 1CRN," 153 | # FIXME: this uncovered a bug in chemfiles, the bugfix will be available on 154 | # chemfiles>=0.10 when released 155 | #run_benchmark $ns "python chemfiles/parse_pdb.py data/1HTQ.pdb" "chemfiles/parse_pdb_1HTQ_py.dat" "chemfiles-python,Parse PDB 1HTQ," 156 | run_benchmark $nb "python chemfiles/parse_mmcif.py
data/1CRN.cif" "chemfiles/parse_mmcif_1CRN_py.dat" "chemfiles-python,Parse mmCIF 1CRN," 157 | run_benchmark $ns "python chemfiles/parse_mmcif.py data/1HTQ.cif" "chemfiles/parse_mmcif_1HTQ_py.dat" "chemfiles-python,Parse mmCIF 1HTQ," 158 | run_benchmark $nb "python chemfiles/parse_mmtf.py data/1CRN.mmtf" "chemfiles/parse_mmtf_1CRN_py.dat" "chemfiles-python,Parse MMTF 1CRN," 159 | #run_benchmark $ns "python chemfiles/parse_mmtf.py data/1HTQ.mmtf" "chemfiles/parse_mmtf_1HTQ_py.dat" "chemfiles-python,Parse MMTF 1HTQ," 160 | run_benchmark $nb "python chemfiles/count.py" "chemfiles/count_py.dat" "chemfiles-python,Count," 161 | run_benchmark $nb "python chemfiles/distance.py" "chemfiles/distance_py.dat" "chemfiles-python,Distance," 162 | run_benchmark $nb "python chemfiles/ramachandran.py" "chemfiles/ramachandran_py.dat" "chemfiles-python,Ramachandran," 163 | echo "chemfiles-python benchmarks done" 164 | 165 | # chemfiles - C++ 166 | run_benchmark $nb "chemfiles/parse_pdb data/1CRN.pdb" "chemfiles/parse_pdb_1CRN_cxx.dat" "chemfiles-cxx,Parse PDB 1CRN," 167 | #run_benchmark $ns "chemfiles/parse_pdb data/1HTQ.pdb" "chemfiles/parse_pdb_1HTQ_cxx.dat" "chemfiles-cxx,Parse PDB 1HTQ," 168 | run_benchmark $nb "chemfiles/parse_mmcif data/1CRN.cif" "chemfiles/parse_mmcif_1CRN_cxx.dat" "chemfiles-cxx,Parse mmCIF 1CRN," 169 | run_benchmark $ns "chemfiles/parse_mmcif data/1HTQ.cif" "chemfiles/parse_mmcif_1HTQ_cxx.dat" "chemfiles-cxx,Parse mmCIF 1HTQ," 170 | run_benchmark $nb "chemfiles/parse_mmtf data/1CRN.mmtf" "chemfiles/parse_mmtf_1CRN_cxx.dat" "chemfiles-cxx,Parse MMTF 1CRN," 171 | #run_benchmark $ns "chemfiles/parse_mmtf data/1HTQ.mmtf" "chemfiles/parse_mmtf_1HTQ_cxx.dat" "chemfiles-cxx,Parse MMTF 1HTQ," 172 | run_benchmark $nb "chemfiles/count" "chemfiles/count_cxx.dat" "chemfiles-cxx,Count," 173 | run_benchmark $nb "chemfiles/distance" "chemfiles/distance_cxx.dat" "chemfiles-cxx,Distance," 174 | run_benchmark $nb "chemfiles/ramachandran" "chemfiles/ramachandran_cxx.dat" 
# Print the benchmark results as a markdown table

# Runtime in seconds, indexed by benchmark name and then by software name
times = Dict{String, Dict{String, Float64}}()

open("benchmarks.csv") do f
    for line in eachline(f)
        # Skip the header row, and blank lines which cannot be split into the
        # three expected columns
        if isempty(strip(line)) || startswith(line, "Package")
            continue
        end
        software, benchmark, runtime = split(line, ",")
        results = get!(times, String(benchmark), Dict{String, Float64}())
        results[String(software)] = parse(Float64, runtime)
    end
end

# Each row is the benchmark name in the csv file, the row label to print and
# whether the runtime is shown in milliseconds rather than seconds
for (benchmark, label, millisecond) in (
            ("Parse PDB 1CRN"  , "Parse PDB 1CRN / ms"  , true ),
            ("Parse PDB 1HTQ"  , "Parse PDB 1HTQ / s"   , false),
            ("Parse mmCIF 1CRN", "Parse mmCIF 1CRN / ms", true ),
            ("Parse mmCIF 1HTQ", "Parse mmCIF 1HTQ / s" , false),
            ("Parse MMTF 1CRN" , "Parse MMTF 1CRN / ms" , true ),
            ("Parse MMTF 1HTQ" , "Parse MMTF 1HTQ / s"  , false),
            ("Count"           , "Count / ms"           , true ),
            ("Distance"        , "Distance / ms"        , true ),
            ("Ramachandran"    , "Ramachandran / ms"    , true ))
    print("| $(rpad(label, 21)) |")
    # A benchmark with no rows in the csv file gives a row of dashes rather
    # than a KeyError
    results = get(times, benchmark, Dict{String, Float64}())
    for software in ("BioStructures", "MIToS", "Biopython", "ProDy", "MDAnalysis", "biotite",
                        "atomium", "Bio3D", "Rpdb", "BioJava", "BioPerl", "BioRuby", "GEMMI",
                        "Victor", "ESBTL", "chemfiles-python", "chemfiles-cxx")
        if haskey(results, software)
            if millisecond
                val = string(round(1000 * results[software], sigdigits=2))
            else
                val = string(round(results[software], sigdigits=2))
            end
        else
            val = "-"
        end
        print(" $(rpad(val, 16)) |")
    end
    println()
end