├── scripts
    ├── Merge
    │   ├── README-vega-check.txt
    │   ├── merge-refseq.conf
    │   └── merge.conf
    ├── protein
    │   ├── THESE_SCRIPTS_ARE_USED_TO_RUN_THE_PROTEIN_ANNOTATION_PIPELINE
    │   └── chunk_protein_file.pl
    ├── assembly_patches
    │   ├── remove_patch_karyotype.sql
    │   └── remove_patch_raw_compute.sql
    ├── imgt
    │   └── kill_list.txt
    ├── cdna_update
    │   └── find_N.pl
    ├── genebuild
    │   ├── parse_embl_cds2uniprotkb.pl
    │   ├── sncrna
    │   │   ├── filter_cm.pl
    │   │   ├── repeats_dump.pl
    │   │   └── dump_prefilter_features.pl
    │   ├── find_seq_in_fasta.pl
    │   ├── convert_genome_dump.pl
    │   └── slice_coding_gene_cnt.pl
    ├── databases
    │   └── process_uniprot_isoforms.pl
    ├── chunk_fasta_file.pl
    ├── markers
    │   ├── map_weight.pl
    │   └── marker_match.pl
    └── delete_big_dir.pl
├── requirements.txt
├── modules
    ├── t
    │   ├── test-genome-DBs
    │   │   └── pararge_aegeria
    │   │   │   └── core
    │   │   │       ├── seq_region_attrib.txt
    │   │   │       ├── meta_coord.txt
    │   │   │       ├── seq_region_synonym.txt
    │   │   │       ├── coord_system.txt
    │   │   │       ├── analysis.txt
    │   │   │       ├── seq_region.txt
    │   │   │       ├── assembly.txt
    │   │   │       ├── external_db.txt
    │   │   │       ├── attrib_type.txt
    │   │   │       ├── analysis_description.txt
    │   │   │       └── meta.txt
    │   ├── MultiTestDB.conf.default
    │   ├── repeatcoverage.t
    │   ├── hiveassemblycomponents_rb.t
    │   ├── hiveloadgenomesequences_rb.t
    │   ├── hiveprocessassemblyreport_rb.t
    │   ├── prepare_local_tests.sh
    │   ├── hivecreatedirectories_rb.t
    │   ├── hiverepeatcoverage_rb.t
    │   └── filter_t.t
    └── Bio
    │   └── EnsEMBL
    │       └── Analysis
    │           ├── RunnableDB
    │               ├── Bam2Genes.pm
    │               ├── Bam2Introns.pm
    │               ├── Solexa2Genes.pm
    │               ├── BlastRNASeqPep.pm
    │               ├── ExonerateSolexa.pm
    │               ├── RefineSolexaGenes.pm
    │               ├── Solexa2GenesLiteNew.pm
    │               ├── ExonerateSolexaTranscript.pm
    │               ├── ExonerateSolexaLocalAlignment.pm
    │               ├── ProteinAnnotation
    │               │   ├── PrositePattern.pm.retired
    │               │   ├── Hamap_wormbase.pm
    │               │   ├── PrositeProfile.pm
    │               │   ├── PrositeProfile_wormbase.pm
    │               │   ├── Prints.pm
    │               │   ├── Prints_wormbase.pm
    │               │   ├── PrositePattern.pm
    │               │   ├── PrositePattern_wormbase.pm
    │               │   ├── Coil.pm
    │               │   ├── Signalp.pm
    │               │   ├── Hmmpfam.pm
    │               │   ├── PIRSF.pm
    │               │   ├── Superfamily.pm
    │               │   ├── IPRScan.pm
    │               │   ├── Pfam_wormbase.pm
    │               │   ├── Tmhmm.pm
    │               │   ├── Superfamily_wormbase.pm
    │               │   ├── Seg.pm
    │               │   └── Panther.pm
    │               ├── Finished
    │               │   ├── EPCR.pm
    │               │   └── RepeatMasker.pm
    │               ├── Accumulator.pm
    │               ├── Snap.pm
    │               ├── Fgenesh.pm
    │               └── Funcgen
    │               │   ├── ACME.pm
    │               │   └── Chipotle.pm
    │           ├── Config
    │               ├── GeneBuild
    │               │   ├── Bam2Genes.pm.example
    │               │   ├── Solexa2Genes.pm.example
    │               │   ├── Solexa2GenesLiteNew.pm
    │               │   ├── RefineSolexaGenes.pm.example
    │               │   ├── BlastRNASeqPep.pm.example
    │               │   ├── OrthologueEvaluatorExonerate.pm.example
    │               │   ├── Sam2Bam.pm.example
    │               │   ├── BuildChecks.pm.example
    │               │   ├── IgSegBuilder.pm.example
    │               │   ├── ProjectedTranscriptEvidence.pm.example
    │               │   ├── ExonerateSolexaLocalAlignment.pm.example
    │               │   └── Gsnap.pm.example
    │               ├── AddStableIds.pm.example
    │               ├── S3Config.pm.example
    │               ├── CloneEndsLinking.pm.example
    │               └── CollapseAffyProbes.pm.example
    │           ├── Hive
    │               ├── Config
    │               │   ├── genome_annotation.ini
    │               │   ├── sample_genes_registry_conf.pl
    │               │   └── BamMergeStatic.pm
    │               └── RunnableDB
    │               │   ├── HiveRunExternalCmd.pm
    │               │   ├── HiveCreateFastqDownloadJobs.pm
    │               │   ├── HiveStoreUnmappedcDNAs.pm
    │               │   ├── HiveLoadProteins.pm
    │               │   ├── HiveSequencesToFiles.pm
    │               │   ├── HiveLoadmRNAs.pm
    │               │   ├── HiveDBSeqFiles.pm
    │               │   ├── HiveLoadcDNAs.pm
    │               │   ├── HivecDNAManyHits.pm
    │               │   └── HiveIndexGenome.pm
    │           ├── Tools
    │               ├── BlastDBTracking
    │               │   └── Entry.pm
    │               ├── IMGT
    │               │   └── Seq
    │               │   │   └── RichSeqIMGT.pm
    │               ├── SoftwareConfigLoad.pm
    │               ├── Stashes.pm
    │               ├── GenomeOverlapFilter.pm
    │               ├── PacBioTranscriptFilter.pm
    │               ├── GeneBuildUtils
    │               │   └── HomologyUtils.pm
    │               ├── AllExonOverlapFilter.pm
    │               └── CodingExonOverlapFilter.pm
    │           └── Runnable
    │               ├── DustMasker.pm
    │               ├── ProteinAnnotation
    │                   └── PrositeProfile.pm
    │               └── SamtoolsMerge.pm
├── cpanfile
├── travisci
    └── MultiTestDB.conf.mysql
├── requirements_p36_ncrna.txt
├── pull_request_template.md
├── .gitignore
└── sql
    └── repeat_db_tables.sql


/scripts/Merge/README-vega-check.txt:
--------------------------------------------------------------------------------
1 | TBC
2 | 


--------------------------------------------------------------------------------
/scripts/protein/THESE_SCRIPTS_ARE_USED_TO_RUN_THE_PROTEIN_ANNOTATION_PIPELINE:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | deeptools ~= 3.4.1
2 | gspread
3 | oauth2client
4 | google-auth
5 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/seq_region_attrib.txt:
--------------------------------------------------------------------------------
1 | 1	367	1
2 | 1	6	1
3 | 


--------------------------------------------------------------------------------
/cpanfile:
--------------------------------------------------------------------------------
1 | requires 'Bio::DB::HTS';
2 | requires 'Proc::ProcessTable';
3 | requires 'Bio::DB::EUtilities';
4 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/meta_coord.txt:
--------------------------------------------------------------------------------
1 | gene	1	43270
2 | exon	1	3426
3 | transcript	1	6190
4 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/seq_region_synonym.txt:
--------------------------------------------------------------------------------
1 | 1	1	LR990895.1	50710
2 | 2	1	NC_053180.1	1830
3 | 


--------------------------------------------------------------------------------
/travisci/MultiTestDB.conf.mysql:
--------------------------------------------------------------------------------
1 | {
2 |   'port'   => '3306',
3 |   'driver' => 'mysql',
4 |   'user'   => 'root',
5 |   'host'   => '127.0.0.1',
6 | }
7 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Bam2Genes.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/Bam2Genes.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Bam2Introns.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/Bam2Introns.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Solexa2Genes.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/Solexa2Genes.pm


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/coord_system.txt:
--------------------------------------------------------------------------------
1 | 1	1	primary_assembly	ilParAegt1.1	1	default_version
2 | 2	1	contig	\N	2	default_version,sequence_level
3 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/BlastRNASeqPep.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/BlastRNASeqPep.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexa.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexa.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/RefineSolexaGenes.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/RefineSolexaGenes.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Solexa2GenesLiteNew.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/Solexa2GenesLiteNew.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Bam2Genes.pm.example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Bam2Genes.pm.example


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/analysis.txt:
--------------------------------------------------------------------------------
1 | 1	2021-05-08 10:07:46	ensembl	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
2 | 2	2021-05-08 10:08:27	ncrna	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
3 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Solexa2Genes.pm.example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Solexa2Genes.pm.example


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Solexa2GenesLiteNew.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Solexa2GenesLiteNew.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexaTranscript.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexaTranscript.pm


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/RefineSolexaGenes.pm.example:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/RefineSolexaGenes.pm.example


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexaLocalAlignment.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ensembl/ensembl-analysis/HEAD/modules/Bio/EnsEMBL/Analysis/RunnableDB/ExonerateSolexaLocalAlignment.pm


--------------------------------------------------------------------------------
/requirements_p36_ncrna.txt:
--------------------------------------------------------------------------------
1 | # We can only use python <= 3.6 because it needs scipy 0.18.1
2 | # We need to reinstall numpy as pandas will update to the latest version possible
3 | pandas
4 | scipy==0.18.1
5 | scikit-learn==0.18.1
6 | numpy==1.17.5
7 | 


--------------------------------------------------------------------------------
/scripts/assembly_patches/remove_patch_karyotype.sql:
--------------------------------------------------------------------------------
1 | delete karyotype from attrib_type, seq_region_attrib, karyotype where attrib_type.code in ('patch_novel','patch_fix') and attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id and seq_region_attrib.seq_region_id = karyotype.seq_region_id;
2 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/Config/genome_annotation.ini:
--------------------------------------------------------------------------------
 1 | assembly_accessions=[]
 2 | output_path=
 3 | release_number=
 4 | email_address=
 5 | genebuilder_id=
 6 | pipeline_name=
 7 | user_r=
 8 | user_w=
 9 | password=
10 | pipe_db_host=
11 | databases_host=
12 | dna_db_host=
13 | pipe_db_port=
14 | databases_port=
15 | dna_db_port=
16 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/seq_region.txt:
--------------------------------------------------------------------------------
 1 | 1	1	1	21295481
 2 | 2	contig_1	2	42060
 3 | 3	contig_2	2	43299
 4 | 4	contig_3	2	13106
 5 | 5	contig_4	2	20932
 6 | 6	contig_5	2	10928
 7 | 7	contig_6	2	17392
 8 | 8	contig_7	2	7483
 9 | 9	contig_8	2	26761
10 | 10	contig_9	2	149864
11 | 11	contig_10	2	16265
12 | 12	contig_11	2	22067
13 | 13	contig_12	2	18568
14 | 14	contig_13	2	72768
15 | 


--------------------------------------------------------------------------------
/modules/t/MultiTestDB.conf.default:
--------------------------------------------------------------------------------
 1 | # This provides a default set of configurations which is merged with your
 2 | # provided MultiTestDB.conf.
 3 | #
 4 | # You should provide a MultiTestDB.conf which defines the databases
 5 | 
 6 | {
 7 |   'databases' => {
 8 |     'homo_sapiens' => {
 9 |       'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
10 |     },
11 |     'pararge_aegeria' => {
12 |       'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor',
13 |     },
14 |   },
15 | }
16 | 


--------------------------------------------------------------------------------
/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | # Requirements
 2 | When creating your pull request, please fill out the template below:
 3 | 
 4 | # PR details
 5 | _Is this a fix, an update, or a new feature?_
 6 | 
 7 | _Include a short description_
 8 | 
 9 | _Include links to JIRA tickets_
10 | 
11 | # Testing
12 | _Have you tested it?_
13 | 
14 | # Assign to the weekly GitHub reviewer
15 | _If you are a member of Ensembl, please check the Genebuild weekly Rotas and assign this week's GitHub reviewer to the PR_
16 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/assembly.txt:
--------------------------------------------------------------------------------
 1 | 1	8	1190846	1198328	1	7483	1
 2 | 1	9	1254816	1281576	1	26761	1
 3 | 1	10	1551975	1701838	1	149864	1
 4 | 1	11	2139403	2155667	1	16265	1
 5 | 1	12	2914577	2936643	1	22067	1
 6 | 1	2	3594096	3636155	1	42060	1
 7 | 1	3	7512206	7555504	1	43299	1
 8 | 1	4	7747315	7760420	1	13106	1
 9 | 1	5	9996681	10017612	1	20932	1
10 | 1	6	13645738	13656665	1	10928	1
11 | 1	7	15616608	15633999	1	17392	1
12 | 1	13	13399324	13417891	1	18568	1
13 | 1	14	14122744	14195511	1	72768	1
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | blib/
 2 | .build/
 3 | _build/
 4 | cover_db/
 5 | inc/
 6 | Build
 7 | !Build/
 8 | Build.bat
 9 | .last_cover_stats
10 | MANIFEST.bak
11 | META.yml
12 | MYMETA.yml
13 | nytprof.out
14 | pm_to_blib
15 | .DS_Store
16 | Thumbs.db
17 | *.swp
18 | *.swo
19 | *~
20 | \#*\#
21 | /.emacs.desktop
22 | /.emacs.desktop.lock
23 | .elc
24 | auto-save-list
25 | tramp
26 | .\#*
27 | # Org-mode
28 | .org-id-locations
29 | *_archive
30 | a.out
31 | *.o
32 | *.obj
33 | *.class
34 | modules/t/MultiTestDB.conf
35 | modules/Bio/EnsEMBL/Analysis/Config/General.pm
36 | 


--------------------------------------------------------------------------------
/scripts/imgt/kill_list.txt:
--------------------------------------------------------------------------------
1 | AF062232        human; heavy chain orphon (chr15) not annotated as such
2 | AF062120        human; heavy chain orphon (chr15) not annotated as such
3 | HSIGHZF         human; heavy chain orphon (chr15) not annotated as such
4 | HSIGVH441       human; heavy chain orphon (chr15) not annotated as such
5 | HSIGHXX27       human; heavy chain orphon (chr15) not annotated as such
6 | HSIGV79         human; heavy chain orphon (chr15) not annotated as such
7 | HSIGLC16        human; light chain orphon (chr22, distal). Probable pseudo
8 | MM07554         mouse; C-REGION contains J-REGION
9 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/external_db.txt:
--------------------------------------------------------------------------------
1 | 1830	RefSeq_genomic	\N	KNOWN	193	RefSeq Genomic	MISC	\N	\N	This external_db_id can be used in the seq_region_synonym table. For species such as human, cow, dog we store chromosome names (1-22, X,Y) in the name column of the seq_region table. The RefSeq_genomic is stored as a synonym. eg. NC_000011.10, NT_187365.1
2 | 50710	INSDC	\N	KNOWNXREF	5	International Nucleotide Sequence Database Collaboration	MISC	INSDC	\N	This external_db_id was initially made for use in the seq_region_synonym table. For species such as human, cow, dog we store chromosome names (1-22, X,Y) in the name column of the seq_region table. The INSDC accession is stored as a synonym.
3 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/BlastDBTracking/Entry.pm:
--------------------------------------------------------------------------------
 1 | package Bio::EnsEMBL::Analysis::Tools::BlastDBTracking::Entry;
 2 | 
 3 | use warnings ;
 4 | use strict ;
 5 | use namespace::autoclean;
 6 | use Moose;
 7 | 
 8 | has filename       => ( is => 'ro', isa => 'Str', required => 1 );
 9 | has version        => ( is => 'ro', isa => 'Str', required => 1 );
10 | has sanger_version => ( is => 'ro', isa => 'Int',               );
11 | has installation   => ( is => 'ro', isa => 'Int',               );
12 | has count          => ( is => 'ro', isa => 'Int',               );
13 | has checksum       => ( is => 'ro', isa => 'Str',               );
14 | has from_file      => ( is => 'ro', isa => 'Bool',              );
15 | 
16 | 1;
17 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/attrib_type.txt:
--------------------------------------------------------------------------------
1 | 6	toplevel	Top Level	Top Level Non-Redundant Sequence Region
2 | 367	karyotype_rank	Rank in the karyotype	For a given seq_region, if it is part of the species karyotype, will indicate its rank
3 | 554	is_canonical	Ensembl Canonical	This transcript is the chosen canonical for its gene. For protein-coding genes, this is the MANE_Select transcript if there is one. If not, the canonical transcript is chosen by a pipeline that takes into account several criteria including transcript support (TSL), functional importance (APPRIS), representation in RefSeq and UniProt databases, length and coverage of pathogenic variants, where available. For non protein-coding genes, it is usually the longest transcript with the same biotype as its parent gene.
4 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveRunExternalCmd.pm:
--------------------------------------------------------------------------------
 1 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveRunExternalCmd;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use base ('Bio::EnsEMBL::Hive::Process');
 7 | use Bio::EnsEMBL::Compara::Utils::RunCommand;
 8 | 
 9 | sub param_defaults {
10 |     return {
11 |         'cmd' => undef,   # command to run
12 |     };
13 | }
14 | 
15 | sub run {
16 |     my $self = shift;
17 | 
18 |     my $cmd = $self->param_required('cmd');
19 | 
20 |     my $rc = Bio::EnsEMBL::Compara::Utils::RunCommand
21 |                 ->new_and_exec($cmd, { die_on_failure => 1 });
22 | 
23 |     # Save stdout into a hive param so it can be used in flow_into
24 |     my $stdout = $rc->out;
25 |     chomp $stdout;
26 |     $self->param('stdout', $stdout);
27 |     $self->param('stderr', $rc->err);
28 | }
29 | 
30 | sub write_output {
31 |     my $self = shift;
32 | 
33 |     # Flow stdout as #stdout#
34 |     $self->dataflow_output_id({ stdout => $self->param('stdout') }, 1);
35 | }
36 | 
37 | 1;
38 | 


--------------------------------------------------------------------------------
/modules/t/repeatcoverage.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveRepeatCoverage');
22 | 
23 | TODO: {
24 |   local $TODO = 'Proper tests needed';
25 |   note($TODO);
26 | }
27 | 
28 | done_testing();
29 | 


--------------------------------------------------------------------------------
/modules/t/hiveassemblycomponents_rb.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadAssemblyComponents');
22 | 
23 | TODO: {
24 |   local $TODO = 'Proper tests needed';
25 |   note($TODO);
26 | }
27 | 
28 | done_testing();
29 | 


--------------------------------------------------------------------------------
/modules/t/hiveloadgenomesequences_rb.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadGenomeSequences');
22 | 
23 | TODO: {
24 |   local $TODO = 'Proper tests needed';
25 |   note($TODO);
26 | }
27 | 
28 | done_testing();
29 | 


--------------------------------------------------------------------------------
/modules/t/hiveprocessassemblyreport_rb.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveProcessAssemblyReport');
22 | 
23 | TODO: {
24 |   local $TODO = 'Proper tests needed';
25 |   note($TODO);
26 | }
27 | 
28 | done_testing();
29 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/analysis_description.txt:
--------------------------------------------------------------------------------
1 | 1	Annotation produced by the Ensembl <a href="https://www.ensembl.org/info/genome/genebuild/" class="cp-external">genebuild</a>.	Ensembl	1	{"caption": "Genes (Ensembl)", "colour_key": "[biotype]", "default": {"MultiBottom": "collapsed_label", "MultiTop": "gene_label", "alignsliceviewbottom": "as_collapsed_label", "contigviewbottom": "transcript_label", "contigviewtop": "gene_label", "cytoview": "gene_label"}, "key": "ensembl", "label_key": "[biotype]", "multi_name": "Ensembl genes"}
2 | 2	Non-coding RNAs (ncRNAs) predicted using sequences from <arel="external" href="http://rfam.xfam.org/">RFAM</a> and <arel="external" href="http://www.mirbase.org/">miRBase</a>. See <a href="/info/genome/genebuild/ncrna.html" class="cp-external">article</a>.	ncRNAs	1	{"caption": "Genes (Ensembl)", "colour_key": "[biotype]", "default": {"MultiBottom": "collapsed_label", "MultiTop": "gene_label", "alignsliceviewbottom": "as_collapsed_label", "contigviewbottom": "transcript_label", "contigviewtop": "gene_label", "cytoview": "gene_label"}, "key": "ensembl", "label_key": "[biotype]", "multi_name": "Ensembl genes"}
3 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PrositePattern.pm.retired:
--------------------------------------------------------------------------------
 1 | 
 2 | =pod 
 3 | 
 4 | =head1 NAME
 5 | 
 6 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern
 7 | 
 8 | =head1 SYNOPSIS
 9 | 
10 |   my $tmhmm = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern->
11 |     new ( 
12 |     -db      => $db,
13 |     -input_id   => $input_id,
14 |     -analysis   => $analysis
15 |     );
16 |   $tmhmm->fetch_input;  # gets sequence from DB
17 |   $tmhmm->run;
18 |   $tmhmm->write_output; # writes features to DB
19 | 
20 | =head1 DESCRIPTION
21 | 
22 | =cut
23 | 
24 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern;
25 | 
26 | use strict;
27 | use vars qw(@ISA);
28 | 
29 | 
30 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern;
31 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
32 | 
33 | 
34 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
35 | 
36 | 
37 | sub fetch_input {
38 |   my ($self, @args) = @_;
39 |   
40 |   $self->SUPER::fetch_input(@args);
41 | 
42 |   my $run = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern->
43 |       new(-query     => $self->query,
44 |           -analysis  => $self->analysis,
45 |           %{$self->parameters_hash}
46 |           );
47 |   $self->runnable($run);
48 | }
49 | 
50 | 1;
51 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/IMGT/Seq/RichSeqIMGT.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Tools::IMGT::Seq::RichSeqIMGT;
17 | use warnings ;
18 | use strict;
19 | 
20 | use base qw(Bio::Seq::RichSeq);
21 | 
22 | 
# Constructor. Builds the object through the parent class constructor and
# then stores the optional -DATA_CLASS argument through data_class().
sub new {
  my ($caller, @args) = @_;

  my $self = $caller->SUPER::new(@args);

  # Only DATA_CLASS is extracted here; everything else in @args was already
  # handed to the parent constructor above.
  my ($data_class) = $self->_rearrange(['DATA_CLASS'], @args);

  if (defined $data_class) {
    $self->data_class($data_class);
  }

  return $self;
}
36 | 
37 | 
# Combined getter/setter for the '_data_class' slot.
# When called with an argument (even undef) the slot is overwritten first;
# the current value of the slot is always returned.
sub data_class {
  my ($obj, @values) = @_;

  $obj->{'_data_class'} = $values[0] if @values;

  return $obj->{'_data_class'};
}
47 | 
48 | 
49 | 1;
50 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Hamap_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Hamap_wormbase;
18 | use warnings ;
19 | use vars qw(@ISA);
20 | 
21 | use strict;
22 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
23 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hamap_wormbase;
24 | 
25 | @ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
26 | 
27 | 
# Prepare the Hamap_wormbase runnable for this job: run the parent class'
# fetch_input with the caller's arguments, then create the runnable from this
# object's query and analysis and register it through runnable().
sub fetch_input {
  my $self = shift;
  my @args = @_;

  $self->SUPER::fetch_input(@args);

  my $runnable =
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hamap_wormbase->new(
      -query    => $self->query,
      -analysis => $self->analysis
    );

  return $self->runnable($runnable);
}
38 | 
39 | 
40 | 1;
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/sql/repeat_db_tables.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | CREATE TABLE assembly (
 3 | 
 4 |   assembly_id                 INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
 5 |   gca                         VARCHAR(14) NOT NULL,
 6 |   species_id                  INT(10) NOT NULL,
 7 | 
 8 |   PRIMARY KEY (assembly_id),
 9 | 
10 | ) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
11 | 
12 | 
-- Table species: one row per species, keyed by species_id.
-- taxon_id is presumably the NCBI taxonomy identifier -- TODO confirm.
-- The trailing comma that used to follow the PRIMARY KEY clause has been
-- removed: MySQL rejects a comma directly before the closing parenthesis.
CREATE TABLE species (

  species_id                  INT(10) NOT NULL AUTO_INCREMENT,
  taxon_id                    INT(10) UNSIGNED NOT NULL,
  common_name                 VARCHAR(40) NOT NULL,
  group_name                  VARCHAR(40) NOT NULL,

  PRIMARY KEY (species_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
23 | 
24 | 
-- Table repeat_sequence: one row per repeat_sequence_id, carrying the ids of
-- a repeat class, a species and an assembly (no foreign keys are declared;
-- the engine is MyISAM).
-- NOTE(review): species_id is UNSIGNED here but signed in the other tables
-- of this schema -- confirm which is intended before joining on it.
-- The trailing comma that used to follow the PRIMARY KEY clause has been
-- removed: MySQL rejects a comma directly before the closing parenthesis.
CREATE TABLE repeat_sequence (

  repeat_sequence_id          INT(10) NOT NULL AUTO_INCREMENT,
  repeat_class_id             INT(10) NOT NULL,
  species_id                  INT(10) UNSIGNED NOT NULL,
  assembly_id                 INT(10) UNSIGNED NOT NULL,

  PRIMARY KEY (repeat_sequence_id)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
35 | 
36 | 
-- Table repeat_class: one row per repeat_class_id holding the repeat's name,
-- class, type and sequence text, with secondary indexes on name, class and
-- type.
-- The trailing comma that used to follow the last KEY clause has been
-- removed: MySQL rejects a comma directly before the closing parenthesis.
CREATE TABLE repeat_class (

  repeat_class_id             INT(10) NOT NULL AUTO_INCREMENT,
  repeat_name                 VARCHAR(255) NOT NULL,
  repeat_class                VARCHAR(100) NOT NULL,
  repeat_type                 VARCHAR(40) NOT NULL,
  repeat_sequence             LONGTEXT NOT NULL,

  PRIMARY KEY (repeat_class_id),
  KEY name (repeat_name),
  KEY class (repeat_class),
  KEY type (repeat_type)

) COLLATE=latin1_swedish_ci ENGINE=MyISAM;
51 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PrositeProfile.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | =head1 SYNOPSIS
19 | 
20 | =head1 DESCRIPTION
21 | 
22 | =cut
23 | 
24 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositeProfile;
25 | use warnings ;
26 | use vars qw(@ISA);
27 | 
28 | use strict;
29 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
30 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositeProfile;
31 | 
32 | @ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
33 | 
34 | 
# Prepare the PrositeProfile runnable for this job: run the parent class'
# fetch_input with the caller's arguments, then create the runnable from this
# object's query and analysis and register it through runnable().
sub fetch_input {
  my $self = shift;
  my @args = @_;

  $self->SUPER::fetch_input(@args);

  my $runnable =
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositeProfile->new(
      -query    => $self->query,
      -analysis => $self->analysis
    );

  return $self->runnable($runnable);
}
45 | 
46 | 
47 | 1;
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveCreateFastqDownloadJobs.pm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveCreateFastqDownloadJobs;
19 | 
20 | use strict;
21 | use warnings;
22 | use feature 'say';
23 | 
24 | use base ('Bio::EnsEMBL::Hive::RunnableDB::JobFactory');
25 | 
=head2 write_output

 Arg [1]    : None
 Description: Reads the tab-separated file named by the 'inputfile' parameter
              and takes the fourth column of every line (a line without any
              tab is taken whole, as "cut -f4" would do). Every non-empty
              value becomes an output id of the form { iid => <value> }; the
              list is flowed to the branch given by 'fan_branch_code'.
 Returntype : Whatever dataflow_output_id returns
 Exceptions : Dies if 'inputfile' is not set or cannot be read

=cut


sub write_output {
    my $self = shift;

    my $inputfile = $self->param('inputfile');
    die "Parameter 'inputfile' is not set\n" unless defined $inputfile;

    # Parse the file in Perl instead of shelling out to cut(1): the previous
    # backtick command depended on bash-only $'\t' quoting, broke on file
    # names containing shell metacharacters and silently produced no jobs
    # when the command failed.
    open(my $fh, '<', $inputfile)
      or die "Could not open inputfile '$inputfile': $!\n";

    my @output_ids;
    while (my $line = <$fh>) {
      chomp $line;

      my $fastq;
      if (index($line, "\t") < 0) {
        # cut(1) prints lines that contain no delimiter unchanged
        $fastq = $line;
      }
      else {
        # limit -1 keeps trailing empty columns so the index stays meaningful
        $fastq = (split(/\t/, $line, -1))[3] // '';
      }

      push(@output_ids, {iid => $fastq}) if $fastq ne "";
    }
    close($fh);

    $self->dataflow_output_id(\@output_ids, $self->param('fan_branch_code'));
  }
50 | 
51 | 1;
52 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PrositeProfile_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | =head1 SYNOPSIS
19 | 
20 | =head1 DESCRIPTION
21 | 
22 | =cut
23 | 
24 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositeProfile_wormbase;
25 | use warnings ;
26 | use vars qw(@ISA);
27 | 
28 | use strict;
29 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
30 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositeProfile_wormbase;
31 | 
32 | @ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
33 | 
34 | 
# Prepare the PrositeProfile_wormbase runnable for this job: run the parent
# class' fetch_input with the caller's arguments, then create the runnable
# from this object's query and analysis and register it through runnable().
sub fetch_input {
  my $self = shift;
  my @args = @_;

  $self->SUPER::fetch_input(@args);

  my $runnable =
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositeProfile_wormbase->new(
      -query    => $self->query,
      -analysis => $self->analysis
    );

  return $self->runnable($runnable);
}
45 | 
46 | 
47 | 1;
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/modules/t/prepare_local_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
# Copies the homo_sapiens core test database files from an ensembl checkout
# into this repository's modules/t/test-genome-DBs tree.
# The source is ../ensembl/<BASEDIR> when it exists; otherwise the ensembl
# checkout is located through the "ensembl/modules" entry of PERL5LIB.

CURRENT_DIR=$(dirname "$0")
if [ ! -e "$CURRENT_DIR/MultiTestDB.conf" ];then
  echo "You should create a file $CURRENT_DIR/MultiTestDB.conf containing connection details to a database:"
  cat <<EOF
{
  port => 3306,
  user => EHIVE_USER,
  pass => EHIVE_PASS,
  host => HOST,
  driver => 'mysql',
}
EOF
  echo
fi

BASEDIR="modules/t/test-genome-DBs/homo_sapiens/core"
ENSEMBLDIR="../ensembl/$BASEDIR"
if [ ! -e "$ENSEMBLDIR" ];then
  if [ -n "$PERL5LIB" ];then
    # Split on ':' with the shell itself; sed's "\n" replacement is not portable.
    IFS=':' read -r -a LIB_DIRS <<< "$PERL5LIB"
    for D in "${LIB_DIRS[@]}"; do
      if [ "$D" != "${D/ensembl\/modules}" ]; then
        # $D is the library directory (.../ensembl/modules); the test data
        # lives below it, so rebuild the path from the checkout root rather
        # than copying the whole library directory.
        ENSEMBLDIR="${D%%ensembl/modules*}ensembl/$BASEDIR"
      fi
    done
  else
    printf "\033[31mPERL5LIB is not set\033[0m\n"
    exit 1
  fi
fi

# Stop here rather than letting cp fail on an unexpanded glob.
if [ ! -d "$ENSEMBLDIR" ]; then
  printf "\033[31mCould not find the ensembl test database directory: %s\033[0m\n" "$ENSEMBLDIR"
  exit 1
fi

if [ ! -e "$BASEDIR" ]; then
  mkdir -p "$BASEDIR"
fi

for F in "${ENSEMBLDIR}"/*; do
# We also want the SQLite table in case we start testing it too
  cp -r "$F" "$BASEDIR"
done
55 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/SoftwareConfigLoad.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 2 | # 
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | # 
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | # 
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | package Bio::EnsEMBL::Analysis::Tools::SoftwareConfigLoad;
17 | 
18 | use strict;
19 | use warnings;
20 | use JSON;
21 | use File::Basename;
22 | use File::Spec;
23 | use Exporter 'import';
24 | our @EXPORT_OK = qw(get_software_path);
25 | 
26 | my $config_file = File::Spec->catfile(dirname(__FILE__), 'SoftwareConfig.json');  # Find suitable location
27 | 
# Look up the configured path of a tool.
#
# Arg [1]    : software type, a key of the "software_paths" section of the
#              JSON config file
# Arg [2]    : tool name, a key below that software type
# Returntype : the value stored at software_paths -> type -> tool
# Exceptions : dies when the config file cannot be read, when it has no
#              "software_paths" object, or when the type or the tool is
#              missing (the message lists the available names)
#
# The config file is read and decoded on every call.
sub get_software_path {
  my ($software_type, $tool) = @_;

  open(my $fh, '<', $config_file)
    or die "Could not open config file '$config_file': $!";
  my $json_text = do { local $/; <$fh> };
  close($fh);

  my $config = decode_json($json_text);

  my $software_paths = ref($config) eq 'HASH' ? $config->{software_paths} : undef;
  unless (ref($software_paths) eq 'HASH') {
    die "Config file '$config_file' has no 'software_paths' section\n";
  }

  # Validate inputs. Undefined arguments are reported through the same
  # messages as unknown names, without uninitialised-value warnings.
  my $type_label = $software_type // '';
  unless ($software_type && exists $software_paths->{$software_type}) {
    die "Software type '$type_label' not found in config. Available types: "
      . join(", ", sort keys %{$software_paths}) . "\n";
  }

  my $tools = $software_paths->{$software_type};
  unless (ref($tools) eq 'HASH') {
    die "Software type '$software_type' has no tool list in '$config_file'\n";
  }

  my $tool_label = $tool // '';
  unless (defined $tool && exists $tools->{$tool}) {
    die "Tool '$tool_label' not found for software type '$software_type'. Available tools: "
      . join(", ", sort keys %{$tools}) . "\n";
  }

  return $tools->{$tool};

}
52 | 


--------------------------------------------------------------------------------
/scripts/cdna_update/find_N.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use warnings ;
 3 | use strict;
 4 | 
 5 | 
 6 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 7 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 8 | # 
 9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | # 
13 | #      http://www.apache.org/licenses/LICENSE-2.0
14 | # 
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 | 
21 | 
22 | #script to parse a fasta file and identify sequences with large strings of 'N's
23 | 
24 | #perl find_N.pl missing_fasta.out >many_n.out
25 | 
# Scans a FASTA file record by record and prints the id of every record that
# contains a long run of 'N'.
#
# A record is reported when it has a run of Ns at least $percent percent of
# the record length AND the Ns counted so far reach $total_percent.
# NOTE(review): $total_percent is compared against a raw count of Ns, not a
# percentage of the record -- confirm that this is intended.
# The record length and the N scan both cover the whole record text (header
# line included), exactly as before.

my $percent = 2; #percentage of sequence which must be consecutive Ns
my $total_percent = 5 * $percent; #total Ns 

my $data = $ARGV[0];
defined $data or die "Usage: perl find_N.pl <fasta_file>\n";

# read one FASTA record at a time
local $/ = "\n>";

open(my $fasta_fh, '<', $data) or die ("Can't read $data $! \n");

while (my $record = <$fasta_fh>) {
	#have a sequence:

	$record =~ s/>//g;

	my $len = length $record;
	my $max_n = sprintf "%.0f", (($len / 100) * $percent); #threshold number of Ns which we want to flag 
	my $n_total = 0;

	# the id is the leading run of word characters and dots
	my ($name) = $record =~ /^([\w\.]+)\s+[\w\s]+/;
	unless (defined $name) {
		warn "Skipping record without a parsable id in $data\n";
		next;
	}

	while ($record =~ /(N+)/g) { #will match greedily
		my $run_length = length $1;
		$n_total += $run_length;
		if ($run_length >= $max_n && $n_total >= $total_percent) {
			print "$name\n"; #print the seq id
			last;
		}
	}
}

close($fasta_fh);
65 | 
66 | 	
67 | 


--------------------------------------------------------------------------------
/scripts/genebuild/parse_embl_cds2uniprotkb.pl:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | #!/usr/bin/env perl
17 | 
18 | use strict;
19 | use warnings;
20 | 
21 | use Getopt::Long;
22 | 
# Rewrites FASTA headers using an id mapping file.
#
# Map file   : tab-separated; for rows whose second column is "EMBL-CDS" the
#              third column is mapped to the first column.
# FASTA file : for each header line the second whitespace-separated token is
#              looked up in the map; when found, the header is rewritten as
#              "<first token> <mapped value>". All other lines are copied
#              unchanged.

my $fasta_file;
my $map_file;
my $edited_fasta_file;

my $usage = "Usage: $0 -fasta_file <in.fa> -map_file <map.tab> -edited_fasta_file <out.fa>\n";

GetOptions(
            'fasta_file:s' => \$fasta_file,
            'map_file:s' => \$map_file,
            'edited_fasta_file:s' => \$edited_fasta_file
            ) or die $usage;

foreach my $option ($fasta_file, $map_file, $edited_fasta_file) {
  die $usage unless defined $option && length $option;
}

my %map_ids;

# Read the inputs before touching the output file, so a bad path does not
# leave a truncated output behind.
open(my $map_fh, '<', $map_file) or die "Can't open ".$map_file.": $!\n";

while (my $map_line = <$map_fh>) {
  chomp $map_line;
  my @values = split(/\t/, $map_line);

  # skip short lines instead of comparing undefined columns
  next unless defined $values[1] && defined $values[2];

  if ($values[1] eq "EMBL-CDS") {
    $map_ids{$values[2]} = $values[0];
  }
}

close($map_fh);

open(my $fasta_fh, '<', $fasta_file) or die "Can't open ".$fasta_file.": $!\n";
open(my $out_fh, '>', $edited_fasta_file)
  or die "Could not open edited_fasta_file for writing: $!\n";

while (my $fasta_line = <$fasta_fh>) {
  chomp $fasta_line;

  if ($fasta_line =~ /^>/) {
    my @accessions = split(/\s+/, $fasta_line);
    if (defined $accessions[1] && $map_ids{$accessions[1]}) {
      print {$out_fh} $accessions[0]." ".$map_ids{$accessions[1]}."\n";
    }
    else {
      print {$out_fh} $fasta_line."\n";
    }
  }
  else {
    print {$out_fh} $fasta_line."\n";
  }
}

# buffered write errors only surface when the handle is closed
close($out_fh) or die "Could not close ".$edited_fasta_file.": $!\n";
close($fasta_fh);
67 | 


--------------------------------------------------------------------------------
/scripts/genebuild/sncrna/filter_cm.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use Getopt::Long;
21 | use Path::Tiny qw(path);
22 | use Data::Dumper;
23 | 
# Keep only the covariance models whose Rfam accession is wanted.
#
# Arg [1]    : string, concatenated models, each terminated by "//\n"
# Arg [2]    : arrayref of accessions to keep
# Returntype : string, the kept models re-joined with "//\n" and followed by
#              a final "//\n"
#
# The first "RF<digits>" token found in a model is taken as its accession and
# printed to STDOUT followed by a tab. A model without such a token is
# dropped: the capture is taken from the match itself, so a failed match can
# no longer reuse the accession left over from an earlier model.
sub filter_rfam {
  my ($cm, $rfam_acc) = @_;

  my @filtered;
  my @cm_models = split(/\/\/\n/, $cm);

  my %rfam_acc = map { $_ => 1 } @$rfam_acc;
  foreach my $cm_model (@cm_models) {
    my ($rfam) = $cm_model =~ m/(RF\d+)/;
    next unless defined $rfam;

    print $rfam . "\t";
    if (exists($rfam_acc{$rfam})) {
      push @filtered, $cm_model;
    }
  }

  return join("//\n", @filtered)."//\n";
}
44 | 
# Driver: filter_cm.pl <rfam_cm_file> <rfam_accessions_file> <output_file>
# Reads the covariance model file and the accession list (one accession per
# line), keeps the models selected by filter_rfam and writes them to the
# output file.
@ARGV >= 3
  or die "Usage: $0 <rfam_cm_file> <rfam_accessions_file> <output_file>\n";

my $rfam_cm_file       = $ARGV[0]; #$self->param_required('rfam_cm_file');
my $rfam_accessions   = $ARGV[1]; #$self->param_required('rfam_accessions');

my $cm_path = path($rfam_cm_file);
my $ra_path = path($rfam_accessions);
my $output = path($ARGV[2]);#path($working_dir . "/Rfam.cm");

my $cm = $cm_path->slurp;
my $ra = $ra_path->slurp;

# Trim each line so CRLF line endings or stray blanks do not stop an
# accession from matching; blank lines are ignored.
my @accessions;
foreach my $accession (split(/\n/, $ra)) {
  $accession =~ s/^\s+//;
  $accession =~ s/\s+$//;
  push @accessions, $accession if length $accession;
}

$cm = filter_rfam($cm, \@accessions);
$output->spew($cm);
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/Stashes.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Tools::Stashes;
17 | 
18 | use warnings;
19 | use strict;
20 | no strict "refs";
21 | 
22 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
23 | use Exporter;
24 | use vars qw (@ISA @EXPORT %stash $alias @alias %alias);
25 | 
26 | @ISA    = qw(Exporter);
27 | @EXPORT = qw( package_stash );
28 | 
# Inspect the symbol table of a package and return the single variable it
# holds.
#
# Arg [1]    : package name
# Returntype : arrayref [ value, name ]; value is the scalar, or a reference
#              to the array or hash, stored under that symbol name
# Exceptions : throws when more than one symbol carries a value
#
# Symbols whose name contains "BEGIN" or "import" are ignored.
sub package_stash {
  my ($packageName) = @_;

  my %result;

  local (*alias);
  *stash = *{"${packageName}::"};

  foreach my $varName ( keys %stash ) {
    # only return the config hash
    next if $varName =~ m/BEGIN/ or $varName =~ m/import/;

    *alias = $stash{$varName};

    # a hash takes precedence over an array, an array over a defined scalar
    if ( *alias{HASH} ) {
      $result{$varName} = \%alias;
    }
    elsif ( *alias{ARRAY} ) {
      $result{$varName} = \@alias;
    }
    elsif ( defined($alias) ) {
      $result{$varName} = $alias;
    }
  }

  my @found = keys %result;
  if ( @found > 1 ) {
    throw( "Have more than one item exported from " .
           "$packageName - you'll run into trouble\n" );
  }

  my $hash_name = $found[0];
  return [ $result{$hash_name}, $hash_name ];
} ## end sub package_stash
55 | 
56 | 1;
57 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveStoreUnmappedcDNAs.pm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | #Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveStoreUnmappedcDNAs;
19 | 
20 | use strict;
21 | use warnings;
22 | 
23 | 
24 | use Bio::EnsEMBL::UnmappedObject;
25 | 
26 | use parent ('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveBaseRunnableDB');
27 | 
28 | 
29 | 
# Creates the analysis and registers the connection to the database named
# 'target_db' under the key 'target_db'. Always returns 1.
sub fetch_input {
  my ($self) = @_;

  $self->create_analysis;

  my $target_db = $self->get_database_by_name('target_db');
  $self->hrdb_set_con($target_db, 'target_db');

  return 1;
}
38 | 
# Nothing to compute for this module: the work happens in write_output.
# Always returns 1.
sub run {
  my ($self) = @_;

  return 1;
}
44 | 
# Stores one Bio::EnsEMBL::UnmappedObject of type 'cDNA' in the 'target_db'
# database for every identifier in the 'iid' parameter (an array reference).
sub write_output {
  my ($self) = @_;

  my $target_db = $self->hrdb_get_con('target_db');
  my $adaptor   = $target_db->get_UnmappedObjectAdaptor;

  for my $cdna_id (@{ $self->param('iid') }) {
    my @object_args = (
      -type       => 'cDNA',
      -identifier => $cdna_id,
      -summary    => 'No output from Exonerate',
      -full_desc  => 'Exonerate returned no hits using standard parameters plus options --maxintron 400000 and --softmasktarget FALSE',
      -analysis   => $self->analysis,
    );
    $adaptor->store(Bio::EnsEMBL::UnmappedObject->new(@object_args));
  }
}
59 | 
60 | 
61 | 1;
62 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Prints.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints - RunnableDB that sets up the Prints runnable
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |  my $rsb = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints->new(
23 |     -db       => $db,
24 |     -input_id    => $id,
25 |     -analysis    => $analysis);
26 | 
27 | 
28 | =head1 DESCRIPTION
29 | 
30 | 
31 | =cut
32 | 
33 | 
34 | # Let the code begin...
35 | 
36 | 
37 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints;
38 | use warnings ;
39 | use vars qw(@ISA);
40 | use strict;
41 | 
42 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
43 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Prints;
44 | 
45 | 
46 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
47 | 
48 | 
# Prepare the Prints runnable for this job: run the parent class' fetch_input
# with the caller's arguments, then create the runnable from this object's
# query and analysis and register it through runnable().
sub fetch_input {
  my $self = shift;
  my @args = @_;

  $self->SUPER::fetch_input(@args);

  my $runnable =
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Prints->new(
      -query    => $self->query,
      -analysis => $self->analysis
    );

  return $self->runnable($runnable);
}
58 | 
59 | 
60 | 
61 | 
62 | 1;
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Prints_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints_wormbase - RunnableDB that sets up the Prints_wormbase runnable
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |  my $rsb = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints_wormbase->new(
23 |     -db       => $db,
24 |     -input_id    => $id,
25 |     -analysis    => $analysis);
26 | 
27 | 
28 | =head1 DESCRIPTION
29 | 
30 | 
31 | =cut
32 | 
33 | 
34 | # Let the code begin...
35 | 
36 | 
37 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Prints_wormbase;
38 | use warnings ;
39 | use vars qw(@ISA);
40 | use strict;
41 | 
42 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
43 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Prints_wormbase;
44 | 
45 | 
46 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
47 | 
48 | 
# Prepare the Prints_wormbase runnable for this job: run the parent class'
# fetch_input with the caller's arguments, then create the runnable from this
# object's query and analysis and register it through runnable().
sub fetch_input {
  my $self = shift;
  my @args = @_;

  $self->SUPER::fetch_input(@args);

  my $runnable =
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Prints_wormbase->new(
      -query    => $self->query,
      -analysis => $self->analysis
    );

  return $self->runnable($runnable);
}
58 | 
59 | 
60 | 
61 | 
62 | 1;
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/scripts/genebuild/sncrna/repeats_dump.pl:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use Bio::EnsEMBL::DBSQL::DBAdaptor;
21 | 
22 | # Usage: repeats_dump.pl <dbname> <dbhost> <dbport> <dbuser> <working_dir> [logic_name]
23 | #
24 | # Writes <working_dir>/repeats.bed with one tab-separated line per repeat feature
25 | # found on the toplevel slices: seq_region name, start, end and strand (+ or -).
26 | # NOTE(review): the logic_name argument is read but never used; the analyses are
27 | # taken from the 'repeat.analysis' meta key instead - confirm this is intended.
28 | my ($dbname, $dbhost, $dbport, $dbuser, $working_dir, $logic_name) = @ARGV;
29 | 
30 | die "Usage: $0 <dbname> <dbhost> <dbport> <dbuser> <working_dir> [logic_name]\n"
31 |   unless defined $working_dir;
32 | 
33 | my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
34 |   -DBNAME => $dbname,
35 |   -HOST   => $dbhost,
36 |   -PORT   => $dbport,
37 |   -USER   => $dbuser,
38 |   -DRIVER => 'mysql',
39 | );
40 | 
41 | # dump repeat features
42 | my $rfa = $db->get_RepeatFeatureAdaptor();
43 | my $fn = $working_dir . "/repeats.bed";
44 | # Lexical filehandle instead of a global bareword; $! reports why the open failed.
45 | open(my $bed_fh, '>', $fn) or die "Could not write to $fn: $!";
46 | 
47 | my $sa = $db->get_SliceAdaptor();
48 | 
49 | # With no 'repeat.analysis' meta entry, a single empty logic name is used so
50 | # that the adaptor is still queried once per slice.
51 | my $logic_names = $db->get_MetaContainer->list_value_by_key('repeat.analysis');
52 | if (!@$logic_names) {
53 |   push(@$logic_names, '');
54 | }
55 | 
56 | foreach my $slice (@{ $sa->fetch_all('toplevel') }) {
57 |   my $slice_name = $slice->seq_region_name();
58 |   foreach my $repeat_logic_name (@$logic_names) {
59 |     foreach my $repeat (@{ $rfa->fetch_all_by_Slice($slice, $repeat_logic_name) }) {
60 |       print $bed_fh join("\t",
61 |         $slice_name,
62 |         $repeat->seq_region_start(),
63 |         $repeat->seq_region_end(),
64 |         ($repeat->strand() == 1 ? '+' : '-')), "\n";
65 |     }
66 |   }
67 | }
68 | 
69 | # Buffered write errors only surface at close time, so check it.
70 | close($bed_fh) or die "Could not close $fn: $!";
71 | 
72 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PrositePattern.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =pod 
17 | 
18 | =head1 NAME
19 | 
20 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |   my $prosite = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern->
25 |     new ( 
26 |     -db      => $db,
27 |     -input_id   => $input_id,
28 |     -analysis   => $analysis
29 |     );
30 |   $prosite->fetch_input;  # gets sequence from DB
31 |   $prosite->run;
32 |   $prosite->write_output; # writes features to DB
33 | 
34 | =head1 DESCRIPTION
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern;
39 | 
40 | use warnings ;
41 | use strict;
42 | use vars qw(@ISA);
43 | 
44 | 
45 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern;
46 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
47 | 
48 | 
49 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
50 | 
51 | 
52 | sub fetch_input {
53 |   my $self = shift;
54 |   # Run the parent class's fetch_input first.
55 |   $self->SUPER::fetch_input(@_);
56 |   # The entries of parameters_hash are handed to the runnable after -query and -analysis.
57 |   my @runnable_args = (
58 |     -query    => $self->query,
59 |     -analysis => $self->analysis,
60 |     %{$self->parameters_hash},
61 |   );
62 |   $self->runnable(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern->new(@runnable_args));
63 | }
64 | 
65 | 1;
66 | 


--------------------------------------------------------------------------------
/modules/t/hivecreatedirectories_rb.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use File::Path qw(remove_tree);
20 | use File::Spec::Functions qw(catdir);
21 | 
22 | use Test::More;
23 | 
24 | use Bio::EnsEMBL::Hive::Utils::Test qw(standaloneJob);
25 | 
26 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveCreateDirectories');
27 | 
28 | my $module = 'Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveCreateDirectories';
29 | 
30 | # Returns the permission bits of $path as a zero-padded octal string.
31 | sub permissions_of {
32 |   my ($path) = @_;
33 |   my @stat = stat($path);
34 |   return sprintf("%04o", $stat[2] & 07777);
35 | }
36 | 
37 | # Case 1: a path given as a plain string, i.e. without an explicit mode.
38 | my $directory = catdir($ENV{PWD}, 'test_directory');
39 | standaloneJob($module, { paths => [$directory] });
40 | cmp_ok(permissions_of($directory), 'eq', 2775, 'Checking permissions for default production directory');
41 | remove_tree($directory);
42 | 
43 | # Case 2: a nested path given as a hash with an explicit mode; both the
44 | # subdirectory and its parent are checked against that mode.
45 | my $directory_2 = catdir($directory, 'directory_test');
46 | standaloneJob($module, { paths => [{ path => $directory_2, mode => 0755 }] });
47 | cmp_ok(permissions_of($directory), 'eq', '0755', 'Checking permissions for directory');
48 | cmp_ok(permissions_of($directory_2), 'eq', '0755', 'Checking permissions for subdirectory');
49 | remove_tree($directory);
50 | 
51 | done_testing();
52 | 


--------------------------------------------------------------------------------
/scripts/protein/chunk_protein_file.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | use warnings ;
19 | use strict;
20 | use Getopt::Long;
21 | use Bio::SeqIO;
22 | 
23 | # Splits a FASTA peptide file into numbered chunk files (chunk.1, chunk.2, ...)
24 | # written to the chunk directory, each holding at most "chunksize" sequences.
25 | my ($input_pep_file,
26 |     $output_chunk_dir,
27 |     $chunk_size);
28 | 
29 | GetOptions('pepfile=s'   => \$input_pep_file,
30 |            'chunkdir=s'  => \$output_chunk_dir,
31 |            'chunksize=s' => \$chunk_size);
32 | 
33 | 
34 | die "You must supply a valid input peptide file\n"
35 |     if not defined $input_pep_file or not -e $input_pep_file;
36 | die "You must supply a valid output chunk directory\n"
37 |     if not defined $output_chunk_dir or not -d $output_chunk_dir;
38 | 
39 | # Only a positive integer makes sense here: 0 used to slip through the old "< 0"
40 | # test and non-numeric values triggered numeric-comparison warnings.
41 | if (not defined $chunk_size or $chunk_size !~ /\A\d+\z/ or $chunk_size < 1) {
42 |   warn "No/invalid chunk size given; defaulting to 20";
43 |   $chunk_size = 20;
44 | }
45 | 
46 | my $seqio = Bio::SeqIO->new(-format => 'fasta',
47 |                             -file   => $input_pep_file);
48 | 
49 | my $count = 0;
50 | my $chunk_num = 1;
51 | my $outseqio;
52 | 
53 | while (my $seq = $seqio->next_seq) {
54 |   # Open the next chunk file lazily so that no empty chunk is ever created.
55 |   if (not defined $outseqio) {
56 |     $outseqio = Bio::SeqIO->new(-format => 'fasta',
57 |                                 -file   => ">$output_chunk_dir/chunk." . $chunk_num++);
58 |   }
59 | 
60 |   $outseqio->write_seq($seq);
61 |   $count++;
62 | 
63 |   if ($count >= $chunk_size) {
64 |     $outseqio->close;
65 |     $outseqio = undef;
66 |     $count = 0;
67 |   }
68 | }
69 | 
70 | # Close the last, partially filled chunk explicitly instead of relying on
71 | # object destruction at exit.
72 | $outseqio->close if defined $outseqio;
73 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/GenomeOverlapFilter.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | package Bio::EnsEMBL::Analysis::Tools::GenomeOverlapFilter;
18 | 
19 | use strict;
20 | use warnings;
21 | 
22 | use Bio::EnsEMBL::Root;
23 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
24 | use Bio::EnsEMBL::Utils::Argument qw( rearrange );
25 | 
26 | 
27 | 
28 | sub new {
29 |   # Constructor: takes no arguments besides the class and returns an empty blessed hash.
30 |   my $class = shift;
31 |   my $filter = bless({}, $class);
32 | 
33 |   # Any extra argument is a caller error.
34 |   throw("GenomeOverlapFilter should have no args in new") if @_;
35 | 
36 |   return $filter;
37 | }
38 | 
39 | # Returns the objects of $these that do not overlap, on the same strand, any object of $others.
40 | sub filter {
41 |   # Arg [1]    : arrayref of objects to filter; each must provide start, end and strand
42 |   # Arg [2]    : arrayref of objects to compare against
43 |   # Returntype : arrayref with the non-overlapping objects of Arg [1], in input order
44 |   my ($self, $these, $others) = @_;
45 | 
46 |   # interference is judged by overlap at genomic level
47 |   # assumption is that $others is sorted by start
48 | 
49 |   my @filtered;
50 | 
51 |   OBJECT: foreach my $obj (@$these) {
52 |     foreach my $o_obj (@$others) {
53 |       # objects on the other strand never interfere
54 |       next if $o_obj->strand != $obj->strand;
55 | 
56 |       # ends before $obj starts: no overlap, keep scanning
57 |       next if $o_obj->end < $obj->start;
58 | 
59 |       # starts after $obj ends: under the sorting assumption nothing further can overlap
60 |       last if $o_obj->start > $obj->end;
61 | 
62 |       # any other configuration is an overlap, so $obj is dropped
63 |       next OBJECT;
64 |     }
65 | 
66 |     # reached only when no overlapping object was found
67 |     push @filtered, $obj;
68 |   }
69 | 
70 |   return \@filtered;
71 | }
72 | 
73 | 1;
74 | 


--------------------------------------------------------------------------------
/scripts/assembly_patches/remove_patch_raw_compute.sql:
--------------------------------------------------------------------------------
 1 | -- Remove raw compute features located on patch seq regions, i.e. seq regions
 2 | -- carrying a 'patch_novel' or 'patch_fix' attribute.
 3 | 
 4 | -- Repeat features
 5 | DELETE repeat_feature
 6 | FROM attrib_type
 7 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
 8 |   JOIN repeat_feature ON seq_region_attrib.seq_region_id = repeat_feature.seq_region_id
 9 | WHERE attrib_type.code IN ('patch_novel','patch_fix');
10 | 
11 | -- Prediction exons
12 | DELETE prediction_exon
13 | FROM attrib_type
14 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
15 |   JOIN prediction_exon ON seq_region_attrib.seq_region_id = prediction_exon.seq_region_id
16 | WHERE attrib_type.code IN ('patch_novel','patch_fix');
17 | 
18 | -- Prediction transcripts
19 | DELETE prediction_transcript
20 | FROM attrib_type
21 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
22 |   JOIN prediction_transcript ON seq_region_attrib.seq_region_id = prediction_transcript.seq_region_id
23 | WHERE attrib_type.code IN ('patch_novel','patch_fix');
24 | 
25 | -- Simple features
26 | DELETE simple_feature
27 | FROM attrib_type
28 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
29 |   JOIN simple_feature ON seq_region_attrib.seq_region_id = simple_feature.seq_region_id
30 | WHERE attrib_type.code IN ('patch_novel','patch_fix');
31 | 
32 | -- DNA align features, except those referenced by transcript_supporting_feature
33 | -- or supporting_feature
34 | DELETE dna_align_feature
35 | FROM attrib_type
36 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
37 |   JOIN dna_align_feature ON seq_region_attrib.seq_region_id = dna_align_feature.seq_region_id
38 | WHERE attrib_type.code IN ('patch_novel','patch_fix')
39 |   AND dna_align_feature_id NOT IN (SELECT feature_id FROM transcript_supporting_feature WHERE feature_type = 'dna_align_feature')
40 |   AND dna_align_feature_id NOT IN (SELECT feature_id FROM supporting_feature WHERE feature_type = 'dna_align_feature');
41 | 
42 | -- Protein align features, except those referenced by transcript_supporting_feature
43 | -- or supporting_feature
44 | DELETE protein_align_feature
45 | FROM attrib_type
46 |   JOIN seq_region_attrib ON attrib_type.attrib_type_id = seq_region_attrib.attrib_type_id
47 |   JOIN protein_align_feature ON seq_region_attrib.seq_region_id = protein_align_feature.seq_region_id
48 | WHERE attrib_type.code IN ('patch_novel','patch_fix')
49 |   AND protein_align_feature_id NOT IN (SELECT feature_id FROM transcript_supporting_feature WHERE feature_type = 'protein_align_feature')
50 |   AND protein_align_feature_id NOT IN (SELECT feature_id FROM supporting_feature WHERE feature_type = 'protein_align_feature');
51 | 
52 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PrositePattern_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =pod 
17 | 
18 | =head1 NAME
19 | 
20 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern_wormbase
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |   my $prosite = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern_wormbase->
25 |     new ( 
26 |     -db      => $db,
27 |     -input_id   => $input_id,
28 |     -analysis   => $analysis
29 |     );
30 |   $prosite->fetch_input;  # gets sequence from DB
31 |   $prosite->run;
32 |   $prosite->write_output; # writes features to DB
33 | 
34 | =head1 DESCRIPTION
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PrositePattern_wormbase;
39 | 
40 | use warnings ;
41 | use strict;
42 | use vars qw(@ISA);
43 | 
44 | 
45 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern_wormbase;
46 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
47 | 
48 | 
49 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
50 | 
51 | 
52 | sub fetch_input {
53 |   my $self = shift;
54 |   # Run the parent class's fetch_input first.
55 |   $self->SUPER::fetch_input(@_);
56 |   # The entries of parameters_hash are handed to the runnable after -query and -analysis.
57 |   my @runnable_args = (
58 |     -query    => $self->query,
59 |     -analysis => $self->analysis,
60 |     %{$self->parameters_hash},
61 |   );
62 |   $self->runnable(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositePattern_wormbase->new(@runnable_args));
63 | }
64 | 
65 | 1;
66 | 


--------------------------------------------------------------------------------
/scripts/genebuild/find_seq_in_fasta.pl:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | #!/usr/bin/env perl
17 | 
18 | use strict;
19 | use warnings;
20 | use Bio::SeqIO;
21 | use Getopt::Long;
22 | 
23 | # Prints, in FASTA format on standard output, the sequences of a FASTA file whose
24 | # id is one of the requested ids (-id, or -id_file with one id per line) or
25 | # starts with the given prefix (-prefix).
26 | my ($fasta_file,$id_file);
27 | my $id;
28 | my $prefix;
29 | 
30 | GetOptions(
31 |             'id:s'         => \$id,
32 |             'fasta_file:s' => \$fasta_file,
33 |             'id_file:s'    => \$id_file,
34 |             'prefix:s'     => \$prefix,
35 |            );
36 | 
37 | if (!defined($fasta_file) || (!defined($id) && !defined($prefix) && !defined($id_file))) {
38 |   die "ERROR: Must at least set file (-fasta_file) and full id (-id),\n" .
39 |       "       file of ids (-id_file) or prefix (-prefix)\n";
40 | }
41 | 
42 | # An id file takes precedence over a single -id.
43 | my %ids ;
44 | if ( $id_file ) {
45 |   # Three-argument open with a lexical handle: the file name is never
46 |   # interpreted as a mode or a command.
47 |   open(my $id_fh, '<', $id_file) || die ( "Cant read file : $id_file ($!)\n") ;
48 |   while(my $line=<$id_fh>){
49 |     chomp($line);
50 |     $ids{$line} = 1;
51 |   }
52 |   close($id_fh);
53 | } elsif ( $id ) {
54 |   $ids{$id} = 1;
55 | }
56 | 
57 | 
58 | my $inputer = Bio::SeqIO->new(-file => "<" . $fasta_file , '-format' => 'Fasta') ;
59 | my $outputer = Bio::SeqIO->new(-file => ">-" , '-format' => 'Fasta') ;
60 | 
61 | while (my $seq = $inputer->next_seq) {
62 |   my $seq_id = $seq->id;
63 |   # A sequence is written once even when it matches both an id and the prefix.
64 |   # The prefix is matched literally (\Q...\E), so prefixes containing characters
65 |   # such as '|' or '.' are not interpreted as regular expression syntax.
66 |   if (exists $ids{$seq_id} || (defined($prefix) && $seq_id =~ /^\Q$prefix\E/)) {
67 |     $outputer->write_seq($seq);
68 |   }
69 | }
70 | $inputer->close;
71 | $outputer->close;
72 | 


--------------------------------------------------------------------------------
/scripts/databases/process_uniprot_isoforms.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use Getopt::Long;
21 | use Bio::SeqIO;
22 | 
23 | # Writes to --outfile the sequences of --isofile whose id looks like
24 | # "<2 letters>|<accession>-<isoform number>" and whose accession appears, as
25 | # ">accession.version", in a header of --infile.
26 | # --version : write ids as accession.version, using the version found in --infile
27 | # --desc    : keep the sequence description (it is blanked otherwise)
28 | my $infile;
29 | my $outfile;
30 | my $isofile;
31 | my $use_version = 0;
32 | my $use_description = 0;
33 | 
34 | GetOptions (
35 |             'i|infile=s'  => \$infile,
36 |             'v|isofile=s' => \$isofile,
37 |             'o|outfile=s' => \$outfile,
38 |             'version!'    => \$use_version,
39 |             'desc!'       => \$use_description,
40 |         );
41 | 
42 | # Fail early with a clear message instead of an uninitialized-value warning
43 | # followed by an obscure open error.
44 | die("You must supply an input file (--infile) and an output file (--outfile)\n")
45 |   unless (defined $infile and defined $outfile);
46 | 
47 | # accession => version, taken from the headers of the input file
48 | my %accessions;
49 | open(my $in_fh, '<', $infile) || die("Could not open $infile: $!\n");
50 | while (my $line = <$in_fh>) {
51 |     if ($line =~ /^>(\w+)\.(\d+)/) {
52 |         $accessions{$1} = $2;
53 |     }
54 | }
55 | close($in_fh) || die("Could not close $infile: $!\n");
56 | 
57 | 
58 | my $sequences = Bio::SeqIO->new(-format => 'fasta', -file => $isofile);
59 | my $writer = Bio::SeqIO->new(-format => 'fasta', -file => '>'.$outfile);
60 | $writer->preferred_id_type('accession');
61 | $writer->preferred_id_type('accession.version') if ($use_version);
62 | while (my $seq = $sequences->next_seq()) {
63 |     my ($accession, $isoform_id) = $seq->id =~ /[sptr]{2}\|(\w+)(-\d+)/;
64 |     # Ids that do not follow the expected pattern cannot be matched to an
65 |     # accession; skip them rather than looking up an undefined key.
66 |     next unless (defined $accession);
67 |     if (exists $accessions{$accession}) {
68 |         $seq->accession_number($accession.$isoform_id);
69 |         $seq->version($accessions{$accession}) if ($use_version);
70 |         $seq->desc('') unless ($use_description);
71 |         $writer->write_seq($seq);
72 |     }
73 | }
74 | $sequences->close;
75 | $writer->close;
76 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/Config/sample_genes_registry_conf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use strict;
18 | use warnings;
19 | use Bio::EnsEMBL::Utils::ConfigRegistry;
20 | use Bio::EnsEMBL::DBSQL::DBAdaptor;
21 | use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
22 | use Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor;
23 | 
24 | my $curr_release = $ENV{ENSEMBL_RELEASE};
25 | 
26 | # ---------------------- CURRENT COMPARA DATABASE ---------------------------------
27 | 
28 | # alias => [ host, database name ]
29 | my %compara_location_for = (
30 |     'compara_curr'   => [ 'mysql-ens-sta-1', 'ensembl_compara_'.$curr_release ],
31 | );
32 | 
33 | # Create one Compara adaptor per alias, connecting as 'ensro' with an empty password.
34 | for my $alias_name ( keys %compara_location_for ) {
35 |   my ( $host, $db_name ) = @{ $compara_location_for{$alias_name} };
36 |   Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(
37 |             -host => $host,
38 |             -user => 'ensro',
39 |             -pass => '',
40 |             -port => get_port($host),
41 |             -species => $alias_name,
42 |             -dbname  => $db_name,
43 |         );
44 | }
45 | 
46 | # ---------------------- CURRENT CORE DATABASES ---------------------------------
47 | 
48 | # The majority of core databases live on staging servers:
49 | Bio::EnsEMBL::Registry->load_registry_from_url("mysql://ensro\@mysql-ens-sta-1.ebi.ac.uk:4519/$curr_release");
50 | 
51 | sub get_port {
52 |     my ($host) = @_;
53 |     # Run the "<host> port" shell command and return its output without the trailing newline.
54 |     chomp(my $port = `$host port`);
55 |     return $port;
56 | }
57 | 
58 | 1;
59 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Coil.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =pod 
17 | 
18 | =head1 NAME
19 | 
20 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Coil
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |   my $ncoils = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Coil->new ( -db      => $db,
25 |     	  	                                                        -input_id   => $input_id,
26 |                                                                         -analysis   => $analysis,
27 |                                                                       );
28 |   $ncoils->fetch_input;  # gets sequence from DB
29 |   $ncoils->run;
30 |   $ncoils->write_output; # writes features to DB
31 | 
32 | =head1 DESCRIPTION
33 | 
34 | =cut
35 | 
36 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Coil;
37 | 
38 | use warnings ;
39 | use strict;
40 | use vars qw(@ISA);
41 | 
42 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
43 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Coil;
44 | 
45 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
46 | 
47 | 
48 | sub fetch_input {
49 |   my $self = shift;
50 |   # Run the parent class's fetch_input first.
51 |   $self->SUPER::fetch_input(@_);
52 | 
53 |   # Only the query and the analysis are handed to the Coil runnable.
54 |   my @runnable_args = (-query => $self->query, -analysis => $self->analysis);
55 | 
56 |   $self->runnable(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Coil->new(@runnable_args));
57 | }
58 | 
59 | 
60 | 1;
61 | 


--------------------------------------------------------------------------------
/modules/t/hiverepeatcoverage_rb.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use Bio::EnsEMBL::Test::TestUtils;
22 | use Bio::EnsEMBL::Test::MultiTestDB;
23 | 
24 | use Bio::EnsEMBL::Hive::Utils::Test qw(standaloneJob);
25 | 
26 | use_ok('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveRepeatCoverage');
27 | 
28 | my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
29 | my $db    = $multi->get_DBAdaptor('core');
30 | my $dbc   = $db->dbc;
31 | 
32 | # Connection details of the test core database, given to the module as its source_db.
33 | my %target_db = (
34 |   -dbname => $dbc->dbname,
35 |   -host   => $dbc->host,
36 |   -port   => $dbc->port,
37 |   -user   => $dbc->user,
38 |   -pass   => $dbc->pass,
39 |   -driver => $dbc->driver,
40 | );
41 | 
42 | # Two events are expected: a WARNING carrying the coverage summary and a
43 | # DATAFLOW of the coverage value on branch 2.
44 | my $expected_warning = $dbc->dbname . "\nAnalyses: RepeatMask\nTotal bases = 62842997\nTotal masked = 504576\t( 0.80% masked)\n";
45 | my @expected_events = (
46 |   [ 'WARNING', $expected_warning ],
47 |   [ 'DATAFLOW', {repeat_mask_coverage => 0.802915239704433574}, 2 ],
48 | );
49 | 
50 | standaloneJob(
51 |   'Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveRepeatCoverage', # module
52 |   { # input param hash
53 |     source_db => \%target_db,
54 |     repeat_logic_names => ['RepeatMask'],
55 |     coord_system_version => 'NCBI33',
56 |   },
57 |   \@expected_events, # list of events to test for
58 | );
59 | 
60 | done_testing();
61 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Signalp.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | =pod 
16 | 
17 | =head1 NAME
18 | 
19 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Signalp
20 | 
21 | =head1 SYNOPSIS
22 | 
23 |   my $signalp = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Signalp->new ( -db      => $db,
24 |     	  	                                                            -input_id   => $input_id,
25 |                                                                             -analysis   => $analysis,
26 |                                                                           );
27 |   $signalp->fetch_input;  # gets sequence from DB
28 |   $signalp->run;
29 |   $signalp->write_output; # writes features to DB
30 | 
31 | =head1 DESCRIPTION
32 | 
33 | =cut
34 | 
35 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Signalp;
36 | 
37 | use warnings ;
38 | use strict;
39 | use vars qw(@ISA);
40 | 
41 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
42 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Signalp;
43 | 
44 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
45 | 
46 | sub fetch_input {
47 |   my $self = shift;
48 |   # Run the parent class's fetch_input first.
49 |   $self->SUPER::fetch_input(@_);
50 |   # Only the query and the analysis are handed to the Signalp runnable.
51 |   my @runnable_args = (-query => $self->query, -analysis => $self->analysis);
52 | 
53 |   $self->runnable(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Signalp->new(@runnable_args));
54 | }
55 | 
56 | 
57 | 1;
58 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveLoadProteins.pm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadProteins;
19 | 
20 | use strict;
21 | use warnings;
22 | use feature 'say';
23 | use Bio::EnsEMBL::IO::Parser::Fasta;
24 | 
25 | use parent ('Bio::EnsEMBL::Hive::RunnableDB::JobFactory');
26 | 
27 | sub param_defaults {
28 |   my ($self) = @_;
29 |   # Start from the parent defaults; the entries below override inherited ones.
30 |   my %defaults = %{$self->SUPER::param_defaults()};
31 |   $defaults{column_names}        = ['iid'];
32 |   $defaults{sequence_table_name} = 'protein_sequences';
33 |   $defaults{load_biotype}        = 0;
34 | 
35 |   return \%defaults;
36 | }
37 | 
38 | sub fetch_input {
39 |   my ($self) = @_;
40 |   # Store each FASTA record in the sequence table; the accessions end up in 'inputlist'.
41 |   my $parser = Bio::EnsEMBL::IO::Parser::Fasta->open($self->param_required('protein_file'));
42 |   my $table_adaptor = $self->db->get_NakedTableAdaptor();
43 |   $table_adaptor->table_name($self->param_required('sequence_table_name'));
44 |   my @accessions;
45 |   while ($parser->next()) {
46 |     # The accession is the first whitespace-delimited word of the header.
47 |     my ($accession) = $parser->getHeader =~ /^(\S+)/;
48 |     my %row = (
49 |       'accession'  => $accession,
50 |       'seq'        => $parser->getSequence,
51 |     );
52 |     if ($self->param('load_biotype')) {
53 |       # The biotype, when requested, is the second word of the header.
54 |       if ($parser->getHeader =~ /\S+\s+(\S+)/) {
55 |         $row{biotype} = $1;
56 |       }
57 |       else {
58 |         $self->warning('Could not find biotype for '.$accession);
59 |       }
60 |     }
61 |     $table_adaptor->store([\%row]);
62 |     push(@accessions, $accession);
63 |   }
64 |   $self->param('inputlist', \@accessions);
65 | }
66 | 
67 | 1;
68 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveSequencesToFiles.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveSequencesToFiles
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveSequencesToFiles;
39 | 
40 | use strict;
41 | use warnings;
42 | 
43 | use Bio::SeqIO;
44 | use Bio::Seq;
45 | 
46 | use parent ('Bio::EnsEMBL::Hive::RunnableDB::JobFactory');
47 | 
# Default parameters for this job factory.
#
# Starts from the parent class defaults and then sets:
#   format       - output format handed to Bio::SeqIO, 'fasta'
#   chunk        - 0
#   chunk_size   - 10
#   column_names - a single output column, 'filename'
#   inputlist    - one entry, the '#filename#' placeholder
#
# NOTE(review): 'chunk' and 'chunk_size' are not read by fetch_input in this
# module; confirm whether anything else consumes them.
#
# Returns a hash reference.
sub param_defaults {
  my ($self) = @_;

  my %defaults = (
    %{$self->SUPER::param_defaults},
    format       => 'fasta',
    chunk        => 0,
    chunk_size   => 10,
    column_names => ['filename'],
    inputlist    => ['#filename#'],
  );

  return \%defaults;
}
60 | 
61 | 
# Writes every row of the table named by the required 'sequence_table_name'
# parameter to the file named by the required 'filename' parameter.
#
# Each row's accession becomes the sequence id and its seq column the
# sequence; records are written through Bio::SeqIO using the 'format'
# parameter.
sub fetch_input {
  my ($self) = @_;

  my $select = $self->db->dbc->prepare('SELECT accession, seq FROM '.$self->param_required('sequence_table_name'));
  $select->execute();

  # The leading '>' makes Bio::SeqIO open the file for writing.
  my $writer = Bio::SeqIO->new(
    -format => $self->param('format'),
    -file   => '>'.$self->param_required('filename'),
  );

  while (my $record = $select->fetchrow_arrayref) {
    my ($accession, $sequence) = @{$record}[0, 1];
    my $entry = Bio::Seq->new(-id => $accession, -seq => $sequence);
    $writer->write_seq($entry);
  }
}
73 | 
74 | 1;
75 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Hmmpfam.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | #
17 | #
18 | #
19 | =pod 
20 | 
21 | =head1 NAME
22 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Hmmpfam
24 | 
25 | =head1 SYNOPSIS
26 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Hmmpfam
    ->new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->output;
  $seg->write_output; # writes features to the DB
36 | 
37 | =head1 DESCRIPTION
38 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam
  to add functionality to read and write to databases.
41 | 
42 | 
43 | =head1 CONTACT
44 | 
45 | =cut
46 | 
47 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Hmmpfam;
48 | 
49 | use warnings ;
50 | use strict;
51 | use vars qw(@ISA);
52 | 
53 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
54 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam;
55 | 
56 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
57 | 
58 | 
59 | #
60 | # overridden methods
61 | #
# Fetches the input through the parent class, then builds the Hmmpfam
# runnable and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' db_file as
# -database, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;

  my @constructor_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
    -database => $self->analysis->db_file,
    %{$self->parameters_hash},
  );
  my $hmmpfam = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Hmmpfam->new(@constructor_args);

  $self->runnable($hmmpfam);
}
75 | 
76 | 
77 | 1;
78 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/PIRSF.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | #
17 | #
18 | =pod 
19 | 
20 | =head1 NAME
21 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PIRSF
23 | 
24 | =head1 SYNOPSIS
25 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PIRSF->
    new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->write_output; # writes features to the DB
34 | 
35 | =head1 DESCRIPTION
36 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PIRSF
  to add functionality to read and write to databases in
  a PIRSF-specific way.
40 | 
41 | =head1 CONTACT
42 | 
43 | =cut
44 | 
45 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::PIRSF;
46 | 
47 | use warnings ;
48 | use strict;
49 | use vars qw(@ISA);
50 | 
51 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
52 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PIRSF;
53 | 
54 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
55 | 
56 | 
# Fetches the input through the parent class, then builds the PIRSF runnable
# and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' db_file as
# -database, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;

  my $run = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PIRSF->
        new(-query     => $self->query,
            -analysis  => $self->analysis,
            -database  => $self->analysis->db_file,
            %{$self->parameters_hash}
            );
  $self->runnable($run);
}

# Explicit true value for require/use. Without it the module only loaded
# because the @ISA assignment above happened to be the last evaluated
# statement and returned a true value.
1;



--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Superfamily.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | #
17 | #
18 | =pod 
19 | 
20 | =head1 NAME
21 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily
23 | 
24 | =head1 SYNOPSIS
25 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily
    ->new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->output;
  $seg->write_output; # writes features to the DB
35 | 
36 | =head1 DESCRIPTION
37 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily
  to add functionality to read and write to databases.
40 | 
41 | 
42 | =head1 CONTACT
43 | 
44 | =cut
45 | 
46 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily;
47 | 
48 | use warnings ;
49 | use strict;
50 | use vars qw(@ISA);
51 | 
52 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
53 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily;
54 | 
55 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
56 | 
57 | 
58 | #
59 | # overridden methods
60 | #
# Fetches the input through the parent class, then builds the Superfamily
# runnable and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' db_file as
# -database, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;

  my @constructor_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
    -database => $self->analysis->db_file,
    %{$self->parameters_hash},
  );
  my $superfamily = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily->new(@constructor_args);

  $self->runnable($superfamily);
}
74 | 
75 | 
76 | 1;
77 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/BlastRNASeqPep.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::BlastRNASeqPep;
17 | 
18 | use strict;
19 | use vars qw( %Config );
20 | 
21 | # Hash containing config info
%Config = (

  # Settings are looked up per logic name; DEFAULT holds the shared values.
  BLASTRNASEQPEP_CONFIG_BY_LOGIC => {

    DEFAULT => {

      # databases are defined as hash keys from Bio::EnsEMBL::Analysis::Config::Databases
      OUTPUT_DB => '',
      MODEL_DB  => '',

      # If left blank all refined genes will be fetched
      LOGICNAME => '',

      # path to index to fetch the sequence of the blast hit to calculate % coverage
      INDEX => '/path/to/indexed/sequences/from/the/blastdb/index',
    },
  },

);
40 | 
# Exports configuration entries into the calling package as scalar variables.
#
# Receives the package name followed by an optional list of variable names;
# with no list every top-level key of %Config is exported. Each name is
# predeclared in the caller with "use vars" and then aliased to the matching
# %Config entry, so the caller can read it as $NAME.
#
# Dies if the predeclaration fails or if a requested name has no defined
# entry in %Config.
sub import {
  my ($class, @requested) = @_;
  my ($caller_package) = caller(0); # Name of the calling package

  # Get list of variables supplied, or else everything
  my @names = @requested ? @requested : keys(%Config);
  return unless @names;

  # Predeclare global variables in calling package
  my $declarations = join(' ', map { '$'.$_ } @names);
  eval "package $caller_package; use vars qw(" . $declarations . ")";
  die $@ if $@;

  for my $name (@names) {
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Exporter does a similar job to the following statement, but for
    # function names, not scalar variables.
    no strict 'refs';
    *{"${caller_package}::$name"} = \$Config{$name};
  }
}
67 | 
68 | 1;
69 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/IPRScan.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | #
17 | #
18 | =pod 
19 | 
20 | =head1 NAME
21 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::IPRScan
23 | 
24 | =head1 SYNOPSIS
25 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::IPRScan->
    new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->write_output; # writes features to the DB
34 | 
35 | =head1 DESCRIPTION
36 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::IPRScan
  to add functionality to read and write to databases in
  an IPRScan-specific way.
40 | 
41 | =head1 CONTACT
42 | 
43 | =cut
44 | 
45 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::IPRScan;
46 | 
47 | use warnings ;
48 | use strict;
49 | use vars qw(@ISA);
50 | 
51 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
52 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::IPRScan;
53 | 
54 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
55 | 
56 | 
# Fetches the input through the parent class, prints a progress line, then
# builds the IPRScan runnable and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' program_file
# as -program, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;
  print "FETCHING INPUT\n";

  my @constructor_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
    -program  => $self->analysis->program_file,
    %{$self->parameters_hash},
  );
  my $iprscan = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::IPRScan->new(@constructor_args);

  $self->runnable($iprscan);
}
70 | 
71 | 1;
72 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Pfam_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | #
17 | #
18 | =pod 
19 | 
20 | =head1 NAME
21 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Pfam_wormbase
23 | 
24 | =head1 SYNOPSIS
25 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Pfam_wormbase->
    new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->write_output; # writes features to the DB
34 | 
35 | =head1 DESCRIPTION
36 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Pfam_wormbase
  to add functionality to read and write to databases in
  a Pfam-specific way.
40 | 
41 | =head1 CONTACT
42 | 
43 | =cut
44 | 
45 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Pfam_wormbase;
46 | 
47 | use warnings ;
48 | use strict;
49 | use vars qw(@ISA);
50 | 
51 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
52 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Pfam_wormbase;
53 | 
54 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
55 | 
56 | 
# Fetches the input through the parent class, then builds the Pfam_wormbase
# runnable and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' db_file as
# -database, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;

  my $run = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Pfam_wormbase->
      new(-query     => $self->query,
          -analysis  => $self->analysis,
          -database  => $self->analysis->db_file,
          %{$self->parameters_hash}
          );
  $self->runnable($run);
}

# Explicit true value for require/use. Without it the module only loaded
# because the @ISA assignment above happened to be the last evaluated
# statement and returned a true value.
1;



--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Tmhmm.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =pod 
17 | 
18 | =head1 NAME
19 | 
20 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Tmhmm
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |   my $tmhmm = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Tmhmm->new ( -db      => $db,
25 | 	  	                                                        -input_id   => $input_id,
26 |                                                                         -analysis   => $analysis,
27 |                                                                       );
28 |   $tmhmm->fetch_input;  # gets sequence from DB
29 |   $tmhmm->run;
  $tmhmm->write_output; # writes features to the DB
31 | 
32 | =head1 DESCRIPTION
33 | 
34 | =cut
35 | 
36 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Tmhmm;
37 | 
38 | use warnings ;
39 | use strict;
40 | use vars qw(@ISA);
41 | 
42 | 
43 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Tmhmm;
44 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
45 | 
46 | 
47 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
48 | 
49 | 
# Fetches the input through the parent class (forwarding any extra
# arguments), then builds the Tmhmm runnable and registers it on this object.
#
# The runnable receives the query and the analysis, followed by everything in
# parameters_hash.
sub fetch_input {
  my ($self, @args) = @_;

  $self->SUPER::fetch_input(@args);

  my @constructor_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
    %{$self->parameters_hash},
  );
  my $tmhmm = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Tmhmm->new(@constructor_args);

  $self->runnable($tmhmm);
}
61 | 
62 | 1;
63 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/PacBioTranscriptFilter.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Tools::PacBioTranscriptFilter
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::Tools::PacBioTranscriptFilter;
39 | 
40 | use strict;
41 | use warnings;
42 | 
43 | use parent ('Bio::EnsEMBL::Analysis::Tools::CdnaUpdateTranscriptFilter');
44 | 
# Puts every transcript on the strand implied by its supporting evidence and
# then hands the whole list to the parent class filter.
#
# A transcript whose strand differs from the evidence strand has its exons
# flushed, re-stranded and added back, and gets its _gb_flag set to 1.
# Transcripts already on the evidence strand are passed through untouched.
#
# Arg    : array reference of transcripts
# Returns: whatever the parent filter_results returns for the same list
sub filter_results {
  my ($self, $transcripts) = @_;

  my @checked;
  for my $candidate (@{$transcripts}) {
    my $evidence_strand = $self->_get_transcript_evidence_strand($candidate);

    unless ($candidate->strand == $evidence_strand) {
      # Take the exon list before flushing so the exons can be added back.
      my $original_exons = $candidate->get_all_Exons;
      $candidate->flush_Exons();
      for my $exon (@{$original_exons}) {
        $exon->strand($evidence_strand);
        $candidate->add_Exon($exon);
      }
      $candidate->{_gb_flag} = 1;
    }

    push @checked, $candidate;
  }

  return $self->SUPER::filter_results(\@checked);
}
63 | 
# Works out the strand implied by a transcript's supporting evidence.
#
# Uses the first supporting feature of the transcript or, when that is
# missing, the first supporting feature of the transcript's first exon.
#
# Arg    : transcript
# Returns: the feature's strand multiplied by its hstrand
sub _get_transcript_evidence_strand {
  my ($self,$tran) = @_;

  my ($evidence) = @{$tran->get_all_supporting_features};

  # Fall back to the evidence attached to the first exon.
  ($evidence) = @{$tran->get_all_Exons->[0]->get_all_supporting_features}
    unless $evidence;

  my $genomic_strand = $evidence->strand;
  my $hit_strand     = $evidence->hstrand;

  return $genomic_strand * $hit_strand;
}
75 | 
76 | 1;
77 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Superfamily_wormbase.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | #
17 | #
18 | =pod 
19 | 
20 | =head1 NAME
21 | 
  Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily_wormbase
23 | 
24 | =head1 SYNOPSIS
25 | 
  my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily_wormbase
    ->new ( -db      => $db,
    -input_id   => $input_id,
    -analysis   => $analysis,
    );
  $seg->fetch_input;  # gets sequence from DB
  $seg->run;
  $seg->output;
  $seg->write_output; # writes features to the DB
35 | 
36 | =head1 DESCRIPTION
37 | 
  This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily_wormbase
  to add functionality to read and write to databases.
40 | 
41 | 
42 | =head1 CONTACT
43 | 
44 | =cut
45 | 
46 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Superfamily_wormbase;
47 | 
48 | use warnings ;
49 | use strict;
50 | use vars qw(@ISA);
51 | 
52 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
53 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily_wormbase;
54 | 
55 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
56 | 
57 | 
58 | #
59 | # overridden methods
60 | #
# Fetches the input through the parent class, then builds the
# Superfamily_wormbase runnable and registers it on this object.
#
# The runnable receives the query, the analysis, the analysis' db_file as
# -database, followed by everything in parameters_hash.
sub fetch_input {
  my ($self) = @_;

  $self->SUPER::fetch_input;

  my @constructor_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
    -database => $self->analysis->db_file,
    %{$self->parameters_hash},
  );
  my $superfamily = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Superfamily_wormbase->new(@constructor_args);

  $self->runnable($superfamily);
}
74 | 
75 | 
76 | 1;
77 | 


--------------------------------------------------------------------------------
/scripts/chunk_fasta_file.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
#This is a script which will run through the fasta file you provide and
#chunk it into fasta files each containing the number of entries you
#specify.
#The usage is
# chunk_fasta_file.pl fasta_file output_dir chunk_size
23 | 
24 | use warnings ;
25 | use strict;
26 | 
27 | 
# Command line: chunk_fasta_file.pl fasta_file output_dir chunk_size
my $filename = shift;
my $output_dir = shift;
my $chunk_size = shift;

my $usage = "usage chunk_fasta_file.pl fasta_file output_dir chunk_size\n";

# An explicit request for help is not an error: print the usage and succeed.
# This is checked first so that "-h" on its own is not treated as a missing
# argument.
if (defined $filename && ($filename eq '-h' || $filename eq '-help')) {
  print $usage;
  exit 0;
}

# Missing (or empty/zero) arguments are a usage error: report it on STDERR
# and exit non-zero so that calling scripts can detect the failure.
if (!$filename || !$output_dir || !$chunk_size) {
  print STDERR $usage;
  exit 1;
}

chunk_pepfile($filename, $output_dir, $chunk_size);
43 | 
# Splits a FASTA file into numbered chunk files.
#
#   $pepfile    - path of the FASTA file to read
#   $scratchdir - directory the chunk files are written to
#   $size       - number of entries per chunk file (the last may hold fewer)
#
# Chunks are written to "$scratchdir/${pepfile}_chunk.N", N starting at 1.
# Dies if the input file or a chunk file cannot be opened or closed.
#
# NOTE(review): $pepfile is used verbatim in the chunk file name, so an input
# path with directory components needs the same directories to exist under
# $scratchdir. Confirm whether the base name was intended.
sub chunk_pepfile {
  my ($pepfile, $scratchdir, $size) = @_;

  open(my $pep_fh, '<', $pepfile) or die "couldn't open $pepfile $!";

  # Read one FASTA entry per iteration. "local" restores the caller's record
  # separator on every exit path, including die.
  local $/ = "\>";

  my $count = 0;
  my $chunk = 1;
  my $chunk_fh;
  my $chunk_path;

  while (my $entry = <$pep_fh>) {
    # The first read returns only the '>' that opens the file.
    next if $entry eq "\>";

    if ($count == 0) {
      $chunk_path = $scratchdir."/".$pepfile."_chunk.$chunk";
      open($chunk_fh, '>', $chunk_path) or die "couldn't open $chunk_path $!";
    }

    # Drop the separator belonging to the next entry before writing this one.
    $entry =~ s/\>$//;
    print {$chunk_fh} ">$entry";
    $count++;

    if ($count == $size) {
      # Close each full chunk so buffered write errors are reported.
      close($chunk_fh) or die "couldn't close $chunk_path $!";
      undef $chunk_fh;
      $count = 0;
      $chunk++;
    }
  }

  # Flush and close the final, partially filled chunk.
  if (defined $chunk_fh) {
    close($chunk_fh) or die "couldn't close $chunk_path $!";
  }
  close($pep_fh) or die "couldn't close $pepfile $!";

  return;
}
76 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Finished/EPCR.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | EPCR.pm 
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |   my $runnabledb = Bio::EnsEMBL::Analysis::RunnableDB::Finished::EPCR->
23 |   new(
24 |       -input_id => 'contig::AL805961.22.1.166258:1:166258:1',
25 |       -db => $db,
26 |       -analysis => $analysis,
27 |      );
28 |   $runnabledb->fetch_input;
29 |   $runnabledb->run;
30 |   $runnabledb->write_output;
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | The Finished version of EPCR.
36 | 
37 | =head1 CONTACT
38 | 
39 | anacode@sanger.ac.uk
40 | 
41 | =cut
42 | 
43 | package Bio::EnsEMBL::Analysis::RunnableDB::Finished::EPCR;
44 | 
45 | use strict;
46 | use warnings;
47 | 
48 | use Bio::EnsEMBL::Analysis::RunnableDB::EPCR;
49 | use Bio::EnsEMBL::Analysis::Runnable::Finished::EPCR;
50 | use vars qw(@ISA);
51 | 
52 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::EPCR);
53 | 
# Prepares the Finished EPCR runnable for this job.
#
# The STS source is chosen in this order:
#   1. a '-STS_FILE' entry already present in parameters_hash,
#   2. the analysis' db_file, used as '-STS_FILE',
#   3. all markers from the database, passed as '-STS_FEATURES'.
#
# The sequence returned by fetch_sequence is stored as the query and handed
# to the runnable together with the analysis and its program_file.
#
# Dies with "No markers in <dbname>" when option 3 is needed but the marker
# adaptor returns no markers.
sub fetch_input{
  my ($self) = @_;
  my %parameters = %{$self->parameters_hash};
  if($self->analysis->db_file){
    $parameters{'-STS_FILE'} = $self->analysis->db_file
      unless($parameters{'-STS_FILE'});
  }
  if(!$parameters{'-STS_FILE'}){
    my $sts = $self->db->get_MarkerAdaptor->fetch_all;
    if(!@$sts){
      my $message = "No markers in ".$self->db->dbname;
      # throw() is never imported into this package, so calling it directly
      # died with "Undefined subroutine" instead of this message. Use the
      # Ensembl exception routine when it is loaded, otherwise fall back to
      # Carp.
      my $ensembl_throw = Bio::EnsEMBL::Utils::Exception->can('throw');
      $ensembl_throw->($message) if($ensembl_throw);
      require Carp;
      Carp::confess($message);
    }
    $parameters{'-STS_FEATURES'} = $sts;
  }
  my $slice = $self->fetch_sequence;
  $self->query($slice);
  my $runnable = Bio::EnsEMBL::Analysis::Runnable::Finished::EPCR->new
    (
     -query => $slice,
     -program => $self->analysis->program_file,
     -analysis => $self->analysis,
     %parameters
    );
  $self->runnable($runnable);
}
77 | 
78 | 1;
79 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Runnable/DustMasker.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Runnable::DustMasker
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::Runnable::DustMasker;
39 | 
40 | use strict;
41 | use warnings;
42 | 
43 | use parent qw(Bio::EnsEMBL::Analysis::Runnable::Dust);
44 | 
=head2 run_analysis

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::DustMasker
  Arg [2]   : string, program name (optional, defaults to $self->program)
  Function  : constructs a dustmasker-style commandline
              (-level, -window, extra options, -in <queryfile>) and runs
              it through the shell, redirecting the output to the
              results file
  Returntype: none
  Exceptions: throws if the program is not executable or if system
              does not return 0
  Example   : $runnable->run_analysis;

=cut

sub run_analysis{
  my ($self, $program) = @_;

  # throw() is not imported into this package, so it is called by its
  # fully qualified name below.
  require Bio::EnsEMBL::Utils::Exception;

  if(!$program){
    $program = $self->program;
  }
  unless($program && -x $program){
    Bio::EnsEMBL::Utils::Exception::throw(
      (defined $program ? $program : 'undef').
      " is not executable DustMasker::run_analysis ");
  }

  # Run the program that was validated above; previously the command was
  # always built from $self->program, which ignored a passed-in program.
  my $command = $program;
  $command .= " -level ".$self->level if($self->level);
  $command .= " -window ".$self->window_size if($self->window_size);
  $command .= ' '.$self->options if ($self->options);
  $command .= " -in ".$self->queryfile." > ".$self->resultsfile;
  print "Running analysis ".$command."\n";
  system($command) == 0
    or Bio::EnsEMBL::Utils::Exception::throw("FAILED to run ".$command);
}
74 | 
75 | 1;
76 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/Config/BamMergeStatic.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Hive::Config::BamMergeStatic
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::Hive::Config::BamMergeStatic;
39 | 
40 | use strict;
41 | use warnings;
42 | 
43 | 
44 | use parent ('Bio::EnsEMBL::Analysis::Hive::Config::BaseStatic');
45 | 
# Return the static BAM merge configuration section stored under $key
# ('default', 'picard' or 'samtools'), or undef for an unknown key.
# Values written as '#name#' are placeholders left as-is by this method.
sub _master_config {
  my ($self, $key) = @_;

  # If 0, do not use multithreading, faster but can use more memory.
  # If > 0, tells how many cpu to use for samtools or just to use
  # multiple cpus for picard.
  my $default_section = {
    use_threading => '#use_threads#',
  };

  # picard_lib is the path to MergeSamFiles.jar. The options below are the
  # defaults for Picard; they need changing when samtools does the merging.
  my $picard_section = {
    java         => 'java',
    java_options => '-Xmx2g',
    picard_lib   => '#picard_lib_jar#',
    options      => 'MAX_RECORDS_IN_RAM=20000000 CREATE_INDEX=true SORT_ORDER=coordinate ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT',
  };

  my $samtools_section = {
    options => '',
  };

  my %config = (
    default  => $default_section,
    picard   => $picard_section,
    samtools => $samtools_section,
  );

  return $config{$key};
}
71 | 
72 | 1;
73 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/OrthologueEvaluatorExonerate.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::OrthologueEvaluatorExonerate;
17 | 
18 | use strict;
19 | use vars qw(%Config);
20 | 
# Configuration exported by this module: a single top-level entry,
# EXONERATE_PROTEIN_CONF, holding the settings for the protein exonerate run.
%Config = (
  EXONERATE_PROTEIN_CONF => {
    QUERYTYPE => 'protein',
    QUERYSEQS => "QUERYSEQ",
    IIDREGEXP => '(\d+):(\d+)',

    # Either set OUTDB here or fill out
    # Bio::EnsEMBL::Analysis::Config::Databases instead.
    OUTDB => {
      -dbname => '',
      -host   => '',
      -port   => '3306',
      -user   => 'ensadmin',
      -pass   => '****',
    },

    COVERAGE_BY_ALIGNED => 0,

    # Command-line options handed to exonerate.
    OPTIONS => "--model protein2genome --forwardcoordinates FALSE --softmasktarget TRUE --exhaustive FALSE  --bestn 1",
  },
);
45 | 
46 | 
47 | ############################################################
48 | 
49 | 
# Export entries of %Config into the calling package as scalar variables.
#
# With no arguments every key of %Config is exported; otherwise only the
# named keys are. Each exported variable is an alias to the matching
# %Config value. Dies if a requested key has no defined value in %Config.
sub import {
    my ($pack, @requested) = @_;    # $pack is this package's name, unused
    my ($callpack) = caller(0);     # package that issued the 'use'

    my @vars = @requested ? @requested : keys(%Config);
    return unless @vars;

    # Predeclare the variables in the calling package so they are usable
    # there under 'use strict'.
    my $declarations = join(' ', map { '$'.$_ } @vars);
    eval "package $callpack; use vars qw($declarations)";
    die $@ if $@;

    foreach my $name (@vars) {
        die "Error: Config: $name not known\n"
            unless defined $Config{$name};

        # Alias the caller's scalar to the config value through the glob.
        no strict 'refs';
        *{"${callpack}::$name"} = \$Config{$name};
    }
}
68 | 1;
69 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveLoadmRNAs.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadmRNAs
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | Module to load mRNA into a customised table in the Hive database
36 | 
37 | =cut
38 | 
39 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadmRNAs;
40 | 
41 | use strict;
42 | use warnings;
43 | use POSIX qw(strftime);
44 | 
45 | use parent ('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadSequences');
46 | 
=head2 create_row_data

 Arg [1]    : Bio::EnsEMBL::IO::Parser object
 Description: Reduces the header of the current record to its first
              whitespace-delimited word, the accession, and builds the row
              to store: accession, sequence, source database ('RefSeq'
              when the accession starts with NM, 'INSDC' otherwise),
              biotype 'mRNA' and today's date as YYYY/MM/DD
 Returntype : Array ref holding one hash ref
 Exceptions : None

=cut

sub create_row_data {
  my ($self, $parser) = @_;

  my $header = $parser->getHeader;
  my ($accession) = $header =~ /^\s*(\S+)/;

  # Accessions starting with NM are treated as RefSeq, the rest as INSDC
  my $source = ($accession =~ /^NM/) ? 'RefSeq' : 'INSDC';

  my %row = (
    accession => $accession,
    seq       => $parser->getSequence,
    source_db => $source,
    biotype   => 'mRNA',
    date      => strftime("%Y/%m/%d", localtime),
  );

  return [\%row];
}
71 | 
72 | 1;
73 | 
74 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/GeneBuildUtils/HomologyUtils.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::HomologyUtils - utilities for gene objects
19 | 
20 | =head1 SYNOPSIS
21 | 
 22 |   use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::HomologyUtils qw(get_gene_obj_out_of_compara_homology_object);
23 | 
24 |   or 
25 | 
26 |   use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::HomologyUtils
27 |   
28 |   to get all methods
29 | 
30 | =head1 DESCRIPTION
31 | 
32 | All methods in this class should take a Bio::EnsEMBL::Compara::Homology
33 | object as their first argument.
34 | 
35 | The methods provided should carry out some standard 
36 | functionality for said objects such as printing info, and 
37 | cloning
38 | 
39 | =head1 CONTACT
40 | 
41 | please send any questions to http://lists.ensembl.org/mailman/listinfo/dev
42 | 
43 | =head1 METHODS
44 | 
 45 | the rest of the documentation details the exported static
46 | class methods
47 | 
48 | =cut
49 | 
50 | 
51 | package Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::HomologyUtils;
52 | 
53 | use strict;
54 | use warnings;
55 | use Exporter;
56 | 
57 | use vars qw (@ISA  @EXPORT);
58 | 
59 | @ISA = qw(Exporter);
60 | @EXPORT = qw(
61 |              get_gene_obj_out_of_compara_homology_object
62 |             );
63 | 
64 | 
65 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning stack_trace_dump);
66 | 
# Return the gene of the homology member that belongs to $species.
#
# Arg [1]: homology object providing gene_list() (an array ref of members)
# Arg [2]: genome db name to compare each member's genome_db->name against
#
# Returns undef when no homology is given or when no member matches. The
# whole list is always walked, so when several members match, the gene of
# the last matching member is returned.
sub get_gene_obj_out_of_compara_homology_object {
  my ( $homology, $species ) = @_ ;

  my $selected_gene ;
  if ( !$homology ) {
    return $selected_gene ;
  }

  foreach my $member ( @{ $homology->gene_list } ) {
    next unless $member->genome_db->name eq $species ;
    $selected_gene = $member->get_Gene ;
  }

  return $selected_gene ;
}
80 | 
81 | 1;
82 | 


--------------------------------------------------------------------------------
/scripts/Merge/merge-refseq.conf:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is a "bash" script which is sourced by merge-wrapper.ksh.
 3 | # This means that there can't be any spaces around '=', for example.
 4 | #
 5 | 
 6 | ### THIS FILE: An example configuration for merging Ensembl into
 7 | ### RefSeq (rather than Havana as we would normally do).  The RefSeq
 8 | ### gene set is taken from the most recent human otherfeatures database
 9 | ### (the genes there with analysis logic name "refseq_human_import" in
10 | ### this case).
11 | #
12 | # Look to "merge.conf" for a slightly more well-documented configuration
13 | # file.
14 | 
15 | ensembl_analysis_base="${HOME}/ensembl-src/ensembl-analysis"
16 | 
17 | # Comment out to get a randomly generated output directory name
18 | # ("output.XXXX") in the current directory.
19 | output_dir='merge-refseq_ensembl-output'
20 | 
21 | njobs='75'
22 | concurrent='20'
23 | 
24 | rouser='ensro'
25 | ropassword=''
26 | 
27 | rwuser=''
28 | rwpassword=''
29 | 
30 | # host_secondary='genebuild8'
31 | # database_secondary='cgg_homo_sapiens_ensembl_74'
32 | host_secondary=''
33 | database_secondary=''
34 | 
35 | # host_primary='ens-livemirror'
36 | # database_primary='homo_sapiens_otherfeatures_74_37'
37 | host_primary=''
38 | database_primary=''
39 | 
40 | # Target database needs to exist but should be empty.
41 | # host_output='genebuild8'
42 | # database_output='ak4_refseq_ensembl_74'
43 | host_output=''
44 | database_output=''
45 | 
46 | # Just comment out or leave empty if not applicable.
47 | # host_ccds='ens-livemirror'
48 | # database_ccds='ccds_human_74'
49 | host_ccds=''
50 | database_ccds=''
51 | 
 52 | # Filter options.  You may specify either the X_include or the X_exclude
 53 | # option, but not both.  These filter on gene analysis logic names.
 54 | # Any gene filtered out will not be found in the output database (i.e.
 55 | # they will also be skipped by the post-processing script that copies
 56 | # all unprocessed Secondary genes to the output database).
57 | 
58 | secondary_include=''
59 | secondary_exclude=''
60 | 
61 | primary_include='refseq_human_import'
62 | primary_exclude=''
63 | 
64 | 
65 | # Tagging:  Will be used as suffix for logic names ("_tag") and for
66 | # source.
67 | 
68 | secondary_tag='ensembl'
69 | primary_tag='refseq'
70 | 
71 | # Xrefs:  The format is a comma-separated list of
72 | # "db_name,db_display_name,type"
73 | 
74 | primary_gene_xref='RefSeq_import,RefSeq_import,MISC'
75 | primary_transcript_xref='RefSeq_mRNA,RefSeq mRNA,MISC'
76 | primary_translation_xref='RefSeq_peptide,RefSeq peptide,MISC'
77 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveDBSeqFiles.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |      http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 | Please email comments or questions to the public Ensembl
21 | developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 | Questions may also be sent to the Ensembl help desk at
24 | <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =head1 NAME
27 | 
28 | Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveDBSeqFiles
29 | 
30 | =head1 SYNOPSIS
31 | 
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | 
36 | =cut
37 | 
38 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveDBSeqFiles;
39 | 
40 | use strict;
41 | use warnings;
42 | 
43 | use parent ('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveBaseRunnableDB');
44 | 
45 | 
# Fetch the sequences for a list of accessions from the table named by the
# 'sequence_table_name' parameter.
#
# Arg [1]: array ref of accessions, each looked up with fetch_by_dbID
#
# Returns an array ref of Bio::Seq objects, one per accession, in input
# order. The biotype of every row is collected in a hash keyed by accession
# and stored through get_biotype(). Throws when an accession has no row.
sub get_query_seqs {
  my ($self, $accession_array) = @_;

  my $sequence_adaptor = $self->db->get_NakedTableAdaptor();
  $sequence_adaptor->table_name($self->param('sequence_table_name'));

  my %biotype_of;
  my @seq_objects;
  for my $acc (@{$accession_array}) {
    my $row = $sequence_adaptor->fetch_by_dbID($acc);
    if(!$row) {
      $self->throw('Did not find an entry in the '.$self->param('sequence_table_name')." table matching the accession. Accession:\n".$acc);
    }

    $biotype_of{$acc} = $row->{'biotype'};
    push(@seq_objects, Bio::Seq->new(-display_id => $acc, -seq => $row->{'seq'}));
  }

  # Keep the accession => biotype map for later retrieval.
  $self->get_biotype(\%biotype_of);

  return \@seq_objects;
}
70 | 
71 | 
# Combined getter/setter for the value kept in the '_biotype_hash' param.
#
# Arg [1]: optional value to store; a false value leaves the param untouched
#
# Returns the current content of the '_biotype_hash' param.
sub get_biotype {
  my ($self, $biotype_hash) = @_;

  $self->param('_biotype_hash', $biotype_hash) if $biotype_hash;

  return $self->param('_biotype_hash');
}
79 | 
80 | 
81 | 1;
82 | 


--------------------------------------------------------------------------------
/scripts/genebuild/convert_genome_dump.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [2019-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use warnings;
18 | use strict;
19 | use feature 'say';
20 | use Getopt::Long;
21 | use Bio::EnsEMBL::Utils::Exception qw( throw warning verbose);
22 | 
# Command-line handling: all three of input file, output file and conversion
# type are required; --remove_masking is an optional negatable flag.
my ($input_file, $output_file, $conversion_type);
my $remove_masking = 0;

GetOptions(
  'input_file:s'      => \$input_file,
  'output_file:s'     => \$output_file,
  'conversion_type:s' => \$conversion_type,
  'remove_masking!'   => \$remove_masking,
);

if(!$input_file || !$output_file || !$conversion_type) {
  throw("You must specify both an input file, an output file and a conversion type");
}

if(!-e $input_file) {
  throw("The input file specified does not exist. Input file: ".$input_file);
}

# Supported conversions, keyed by the value given to --conversion_type.
my %converter_for = (
  slice_name_to_seq_region_name => \&slice_name_to_seq_region_name,
);

my $converter = $converter_for{$conversion_type};
if(!$converter) {
  throw("The conversion type you selected is not supported. Conversion type selected: ".$conversion_type);
}
$converter->($input_file,$output_file,$remove_masking);

exit;
47 | 
# Copy a FASTA file, rewriting each header from a colon-separated slice
# name to ">" followed by its third field, the seq region name.
#
# Arg [1]: path of the FASTA file to read
# Arg [2]: path of the FASTA file to write (overwritten if it exists)
# Arg [3]: when true, sequence lines are upper-cased
#
# Throws if either file cannot be opened, if a header does not contain the
# expected colon-separated fields, or if the output cannot be closed.
sub slice_name_to_seq_region_name {
  my ($input_file,$output_file,$remove_masking) = @_;

  # Open the input first so that an unreadable input never truncates an
  # existing output file.
  open(my $in_fh, '<', $input_file)
    or throw("Could not open input file for reading. Input file: ".$input_file.". Error: ".$!);
  open(my $out_fh, '>', $output_file)
    or throw("Could not open output file for writing. Output file: ".$output_file);

  while(my $line = <$in_fh>) {
    if($line =~ /^>/) {
      # The first field also swallows the leading '>'.
      unless($line =~ /[^\:]+\:[^\:]+\:([^\:]+)\:/) {
        throw("Failed to parse the header line. Expected to find a seq region name after the second colon in header. Header used: ".$line);
      }
      my $header = ">".$1;
      say $out_fh $header;
    } else {
      if($remove_masking) {
        $line = uc($line);
      }
      print $out_fh $line;
    }
  }

  close($in_fh);
  # Buffered write errors only surface when the handle is closed.
  close($out_fh)
    or throw("Failed to close output file after writing. Output file: ".$output_file.". Error: ".$!);
}
74 | 


--------------------------------------------------------------------------------
/scripts/markers/map_weight.pl:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # script to calculate map_weights in a database that has markers
17 | # and marker_features. Recreates the marker_feature table with weights set
18 | 
19 | use warnings ;
20 | use strict;
21 | use DBI;
22 | 
23 | use Getopt::Long qw(:config no_ignore_case);
24 | 
# Recalculate the weight of every marker feature: the number of features
# that share its marker_id. The marker_feature table is emptied and refilled
# from a temporary copy carrying that count in its last column.

my ( $host, $user, $pass, $port, $dbname );
my $verbose = 0;
$port = 3306;

GetOptions( "host|dbhost|h=s", \$host,
	    "user|dbuser|u=s", \$user,
	    "pass|dbpass|p=s", \$pass,
	    "port|dbport|P=i", \$port,
	    "dbname|db|D=s", \$dbname,
	    "verbose", \$verbose
	  );

# Both the host and the database name are needed to build the DSN.
if( !$host || !$dbname ) {
  usage();
}

my $dsn = "DBI:mysql:host=$host;dbname=$dbname";
if( $port ) {
  $dsn .= ";port=$port";
}

# RaiseError makes a failed connection or statement die immediately. Without
# it, a failure while building the temporary tables would still be followed
# by the DELETE below and the marker features would be lost.
my $db = DBI->connect( $dsn, $user, $pass,
                       { RaiseError => 1, PrintError => 0 } );

# Number of features per marker.
$db->do( "
  CREATE TABLE tmp_m_weight
  SELECT marker_id, count(*) as count 
  FROM marker_feature
  GROUP BY marker_id
" );

# Copy of marker_feature with the per-marker count as the last column.
$db->do( "
  CREATE TABLE new_marker_feature
  SELECT mf.marker_feature_id, mf.marker_id, mf.seq_region_id, mf.seq_region_start,
         mf.seq_region_end, mf.analysis_id, tmw.count
  FROM   marker_feature mf, tmp_m_weight tmw
  WHERE  mf.marker_id = tmw.marker_id
" );

# Only reached when both temporary tables were built successfully.
$db->do( "delete from marker_feature" );
$db->do( "insert into marker_feature select * from new_marker_feature" );
$db->do( "drop table tmp_m_weight" );
$db->do( "drop table new_marker_feature" );

$db->disconnect;
70 | 
# Print the command-line help to standard output and terminate the script.
sub usage {
  my $help_text = <<'EOF';
    
Usage: perl map_weight.pl [options]
   -user username for a write enabled user
   -host hostname
   -port portnumber
   -pass password
   -dbname database name where the markers and the features are

EOF

  print $help_text;
  exit;
}
85 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Sam2Bam.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::Sam2Bam;
17 | 
18 | use strict;
19 | use vars qw( %Config );
20 | 
# Configuration exported by this module: SAM2BAM_CONFIG_BY_LOGIC maps a
# logic name (only DEFAULT here) to its settings.
%Config = (
  SAM2BAM_CONFIG_BY_LOGIC => {
    DEFAULT => {
      # Directory containing the SAM file(s).
      SAM_DIR    => '/path/to/directory',

      # Path to the BAM file to produce as output.
      BAMFILE    => '/path/to/my/SAM/file/introns.sam',

      # Regex identifying which SAM files to merge.
      REGEX      => '.sam',

      # Optional file containing all the readgroup headers used in the
      # alignments.
      HEADERFILE => '/path/to/my/header/file/headers.txt',

      # Path to the dumped genome file used for the alignment; an index
      # will be made for it if one does not already exist.
      GENOMEFILE => '/path/to/my/genome/file.fa',
    },
  },
);
42 | 
# Export entries of %Config into the calling package as scalar variables.
#
# With no arguments every key of %Config is exported; otherwise only the
# named keys are. Each exported variable is an alias to the matching
# %Config value. Dies if a requested key has no defined value in %Config.
sub import {
  my ($pack, @requested) = @_;   # $pack is this package's name, unused
  my ($callpack) = caller(0);    # name of the calling package

  # Use the supplied list of variables, or else everything
  my @vars = @requested ? @requested : keys( %Config );
  return unless @vars;

  # Predeclare global variables in the calling package
  my $declarations = join(' ', map { '$'.$_ } @vars);
  eval "package $callpack; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@vars) {
    die "Error: Config: $name not known\n"
      unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables rather than
    # function names: alias the caller's scalar to the config value.
    no strict 'refs';
    *{"${callpack}::$name"} = \$Config{$name};
  }
}
69 | 
70 | 1;
71 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/AddStableIds.pm.example:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
19 | =head1 NAME
20 | 
21 |     Bio::EnsEMBL::Analysis::Config::AddStableIds
22 | 
23 | =head1 SYNOPSIS
24 | 
25 |     use Bio::EnsEMBL::Analysis::Config::AddStableIds;
26 | 
27 | =head1 DESCRIPTION
28 | 
29 | 
30 | 
31 | =head1 CONTACT
32 | 
33 |     Please email comments or questions to the public Ensembl
34 |     developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
35 | 
36 |     Questions may also be sent to the Ensembl help desk at
37 |     <http://www.ensembl.org/Help/Contact>.
38 | 
39 | =cut
40 | 
41 | 
42 | package Bio::EnsEMBL::Analysis::Config::AddStableIds;
43 | 
44 | use strict;
45 | use vars qw( %Config );
46 | 
# Configuration exported by this module: ADD_STABLEIDS_BY_LOGIC maps a
# logic name (only DEFAULT here) to its settings.
%Config = (
  ADD_STABLEIDS_BY_LOGIC => {
    DEFAULT => {
      GENES_DB   => 'ROUGHDB',
      PREFIX     => undef,
      LOGIC_NAME => undef,
    },
  },
);
60 | 
# Export entries of %Config into the calling package as scalar variables.
#
# With no arguments every key of %Config is exported; otherwise only the
# named keys are. Each exported variable is an alias to the matching
# %Config value. Dies if a requested key has no defined value in %Config.
sub import {
  my ( $pack, @requested ) = @_;    # $pack is this package's name, unused
  my ($callpack) = caller(0);       # name of the calling package

  # Use the supplied list of variables, or else everything
  my @vars = @requested ? @requested : keys(%Config);
  return unless @vars;

  # Predeclare global variables in the calling package
  my $declarations = join( ' ', map { '$' . $_ } @vars );
  eval "package $callpack; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@vars) {
    die "Error: Config: $name not known\n"
      unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables rather than
    # function names: alias the caller's scalar to the config value.
    no strict 'refs';
    *{"${callpack}::$name"} = \$Config{$name};
  }
} ## end sub import
86 | 
87 | 1;
88 | 


--------------------------------------------------------------------------------
/modules/t/test-genome-DBs/pararge_aegeria/core/meta.txt:
--------------------------------------------------------------------------------
 1 | 1	\N	schema_type	core
 2 | 2	\N	schema_version	104
 3 | 4	1	genebuild.start_date	2021-05-Ensembl
 4 | 5	1	assembly.date	2021-01
 5 | 6	1	species.common_name	Speckled Wood Butterfly
 6 | 8	1	species.scientific_name	Pararge aegeria
 7 | 9	1	species.taxonomy_id	116150
 8 | 10	1	assembly.accession	GCA_905163445.1
 9 | 11	1	assembly.default	ilParAegt1.1
10 | 12	1	assembly.name	ilParAegt1.1
11 | 13	1	assembly.web_accession_source	NCBI
12 | 14	1	assembly.web_accession_type	INSDC Assembly ID
13 | 15	1	annotation.provider_name	Ensembl
14 | 16	1	annotation.provider_url	www.ensembl.org
15 | 17	1	assembly.coverage_depth	high
16 | 18	1	assembly.provider_name	
17 | 19	1	assembly.provider_url	
18 | 21	1	species.stable_id_prefix	ENSPAG
19 | 22	1	species.url	Pararge_aegeria_GCA_905163445.1
20 | 23	1	species.display_name	Pararge aegeria (Speckled Wood Butterfly) - GCA_905163445.1
21 | 24	1	species.division	EnsemblMetazoa
22 | 25	1	species.strain	reference
23 | 26	1	species.strain_group	pararge_aegeria_gca905163445v1
24 | 27	1	species.production_name	pararge_aegeria_gca905163445v1
25 | 28	1	strain.type	strain
26 | 29	1	repeat.analysis	repeatdetector
27 | 30	1	repeat.analysis	dust
28 | 31	1	repeat.analysis	trf
29 | 36	1	species.classification	Parargina
30 | 37	1	species.classification	Satyrini
31 | 38	1	species.classification	Satyrinae
32 | 39	1	species.classification	Nymphalidae
33 | 40	1	species.classification	Papilionoidea
34 | 41	1	species.classification	Obtectomera
35 | 42	1	species.classification	Ditrysia
36 | 43	1	species.classification	Heteroneura
37 | 44	1	species.classification	Neolepidoptera
38 | 45	1	species.classification	Glossata
39 | 46	1	species.classification	Lepidoptera
40 | 47	1	species.classification	Amphiesmenoptera
41 | 48	1	species.classification	Holometabola
42 | 49	1	species.classification	Neoptera
43 | 50	1	species.classification	Pterygota
44 | 51	1	species.classification	Dicondylia
45 | 52	1	species.classification	Insecta
46 | 53	1	species.classification	Hexapoda
47 | 54	1	species.classification	Pancrustacea
48 | 55	1	species.classification	Mandibulata
49 | 56	1	species.classification	Arthropoda
50 | 57	1	species.classification	Panarthropoda
51 | 58	1	species.classification	Ecdysozoa
52 | 59	1	species.classification	Protostomia
53 | 60	1	species.classification	Bilateria
54 | 61	1	species.classification	Eumetazoa
55 | 62	1	species.classification	Metazoa
56 | 63	1	species.classification	Opisthokonta
57 | 64	1	species.classification	Eukaryota
58 | 65	1	genebuild.level	toplevel
59 | 66	1	transcriptbuild.level	toplevel
60 | 67	1	exonbuild.level	toplevel
61 | 68	1	assembly.mapping	primary_assembly:ilParAegt1.1|contig
62 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Runnable/ProteinAnnotation/PrositeProfile.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | package Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::PrositeProfile;
18 | use warnings ;
19 | use vars qw(@ISA);
20 | use strict;
21 | 
22 | # Object preamble - inherits from Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation
23 | 
24 | 
25 | use Bio::EnsEMBL::Utils::Exception qw(throw warning);
26 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation;
27 | 
28 | @ISA = qw(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation);
29 | 
30 | 
# Flag accessor: this runnable always answers 0 (false).
# NOTE(review): presumably tells the ProteinAnnotation base class that the
# program cannot take a multi-sequence query file - confirm against the parent.
sub multiprotein {
  my $self = shift;
  return 0;
}
35 | 
36 | 
# Runs the profile search program on the query file against the configured
# database, redirecting the program's standard output into the results file.
#
# Arg [1]    : none (uses $self->program, ->queryfile, ->database and
#              ->resultsfile)
# Returntype : 1 on success
# Exceptions : throws if the command cannot be started, is killed by a
#              signal, or exits with a non-zero status
sub run_analysis {
  my ($self) = @_;

  # A shell is required here because of the output redirection.
  my $command = $self->program . ' -f ' . $self->queryfile . ' ' .
                $self->database . ' > ' . $self->resultsfile;

  my $status = system($command);
  return 1 if $status == 0;

  # $! is only meaningful when system() itself failed (-1); otherwise the
  # child's exit information is encoded in the wait status.
  my $reason;
  if ($status == -1) {
    $reason = "could not execute command: $!";
  } elsif ($status & 127) {
    $reason = 'died with signal ' . ($status & 127);
  } else {
    $reason = 'exited with status ' . ($status >> 8);
  }

  throw("Failed during Profile run ($command): $reason\n");
}
45 | 
46 | 
# Parses the results file written by run_analysis and stores one protein
# feature per hit line via $self->output.
#
# Arg [1]    : string, identifier of the sequence the hits belong to
# Returntype : whatever $self->output returns; nothing when the results
#              file exists but is empty (output is left untouched)
# Exceptions : throws if the results file exists but cannot be opened or
#              closed
sub parse_results {
  my ($self, $seqid) = @_;

  my $resfile = $self->resultsfile;

  # An existing, zero-length file means the program reported nothing.
  return if (-e $resfile and -z $resfile);

  my @pfs;
  if (-e $resfile) {
    # 3-arg open with a lexical handle; the message is built as one string
    # so the file name and the system error are actually reported.
    open(my $fh, '<', $resfile)
      or throw("Error opening $resfile: $!");

    while (my $line = <$fh>) {
      # Hit lines look like: score raw_score pos. start - end ACCESSION|...
      if ($line =~ /^\s*(\S+)\s+(\d+)\s*pos\.\s+(\d+)\s+\-\s+(\d+)\s+(\w+)\|/) {
        my ($sc, $st, $en, $acc) = ($1, $3, $4, $5);
        my $fp = $self->create_protein_feature($st,
                                               $en,
                                               $sc,
                                               $seqid,
                                               0, 0,
                                               $acc,
                                               $self->analysis,
                                               0, 0);
        push @pfs, $fp;
      }
    }

    close($fh) or throw("Error closing $resfile: $!");
  } else {
    # Previously this path read from an undefined filehandle; it is now
    # handled explicitly and still yields an empty feature list.
    warn "Results file $resfile does not exist, no features parsed\n";
  }

  $self->output(\@pfs);
}
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Runnable/SamtoolsMerge.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 |   Please email comments or questions to the public Ensembl
21 |   developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 |   Questions may also be sent to the Ensembl help desk at
24 |   <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =cut
27 | 
28 | =head1 NAME
29 | 
30 | Bio::EnsEMBL::Analysis::Runnable::SamtoolsMerge -
31 | 
32 | =head1 SYNOPSIS
33 | 
34 | 
35 | =head1 DESCRIPTION
36 | 
37 | Merge BAM files using samtools
38 | 
39 | =head1 APPENDIX
40 | 
41 | The rest of the documentation details each of the object methods.
42 | Internal methods are usually preceded with a _
43 | 
44 | =cut
45 | 
46 | package Bio::EnsEMBL::Analysis::Runnable::SamtoolsMerge;
47 | 
48 | use warnings;
49 | use strict;
50 | 
51 | use parent ('Bio::EnsEMBL::Analysis::Runnable::BaseBamMerge');
52 | 
53 | 
# Constructor. Builds the object through the parent class, hands the
# configured program to the samtools accessor and, when the options contain
# "-b <file>", checks that the file listing the BAM files exists.
#
# Arg [1..n] : passed unchanged to the parent constructor
# Returntype : Bio::EnsEMBL::Analysis::Runnable::SamtoolsMerge
# Exceptions : throws if the file given with -b does not exist
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    $self->samtools($self->program);

    my $options = $self->options;
    if (defined $options and $options =~ /-b\s+(\S+)/) {
        my $bam_list_file = $1;
        unless (-e $bam_list_file) {
            # This file does not import throw(), so the bare call would die
            # with "Undefined subroutine" instead of the intended message.
            # Load the exception module and call it fully qualified.
            require Bio::EnsEMBL::Utils::Exception;
            Bio::EnsEMBL::Utils::Exception::throw(
                'Could not access file containing BAM files '.$bam_list_file);
        }
    }

    return $self;
}
65 | 
66 | 
67 | 
68 | ############################################################
69 | #
70 | # Analysis methods
71 | #
72 | ############################################################
73 | 
74 | =head2 run
75 | 
76 |  Arg [1]    : None
77 |  Description: Merge the BAM files using samtools and create the index file
78 |  Returntype : Integer, 1
79 |  Exceptions : None
80 | 
81 | =cut
82 | 
# Hands the input BAM files to the samtools wrapper for merging and then
# asks the object to check the output file. A single input file is passed
# as a plain scalar, several files as an array reference.
sub run {
  my $self = shift;

  my $bam_inputs = $self->input_files;
  if (@{$bam_inputs} == 1) {
    $bam_inputs = $bam_inputs->[0];
  }

  my $samtools = $self->samtools;
  $samtools->merge($self->options, $self->output_file, $bam_inputs);

  $self->check_output_file;

  return 1;
}
94 | 
95 | 1;
96 | 


--------------------------------------------------------------------------------
/scripts/delete_big_dir.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [2018-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | =head1 NAME
18 | 
19 |   delete_big_dir.pl
20 | 
21 | =head1 DESCRIPTION
22 | 
23 | This script takes a single argument, the full path to a dir to remove, and then removes all
24 | files and subdirs before finally removing the dir itself. This is designed to remove very
25 | complex subdir structures or dirs with a very large number of files in them. Perl is much
26 | faster at this kind of task than rm or rsync to an empty dir
27 | 
28 | =cut
29 | 
30 | use Cwd;
31 | use File::Spec;
32 | use warnings;
33 | use strict;
34 | use feature 'say';
35 | use Getopt::Long;
36 | 
# Main script body: resolve the dir given with -dir, list all of its subdirs,
# unlink the files in each of them and finally remove the (now mostly empty)
# dir tree. External commands are run in list form so that paths containing
# spaces or shell metacharacters are passed through untouched.
my $full_master_dir_path;
GetOptions('dir:s' => \$full_master_dir_path);

unless($full_master_dir_path) {
  die "No arguments entered. You need to pass in the dir to delete with -dir";
}

$full_master_dir_path = File::Spec->rel2abs($full_master_dir_path);

unless(-d $full_master_dir_path) {
  die "The argument you entered is not a dir. Argument entered: ".$full_master_dir_path;
}

say "The full path for the dir to be deleted is:\n".$full_master_dir_path;

say "Getting subdir list...";
my @subdirs;
{
  # List-form pipe open: no shell is involved. Entries are NUL separated so
  # that dir names containing newlines are read back correctly.
  open(my $find_fh, '-|', 'find', $full_master_dir_path, '-type', 'd', '-print0')
    or die "Could not run find on ".$full_master_dir_path.": $!";
  local $/ = "\0";
  chomp(@subdirs = <$find_fh>);
  close($find_fh)
    or warn "find reported a problem while listing ".$full_master_dir_path."\n";
}

say "Found ".(scalar(@subdirs) - 1)." subdirs";

for (my $i=5; $i>0; $i--) {
  say "Beginning file deletion in ".$i."...";
  sleep(1);
}

print "\n";

# \Q..\E so that regex metacharacters in the path are matched literally.
my $master_dir_prefix = qr/^\Q$full_master_dir_path\E/;

foreach my $dir (@subdirs) {
  say "Removing files from:\n".$dir;

  my $dir_handle;
  unless (opendir($dir_handle, $dir)) {
    warn "Could not read dir ".$dir.": $!\n";
    next;
  }

  # readdir also returns hidden files and does not build (and sort) the
  # whole listing in memory the way a glob does.
  while (defined(my $entry = readdir($dir_handle))) {
    next if ($entry eq '.' or $entry eq '..');

    my $file = File::Spec->catfile($dir, $entry);
    unless($file =~ $master_dir_prefix) {
      die "Potential issue with file path, path didn't match to the master dir path. Path found:\n".$file;
    }

    # Real subdirs are emptied on their own iteration and removed at the end.
    next if (-d $file and not -l $file);

    unlink($file);
  }
  closedir($dir_handle);
}

say "\nFinished removing files. Now removing empty dirs...";
my $result = system('rm', '-r', $full_master_dir_path);
if($result) {
  die "Could not remove the master dir, something potentially went wrong with the deletion!";
}

exit;
82 | 
83 | 


--------------------------------------------------------------------------------
/modules/t/filter_t.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | use strict;
17 | use warnings;
18 | 
19 | use Test::More;
20 | 
21 | use Bio::EnsEMBL::Analysis::Tools::Filter;
22 | 
# Constructor arguments used for the last group of checks.
my %params = (
  -coverage => 50,
  -percent_id => 87,
  -reject_processed_pseudos => 1,
  -best_in_genome => 1,
  -verbosity => 2,
);

# Numerically compares accessors of $object with their expected values,
# given as accessor => value pairs. cmp_ok() is used instead of ok() so that
# a failure reports both the value found and the value expected.
sub accessors_match {
  my ($object, $label, @expected) = @_;

  # Report failures at the caller's line rather than inside this helper.
  local $Test::Builder::Level = $Test::Builder::Level + 1;

  while (my ($accessor, $value) = splice(@expected, 0, 2)) {
    my $got = $object->$accessor;
    cmp_ok($got, '==', $value, $label.$accessor);
  }
}

# Defaults of a freshly created filter
my $filter = new_ok('Bio::EnsEMBL::Analysis::Tools::Filter');
ok(!defined($filter->min_coverage), 'Checking default min_coverage');
ok(!defined($filter->min_percent), 'Checking default min_percent');
accessors_match($filter, 'Checking default ',
  reject_processed_pseudos => 0,
  best_in_genome => 0,
  verbosity => 0,
);

# Setters
$filter->min_coverage(90);
$filter->min_percent(97);
$filter->reject_processed_pseudos(1);
$filter->best_in_genome(1);
$filter->verbosity(1);
accessors_match($filter, 'Checking ',
  min_coverage => 90,
  min_percent => 97,
  reject_processed_pseudos => 1,
  best_in_genome => 1,
  verbosity => 1,
);

# Values given to the constructor
$filter = Bio::EnsEMBL::Analysis::Tools::Filter->new(%params);
accessors_match($filter, 'Checking ',
  min_coverage => 50,
  min_percent => 87,
  reject_processed_pseudos => 1,
  best_in_genome => 1,
  verbosity => 2,
);

# filter_results must fail without input and when it is not overridden;
# like() shows the actual exception text when the pattern does not match.
eval{
  $filter->filter_results;
};
like($@, qr/You should give an arrayref of objects/, 'Checking fails on empty');
eval{
  $filter->filter_results([]);
};
like($@, qr/You should implement the filter_results method/, 'Checking fails on not implemented');
done_testing();
65 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/BuildChecks.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::BuildChecks;
17 | 
18 | use strict;
19 | use vars qw( %GeneBuilder );
20 | 
21 | # Hash containing config info
# Length and count thresholds; every key can be imported by name.
%GeneBuilder = (

  # --- introns ---
  # shorter than this: could be real because of frameshifts
  MINSHORTINTRONLEN   => 7,
  # shorter than this: considered too short
  MAXSHORTINTRONLEN   => 10,
  # longer than this: too long
  MINLONGINTRONLEN    => 100000,

  # --- exons ---
  # shorter than this: could be real because of frameshifts
  MINSHORTEXONLEN     => 3,
  # shorter than this: too short
  MAXSHORTEXONLEN     => 10,
  # longer than this: probably too long
  MINLONGEXONLEN      => 5000,

  # --- translations, transcripts and genes ---
  MINTRANSLATIONLEN   => 10,
  MAX_EXONSTRANSCRIPT => 150,
  MAXTRANSCRIPTS      => 10,
  MAXGENELEN          => 2_000_000,

  IGNOREWARNINGS      => 1,
);
51 | 
# Called by "use": makes the requested %GeneBuilder entries (or all of them
# when no names are given) available as scalar variables in the calling
# package. Dies if a requested name has no defined value in %GeneBuilder.
sub import {
  my ($class, @requested) = @_;
  my ($target) = caller(0);    # package that is importing from us

  my @names = @requested ? @requested : keys(%GeneBuilder);
  return unless @names;

  # Declare the scalars in the calling package so they pass "use strict"
  my $declaration = join(' ', map { '$' . $_ } @names);
  eval "package $target; use vars qw($declaration)";
  die $@ if $@;

  foreach my $name (@names) {
    die "Error: GeneBuilder: $name not known\n"
      unless defined $GeneBuilder{$name};

    # Alias the caller's scalar to the hash value (what Exporter does for
    # function names, done here for scalar variables).
    no strict 'refs';
    *{"${target}::$name"} = \$GeneBuilder{$name};
  }
}
79 | 
80 | 1;
81 | 


--------------------------------------------------------------------------------
/scripts/Merge/merge.conf:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is a "bash" script which is sourced by merge-wrapper.ksh.
 3 | # This means that there can't be any spaces around '=', for example.
 4 | #
 5 | 
 6 | ensembl_analysis_base="${HOME}/ensembl-src/ensembl-analysis"
 7 | 
 8 | # Comment out to get a randomly generated output directory name
 9 | # ("output.XXXX") in the current directory.  The merge will write log
10 | # files here.  The directory must not already exist.
11 | output_dir='merge-havana_ensembl-output'
12 | 
13 | # The number of jobs in the job array.  The workload will be evenly
14 | # distributed over these jobs no matter what number of jobs you put
15 | # here.
16 | njobs='75'
17 | 
18 | # The maximum number of concurrent jobs to run at any point in time.
19 | # A number between 10 and 20 seems to be optimal.
20 | concurrent='20'
21 | 
22 | # ro = read only
23 | rouser='ensro'
24 | ropassword=''
25 | 
26 | # rw = read and write
27 | rwuser=''
28 | rwpassword=''
29 | 
30 | # host_secondary='genebuild8'
31 | # database_secondary='cgg_homo_sapiens_ensembl_74'
32 | host_secondary=''
33 | database_secondary=''
34 | 
35 | # host_primary='genebuild8'
36 | # database_primary='cgg_homo_sapiens_vega_fixed_72'
37 | host_primary=''
38 | database_primary=''
39 | 
40 | # Target database needs to exist but should be empty.
41 | # (The "essential tables" need to be populated)
42 | # host_output='genebuild8'
43 | # database_output='ak4_havana_ensembl_74'
44 | host_output=''
45 | database_output=''
46 | 
47 | # Just comment out or leave empty if not applicable.
48 | # host_ccds='ens-livemirror'
49 | # database_ccds='ccds_human_74'
50 | host_ccds=''
51 | database_ccds=''
52 | 
53 | # Filter options.  You may specify either the X_include or the X_exclude
54 | # option, but not both.  These filter on gene analysis logic names.
55 | # Any gene filtered out will not be found in the output database (i.e.
56 | # they will also be skipped by the post-processing script that copies
57 | # all unprocessed Secondary genes to the output database).
58 | 
59 | ## These are the defaults:
60 | # secondary_include=''
61 | # secondary_exclude=''
62 | #
63 | # primary_include=''
64 | # primary_exclude=''
65 | 
66 | # Tagging:  Will be used as suffix for logic names ("_tag") and for
67 | # source.  With the default settings, merged genes and transcripts will
68 | # get the source "secondary_primary".
69 | 
70 | ## These are the defaults:
71 | # secondary_tag='ensembl'
72 | # primary_tag='havana'
73 | 
74 | # Xrefs:  The format is a comma-separated list of
75 | # "db_name,db_display_name,type"
76 | 
77 | ## These are the defaults:
78 | # primary_gene_xref='OTTG,Havana gene,ALT_GENE'
79 | # primary_transcript_xref='OTTT,Havana transcript,ALT_TRANS'
80 | # primary_translation_xref='OTTP,Havana translation,MISC'
81 | 


--------------------------------------------------------------------------------
/scripts/genebuild/sncrna/dump_prefilter_features.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use File::Spec::Functions qw(catfile);
21 | use Bio::EnsEMBL::DBSQL::DBAdaptor;
22 | 
# Main script body: dumps two BED-like files into $working_dir,
#   <logic_name>_dafs.bed  - the DNA align features of the given logic name
#   identified_mirnas.bed  - the genes with biotype miRNA
# Lexical filehandles are used instead of the global FH, and every I/O
# failure reports the system error.
die "Usage: $0 dbname dbhost dbport dbuser working_dir logic_name\n"
  unless @ARGV >= 6;

my ($dbname, $dbhost, $dbport, $dbuser, $working_dir, $logic_name) = @ARGV;

my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
	-DBNAME => $dbname,
	-HOST => $dbhost,
	-PORT => $dbport,
	-USER => $dbuser,
	-DRIVER => 'mysql',
);

my $daf_adaptor = $db->get_DnaAlignFeatureAdaptor();

my $fn = catfile($working_dir, $logic_name.'_dafs.bed');

open(my $daf_fh, '>', $fn) or die "Could not write to $fn: $!";

foreach my $daf (@{$daf_adaptor->fetch_all_by_logic_name($logic_name)}) {
	my $strand = $daf->strand() > 0 ? "+" : "-";

	# Column 4 is the region written as name:start-end
	print {$daf_fh} $daf->seq_region_name(), "\t",
		$daf->seq_region_start(), "\t",
		$daf->seq_region_end(), "\t",
		$daf->seq_region_name(), ":",
		$daf->seq_region_start(), "-",
		$daf->seq_region_end(), "\t",
		$daf->score(), "\t",
		$strand, "\t",
		$daf->hseqname(), "\t",
		$daf->p_value(), "\t",
		$daf->percent_id(), "\t",
		$daf->cigar_string(),  "\n";

}

# Buffered write errors only show up at close time
close($daf_fh) or die("Could not close $fn: $!");

# dump putative stem-loops
my $gene_adaptor = $db->get_GeneAdaptor();

$fn = catfile($working_dir, 'identified_mirnas.bed');

open(my $mirna_fh, '>', $fn) or die "Could not write to $fn: $!";

foreach my $gene (@{$gene_adaptor->fetch_all_by_biotype('miRNA')}){
	my $strand = $gene->strand() > 0 ? "+" : "-";

	# The score column is always written as 0 for these genes
	print {$mirna_fh} $gene->seq_region_name(), "\t",
		$gene->seq_region_start(), "\t",
		$gene->seq_region_end(), "\t",
		$gene->seq_region_name(), ":",
		$gene->seq_region_start(), "-",
		$gene->seq_region_end(), "\t0\t",
		$strand, "\t",
		$gene->dbID(), "\n";

}

close($mirna_fh) or die("Could not close $fn: $!");
83 | 
84 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Accumulator.pm:
--------------------------------------------------------------------------------
  1 | =head1 LICENSE
  2 | 
  3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
  4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
  5 | # 
  6 | # Licensed under the Apache License, Version 2.0 (the "License");
  7 | # you may not use this file except in compliance with the License.
  8 | # You may obtain a copy of the License at
  9 | # 
 10 | #      http://www.apache.org/licenses/LICENSE-2.0
 11 | # 
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | =head1 CONTACT
 19 | 
 20 |   Please email comments or questions to the public Ensembl
 21 |   developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
 22 | 
 23 |   Questions may also be sent to the Ensembl help desk at
 24 |   <http://www.ensembl.org/Help/Contact>.
 25 | 
 26 | =cut
 27 | 
 28 | =head1 NAME
 29 | 
 30 | Bio::EnsEMBL::Analysis::RunnableDB::Accumulator - 
 31 | 
 32 | =head1 SYNOPSIS
 33 | 
 34 |   my $accumulator = Bio::EnsEMBL::Analysis::RunnableDB::Accumulator->
 35 |   new(
 36 |       -input_id => 'ACCUMULATOR',
 37 |       -db => $db,
 38 |       -analysis => $analysis,
 39 |      );
 40 |   $accumulator->fetch_input;
 41 |   $accumulator->run;
 42 |   $accumulator->write_output;
 43 | 
 44 | =head1 DESCRIPTION
 45 | 
 46 | This is a simple placeholder module that allows the accumulator to wait for all
 47 | stages in the pipeline to work. It does nothing itself; its methods only print a message.
 48 | 
 49 | =head1 METHODS
 50 | 
 51 | =cut
 52 | 
 53 | 
 54 | package Bio::EnsEMBL::Analysis::RunnableDB::Accumulator;
 55 | 
 56 | use warnings ;
 57 | use strict;
 58 | 
 59 | use Bio::EnsEMBL::Analysis::RunnableDB;
 60 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
 61 | 
 62 | use vars qw(@ISA);
 63 | 
 64 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB);
 65 | 
 66 | =head2 fetch_input
 67 | 
 68 |     Title   :   fetch_input
 69 |     Usage   :   $self->fetch_input
 70 |     Function:   Dummy method to comply to the interface
 71 |     Returns :   none
 72 |     Args    :   none
 73 | 
 74 | =cut
 75 | 
 76 | sub fetch_input {
 77 |     my( $self) = @_;
 78 |     
 79 |     throw("No input id") unless defined($self->input_id);
 80 | 
 81 |     return 1;
 82 | 
 83 | }
 84 | 
 85 | sub run {
 86 |     my ($self) = @_;
 87 |     print "Dummy RunnableDB - no runnable to run\n";
 88 | 
 89 | }
 90 | 
 91 | sub write_output {
 92 |     my ($self) = @_;
 93 | 
 94 |     print "Dummy RunnableDB - no output to write\n";
 95 | 
 96 |     return 1;
 97 | }
 98 | 
 99 | 1;
100 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/AllExonOverlapFilter.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | package Bio::EnsEMBL::Analysis::Tools::AllExonOverlapFilter;
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
21 | use Bio::EnsEMBL::Utils::Argument qw( rearrange );
22 | 
23 | 
24 | 
# Constructor. Takes no arguments; throws if any are given.
sub new {
  my $class = shift;
  my $self  = bless {}, $class;

  throw("AllExonOverlapFilter should have no args in new") if @_;

  return $self;
}
35 | 
36 | #####################################
# Returns (as an array ref) the objects of $these that have no same-strand
# exon overlap with any object of $others.
#
# Only objects of $others that overlap a candidate's start-end range have
# their exons compared. The scan of $others assumes it is sorted by start;
# it restarts at the first element whose end reached the previous candidate.
sub filter {
  my ($self, $these, $others) = @_;

  my @kept;
  my $scan_from = 0;

  foreach my $candidate (@$these) {
    my @neighbours;
    my $first_reachable;

    foreach my $idx ($scan_from .. @$others - 1) {
      my $other = $others->[$idx];

      # lies completely before the candidate
      next if $other->end < $candidate->start;

      # first element that ends at or after the candidate's start
      $first_reachable = $idx unless defined $first_reachable;

      # everything from here on starts after the candidate
      last if $other->start > $candidate->end;

      push @neighbours, $other;
    }

    $scan_from = $first_reachable if defined $first_reachable;

    my $clashes = 0;
    if (@neighbours) {
      my @own_exons = @{$candidate->get_all_Exons};

      NEIGHBOUR: foreach my $neighbour (@neighbours) {
        foreach my $their_exon (@{$neighbour->get_all_Exons}) {
          foreach my $own_exon (@own_exons) {
            next unless $their_exon->strand == $own_exon->strand;
            next if $their_exon->end < $own_exon->start;
            next if $their_exon->start > $own_exon->end;

            # a single overlapping exon pair rejects the candidate
            $clashes = 1;
            last NEIGHBOUR;
          }
        }
      }
    }

    push @kept, $candidate unless $clashes;
  }

  return \@kept;
}
93 | 1;
94 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/S3Config.pm.example:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 3 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 4 | # 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | # 
 9 | #      http://www.apache.org/licenses/LICENSE-2.0
10 | # 
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | =head1 NAME
18 | 
19 | Bio::EnsEMBL::Analysis::Config::S3Config
20 | 
21 | =head1 SYNOPSIS
22 | 
23 |     use Bio::EnsEMBL::Analysis::Config::S3Config;
24 |     use Bio::EnsEMBL::Analysis::Config::S3Config qw();
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | S3 analysis configuration.
29 | 
30 | It imports and sets a number of standard global variables into the
31 | calling package. Without arguments all the standard variables are set,
32 | and with a list, only those variables whose names are provided are set.
33 | The module will die if a variable which doesn't appear in its
34 | C<%Config> hash is asked to be set.
35 | 
36 | The variables can also be references to arrays or hashes.
37 | 
38 | Edit C<%Config> to add or alter variables.
39 | 
40 | All the variables are in capitals, so that they resemble environment
41 | variables.
42 | 
43 | =head1 CONTACT
44 | 
45 | B<http://lists.ensembl.org/mailman/listinfo/dev>
46 | 
47 | =cut
48 | 
49 | package Bio::EnsEMBL::Analysis::Config::S3Config; 
50 | 
51 | use strict;
52 | use vars qw(%Config);
53 | 
# The only setting is read from the environment variable of the same name.
# The value is interpolated into a string, so it is a defined (possibly
# empty) string even when the environment variable is not set.
%Config = (
    S3_CONFIG_FILE => "$ENV{S3_CONFIG_FILE}",
);
59 | 
60 | 
61 | 
# Called by "use": makes the requested %Config entries (or all of them when
# no names are given) available as scalar variables in the calling package.
# Dies if a requested name has no defined value in %Config.
sub import {
    my ($class, @requested) = @_;
    my ($target) = caller(0);    # package that is importing from us

    my @names = @requested ? @requested : keys(%Config);
    return unless @names;

    # Declare the scalars in the calling package so they pass "use strict"
    my $declaration = join(' ', map { '$' . $_ } @names);
    eval "package $target; use vars qw($declaration)";
    die $@ if $@;

    foreach my $name (@names) {
        die "Error: Config: $name not known\n"
            unless defined $Config{$name};

        # Alias the caller's scalar to the hash value (what Exporter does
        # for function names, done here for scalar variables).
        no strict 'refs';
        *{"${target}::$name"} = \$Config{$name};
    }
}
88 | 
89 | 1;
90 | 


--------------------------------------------------------------------------------
/scripts/markers/marker_match.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # get a complete but non-redundant set of marker definitions
19 | 
20 | use warnings ;
21 | use strict;
22 | 
# Usage: marker_match.pl <marker_file_1> <marker_file_2>
#
# Both inputs are tab-separated marker definitions with the columns
#   id, left primer, right primer, distance, name(s), junk, accession(s), species
# where several names/accessions are separated by ';'.
# Records sharing the same id (first column) are merged into a single output
# line carrying the union of their names and accessions.
my $infile1 = shift;
my $infile2 = shift;
die "usage: $0 <marker_file_1> <marker_file_2>\n"
  unless defined $infile1 && defined $infile2;

my %marker = ();

# Store the raw lines of both files, grouped by marker id.
foreach my $infile ($infile1, $infile2) {
  open(my $in, '<', $infile) || die "cant open file $infile\n";
  while (my $line = <$in>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines
    my ($id) = split /\t/, $line;
    push @{ $marker{$id} }, $line;
  }
  close($in);
}

# Combine the records of each id. Ids and the merged name/accession lists
# are sorted so that the output is reproducible between runs.
foreach my $id (sort keys %marker) {
  my (%names, %accs);
  my ($display_id, $lprim, $rprim, $dist, $name, $junk, $acc, $species);

  # The non-merged columns are taken from the last record seen for this id.
  for my $record (@{ $marker{$id} }) {
    ($display_id, $lprim, $rprim, $dist, $name, $junk, $acc, $species) =
      split /\t/, $record;

    # Collect unique names; any value containing '-' is skipped.
    # NOTE(review): presumably meant to skip only the '-' placeholder, but
    # this also drops real names that contain a hyphen - confirm intent.
    if (defined $name && $name !~ m/-/) {
      @names{ split /;/, $name } = ();
    }

    # Collect unique accessions under the same rule.
    if (defined $acc && $acc !~ m/-/) {
      @accs{ split /;/, $acc } = ();
    }
  }

  # An empty list is written as a single '-' so that every output line keeps
  # the same eight columns as the input.
  my @fields = (
    $display_id, $lprim, $rprim, $dist,
    (%names ? join(';', sort keys %names) : '-'),
    $junk,
    (%accs ? join(';', sort keys %accs) : '-'),
    $species,
  );

  # Columns missing from a short input line are written as '-' as well.
  print join("\t", map { defined $_ ? $_ : '-' } @fields), "\n";
}
88 | 
89 | 
90 | __END__
91 | 
92 | 87      AAAAACACAAGTTTCATACATCACA       AATGTAACTGTACCCTTCTGCATG        -       D9S1986 -       G07334;Z39132   Mus musculus
93 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Seg.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =pod 
17 | 
18 | =head1 NAME
19 | 
20 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Seg
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |   my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Seg->new ( -db      => $db,
25 | 	    	                                                    -input_id   => $input_id,
26 |                                                                     -analysis   => $analysis,
27 |                                                                   );
28 |   $seg->fetch_input;  # gets sequence from DB
29 |   $seg->run;
30 |   $seg->output;
  $seg->write_output; # writes features to DB
32 | 
33 |  NB: The input_id can either be a peptide id or the location for a protein file. 
34 | 
35 | =head1 DESCRIPTION
36 | 
37 |   This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg
38 |   to add functionality to read and write to databases.
39 |   The query sequence is provided through the input_id.
40 |   The appropriate Bio::EnsEMBL::Analysis object
41 |   must be passed for extraction of parameters.
42 | 
43 | =head1 CONTACT
44 | 
45 |   Marc Sohrmann: ms2@sanger.ac.uk
46 | 
47 | =head1 APPENDIX
48 | 
49 |   The rest of the documentation details each of the object methods. 
50 |   Internal methods are usually preceded with a _.
51 | 
52 | =cut
53 | 
54 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Seg;
55 | 
56 | use warnings ;
57 | use strict;
58 | use vars qw(@ISA);
59 | 
60 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
61 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg;
62 | 
63 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
64 | 
# Runs the parent class's fetch_input, then builds the Seg runnable from
# this object's query and analysis and registers it with $self->runnable.
sub fetch_input {
  my ($self, @args) = @_;

  $self->SUPER::fetch_input(@args);

  my @runnable_args = (
    -query    => $self->query,
    -analysis => $self->analysis,
  );

  $self->runnable(
    Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg->new(@runnable_args)
  );
}
74 | 
75 | 
76 | 1;
77 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Snap.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
19 | =head1 CONTACT
20 | 
21 |   Please email comments or questions to the public Ensembl
22 |   developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
23 | 
24 |   Questions may also be sent to the Ensembl help desk at
25 |   <http://www.ensembl.org/Help/Contact>.
26 | 
27 | =cut
28 | 
29 | =head1 NAME
30 | 
31 | Bio::EnsEMBL::Analysis::RunnableDB::Snap - 
32 | 
33 | =head1 SYNOPSIS
34 | 
35 |   my $runnabledb = Bio::EnsEMBL::Analysis::RunnableDB::Snap->
36 |   new(
37 |       -input_id => 'contig::AL805961.22.1.166258:1:166258:1',
38 |       -db => $db,
39 |       -analysis => $analysis,
40 |      );
41 |   $runnabledb->fetch_input;
42 |   $runnabledb->run;
43 |   $runnabledb->write_output;
44 | 
45 | 
46 | =head1 DESCRIPTION
47 | 
Fetches sequence data from the database, then instantiates and runs the
Snap runnable. This inherits from the Genscan RunnableDB and as such doesn't
implement much itself.
51 | 
52 | =head1 METHODS
53 | 
54 | =cut
55 | 
56 | 
57 | package Bio::EnsEMBL::Analysis::RunnableDB::Snap;
58 | 
59 | use strict;
60 | use warnings;
61 | 
62 | use Bio::EnsEMBL::Analysis::RunnableDB::Genscan;
63 | use Bio::EnsEMBL::Analysis::Runnable::Snap;
64 | use Bio::EnsEMBL::Analysis::Config::General;
65 | use vars qw(@ISA);
66 | 
67 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::Genscan);
68 | 
69 | 
70 | 
71 | =head2 runnable_path
72 | 
73 |   Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::Snap
74 |   Function  : return the runnable path
75 |   Returntype: string
76 |   Exceptions: 
77 |   Example   : my $runnable = $self->runnable_path->new
78 |                                (
79 |                                 -query    => $self->query,
80 |                                 -program  => $self->analysis->program_file,
81 |                                 -analysis => $self->analysis,
82 |                                 %parameters,
83 |                                );
84 | 
85 | =cut
86 | 
87 | 
sub runnable_path{
  # The invocant is unpacked for consistency with the other methods; the
  # returned class name is constant and does not depend on it.
  my ($self) = @_;
  return "Bio::EnsEMBL::Analysis::Runnable::Snap";
}
92 | 
93 | 1;
94 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Fgenesh.pm:
--------------------------------------------------------------------------------
 1 | =head1 LICENSE
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | =head1 CONTACT
19 | 
20 |   Please email comments or questions to the public Ensembl
21 |   developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 | 
23 |   Questions may also be sent to the Ensembl help desk at
24 |   <http://www.ensembl.org/Help/Contact>.
25 | 
26 | =cut
27 | 
28 | =head1 NAME
29 | 
30 | Bio::EnsEMBL::Analysis::RunnableDB::Fgenesh - 
31 | 
32 | =head1 SYNOPSIS
33 | 
34 |   my $runnabledb = Bio::EnsEMBL::Analysis::RunnableDB::Fgenesh->
35 |   new(
36 |       -input_id => 'contig::AL805961.22.1.166258:1:166258:1',
37 |       -db => $db,
38 |       -analysis => $analysis,
39 |      );
40 |   $runnabledb->fetch_input;
41 |   $runnabledb->run;
42 |   $runnabledb->write_output;
43 | 
44 | 
45 | =head1 DESCRIPTION
46 | 
Fetches sequence data from the database, then instantiates and runs the
Fgenesh runnable. This inherits from the Genscan RunnableDB and as such doesn't
implement much itself.
50 | 
51 | =head1 METHODS
52 | 
53 | =cut
54 | 
55 | 
56 | package Bio::EnsEMBL::Analysis::RunnableDB::Fgenesh;
57 | 
58 | use strict;
59 | use warnings;
60 | 
61 | use Bio::EnsEMBL::Analysis::RunnableDB::Genscan;
62 | use Bio::EnsEMBL::Analysis::Runnable::Fgenesh;
63 | use Bio::EnsEMBL::Analysis::Config::General;
64 | use vars qw(@ISA);
65 | 
66 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::Genscan);
67 | 
68 | 
69 | 
70 | =head2 runnable_path
71 | 
72 |   Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::Fgenesh
73 |   Function  : return the runnable path
74 |   Returntype: string
75 |   Exceptions: 
76 |   Example   : my $runnable = $self->runnable_path->new
77 |                                (
78 |                                 -query    => $self->query,
79 |                                 -program  => $self->analysis->program_file,
80 |                                 -analysis => $self->analysis,
81 |                                 %parameters,
82 |                                );
83 | 
84 | =cut
85 | 
86 | 
sub runnable_path{
  # The invocant is unpacked for consistency with the other methods; the
  # returned class name is constant and does not depend on it.
  my ($self) = @_;
  return "Bio::EnsEMBL::Analysis::Runnable::Fgenesh";
}
91 | 
92 | 1;
93 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Finished/RepeatMasker.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | =head1 NAME
18 | 
19 | Bio::EnsEMBL::Analysis::RunnableDB::Finished::RepeatMasker
20 | 
21 | =head1 SYNOPSIS
22 | 
23 |   my $repeat_masker = Bio::EnsEMBL::Analysis::RunnableDB::Finished::RepeatMasker->
24 |   new(
25 |       -input_id => 'contig::AL805961.22.1.166258:1:166258:1',
26 |       -db => $db,
27 |       -analysis => $analysis,
28 |      );
29 |   $repeat_masker->fetch_input;
30 |   $repeat_masker->run;
31 |   $repeat_masker->write_output;
32 | 
33 | =head1 DESCRIPTION
34 | 
35 | This module provides an interface between the ensembl database and
36 | the Runnable RepeatMasker which wraps the program RepeatMasker
37 | 
38 | This module can fetch appropriate input from the database
39 | pass it to the runnable then write the results back to the database
40 | in the repeat_feature and repeat_consensus tables
41 | 
42 | =head1 CONTACT
43 | 
44 | Post questions to : anacode@sanger.ac.uk
45 | 
46 | =cut
47 | 
48 | package Bio::EnsEMBL::Analysis::RunnableDB::Finished::RepeatMasker;
49 | 
50 | use strict;
51 | use warnings;
52 | use Bio::EnsEMBL::Analysis::RunnableDB::RepeatMasker;
53 | use Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker;
54 | 
55 | use vars qw(@ISA);
56 | 
57 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::RepeatMasker);
58 | 
59 | 
60 | 
61 | =head2 fetch_input
62 | 
63 |   Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::Finished::RepeatMasker
64 |   Function  : fetch data out of database and create runnable
65 |   Returntype: 1
66 |   Exceptions: none
67 |   Example   :
68 | 
69 | =cut
70 | 
71 | 
72 | 
sub fetch_input{
  my ($self) = @_;

  # The sequence returned by fetch_sequence becomes the query.
  $self->query($self->fetch_sequence);

  # Extra constructor arguments come from parameters_hash when it returns
  # a true value; otherwise none are added.
  my %parameters = $self->parameters_hash ? %{$self->parameters_hash} : ();

  # %parameters goes last so its entries follow the fixed arguments.
  my @runnable_args = (
    -query    => $self->query,
    -program  => $self->analysis->program_file,
    -analysis => $self->analysis,
    %parameters,
  );

  $self->runnable(
    Bio::EnsEMBL::Analysis::Runnable::Finished::RepeatMasker->new(@runnable_args)
  );

  return 1;
}
91 | 
92 | 1;
93 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Tools/CodingExonOverlapFilter.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | package Bio::EnsEMBL::Analysis::Tools::CodingExonOverlapFilter;
18 | 
19 | use strict;
20 | use warnings;
21 | 
22 | use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning);
23 | use Bio::EnsEMBL::Utils::Argument qw( rearrange );
24 | 
25 | 
26 | 
# Constructor. Takes no arguments beyond the class name and throws if any
# are supplied; returns an empty hash blessed into $class.
sub new{
  my ($class, @args) = @_;

  throw("CodingExonOverlapFilter should have no args in new") if scalar(@args);

  return bless {}, $class;
}
37 | 
38 | #####################################
# Returns a reference to the list of objects from @$these that do not share
# a same-strand overlapping translateable exon with any object in @$others.
# Only the first transcript of each object is examined.
#
# Interference is judged by overlap at exon level; the assumption is that
# @$others is sorted by gene start.
sub filter {
  my ($self, $these, $others) = @_;

  my @kept;
  my $scan_from = 0;    # index in @$others where the next scan begins

  foreach my $candidate (@$these) {
    my @nearby;           # members of @$others overlapping at genomic level
    my $first_reachable;  # first index whose end is not left of the candidate

    foreach my $idx ($scan_from .. $#{$others}) {
      my $other = $others->[$idx];

      # Entirely to the left of the candidate: keep scanning.
      next if $other->end < $candidate->start;

      $first_reachable = $idx unless defined $first_reachable;

      # Starts beyond the candidate's end: stop scanning.
      last if $other->start > $candidate->end;

      push @nearby, $other;
    }

    # The next candidate resumes scanning from the first reachable index.
    $scan_from = $first_reachable if defined $first_reachable;

    my $clashes = 0;
    if (@nearby) {
      my $own_exons =
        $candidate->get_all_Transcripts->[0]->get_all_translateable_Exons;

      OTHER_GENE:
      foreach my $other (@nearby) {
        my $other_exons =
          $other->get_all_Transcripts->[0]->get_all_translateable_Exons;

        foreach my $other_exon (@$other_exons) {
          foreach my $own_exon (@$own_exons) {
            next if $other_exon->strand != $own_exon->strand;
            next if $other_exon->end < $own_exon->start;
            next if $other_exon->start > $own_exon->end;

            # One same-strand exon overlap is enough to reject.
            $clashes = 1;
            last OTHER_GENE;
          }
        }
      }
    }

    push @kept, $candidate unless $clashes;
  }

  return \@kept;
}
95 | 1;
96 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/IgSegBuilder.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
Bio::EnsEMBL::Analysis::Config::GeneBuild::IgSegBuilder
19 | 
20 | =head1 SYNOPSIS
21 | 
    use Bio::EnsEMBL::Analysis::Config::GeneBuild::IgSegBuilder;
23 | 
24 | =cut
25 | 
26 | 
27 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::IgSegBuilder;
28 | 
29 | use strict;
30 | use vars qw( %Config );
31 | 
32 | # Hash containing config info
# Configuration table handed out by import(): the single top-level key,
# IGSEG_CONFIG_BY_LOGIC, is exposed to calling packages as a scalar of the
# same name holding the nested hash below.
# NOTE(review): presumably the inner hash is keyed by analysis logic name
# with DEFAULT as the fallback entry - confirm against the consuming module.
%Config = (
           IGSEG_CONFIG_BY_LOGIC => {
             DEFAULT => {

               TRANDB_DATABASES_NAME => 'IG_EXONERATE_DB',

               # One list of logic names per segment type; empty in this
               # template.
               LV_LOGICS => [],
               D_LOGICS  => [],
               J_LOGICS  => [],
               C_LOGICS  => [],

               # Biotype strings, one per segment type.
               LV_OUTPUT_BIOTYPE => 'LV_segment',
               D_OUTPUT_BIOTYPE => 'D_segment',
               J_OUTPUT_BIOTYPE => 'J_segment',
               C_OUTPUT_BIOTYPE => 'C_segment',

               SUPPORTING_FEATURE_OUTPUT_LOGIC => '',

               OUTPUTDB_DATABASES_NAME     => 'IG_OUTPUT_DB',

               # D/J genes that are not closer than the distance
               # below to a V/C gene are rejected               
               D_J_PROXIMITY_THRESHOLD => 200000,

             },
           }
           );
59 | 
sub import {
  # caller(0) tells us which package issued the "use" statement.
  my ($callpack) = caller(0);
  my $pack = shift;    # drop our own package name from the front of @_

  # Export the variables that were asked for, or everything when the
  # caller did not name any.
  my @vars = @_ ? @_ : keys(%Config);
  return unless @vars;

  # Predeclare the scalars as globals in the calling package.
  my $declarations = join(' ', map { '$' . $_ } @vars);
  eval "package $callpack; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@vars) {
    # A name without a defined value in %Config is rejected.
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Point the caller's scalar at the %Config entry. Exporter does the
    # same kind of glob assignment, but for function names.
    no strict 'refs';
    *{"${callpack}::$name"} = \$Config{$name};
  }
}
86 | 
87 | 1;
88 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/CloneEndsLinking.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # POD documentation - main docs before the code
17 | 
18 | =head1 NAME
19 | 
20 | Bio::EnsEMBL::Analysis::Config::CloneEndsLinking
21 | 
22 | =head1 SYNOPSIS
23 | 
    use Bio::EnsEMBL::Analysis::Config::CloneEndsLinking;
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | This contains the configuration for the linking of aligned
29 | clone ends in order to produce a misc set and its associated
30 | misc features. It needs to be run after a ExonerateAlignFeature
run with specific parameters.
32 | 
33 | The layout of the configuration is a set of hashes,
34 | each one keyed by logic name. There is also a DEFAULT hash,
35 | which is used as the default for all logic names
36 | 
37 | =head1 CONTACT
38 | 
39 | =cut
40 | 
41 | 
42 | package Bio::EnsEMBL::Analysis::Config::CloneEndsLinking;
43 | 
44 | use strict;
45 | use vars qw( %Config );
46 | 
# Configuration table handed out by import(): the single top-level key,
# CLONE_END_LINKING_CONFIG_BY_LOGIC, is exposed to calling packages as a
# scalar of the same name. Per the module documentation the inner hashes are
# keyed by logic name, with DEFAULT used as the default for all logic names.
%Config = (
  CLONE_END_LINKING_CONFIG_BY_LOGIC => {
    DEFAULT => {
      # must be a directory with files containing clone fasta sequences with extra information in the header like this:
      # >918936606:CH243-100A1:F:CH243:184000:36800:1098268172037:1001
      CLONE_ALIGNED_DB => '',
      CLONE_LOGIC_NAME => '',
      OUTDB       => '',
      STORE_DNAALIGNFEATURES => 1,
    },
  }
);
59 | 
sub import {
  # caller(0) tells us which package issued the "use" statement.
  my ($callpack) = caller(0);
  my $pack = shift;    # drop our own package name from the front of @_

  # Export the variables that were asked for, or everything when the
  # caller did not name any.
  my @vars = @_ ? @_ : keys(%Config);
  return unless @vars;

  # Predeclare the scalars as globals in the calling package.
  my $declarations = join(' ', map { '$' . $_ } @vars);
  eval "package $callpack; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@vars) {
    # A name without a defined value in %Config is rejected.
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Point the caller's scalar at the %Config entry. Exporter does the
    # same kind of glob assignment, but for function names.
    no strict 'refs';
    *{"${callpack}::$name"} = \$Config{$name};
  }
}
86 | 
87 | 1;
88 | 


--------------------------------------------------------------------------------
/scripts/genebuild/slice_coding_gene_cnt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | #
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License
17 | 
18 | #This script checks slices for a given db to flag up cases where slices > 5mb have no protein coding gene
19 | 
20 | #
21 | 
22 | use strict;
23 | use warnings;
24 | 
25 | use Getopt::Long;
26 | use Bio::EnsEMBL::DBSQL::DBAdaptor;
27 | use feature 'say';
28 | use Bio::EnsEMBL::Utils::Exception qw(throw warning);
29 | 
# Connection settings, filled in from the command line.
my $dbname = '';
my $host = '';
my $user = '';
my $port = '';
my $pass = '';
my $driver = '';

GetOptions('dbname:s' => \$dbname,
           'host:s'  => \$host,
           'user:s' => \$user,
           'port:s' => \$port,
           'pass:s' => \$pass,
           'driver:s' => \$driver,
           );

# Slices at least this long (5mb) are expected to carry a protein coding gene.
my $min_slice_length = 5000000;

my $slice_cnt       = 0;   # all toplevel slices seen
my $slice_with_gene = 0;   # slices >= 5mb with a protein coding gene
my $slice_no_gene   = 0;   # slices >= 5mb without one
my $size            = 0;   # slices shorter than 5mb
my @slices_without_genes;  # names of the offending slices

my $db_adaptor = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
  -dbname => $dbname,
  -host   => $host,
  -port   => $port,
  -user   => $user,
  -pass   => $pass,
  -driver => $driver,
);
my $slice_adaptor = $db_adaptor->get_SliceAdaptor();

foreach my $slice ( @{ $slice_adaptor->fetch_all('toplevel') } ){
	$slice_cnt++;

	# Short slices are only counted; their genes are not needed.
	if ($slice->length < $min_slice_length) {
		$size++;
		next;
	}

	# Count the protein coding genes on this slice.
	my $gene_cnt = grep { $_->biotype eq 'protein_coding' } @{ $slice->get_all_Genes };

	if ($gene_cnt < 1){
		# Remember the slice instead of dying here, so that the summary
		# below is complete and every offending slice gets reported.
		push @slices_without_genes, $slice->name;
		$slice_no_gene++;
	}
	else{
		$slice_with_gene++;
	}
}

#print stats found
say "Total slice = $slice_cnt"; say "Slice with genes = $slice_with_gene"; say "slice with no gene = $slice_no_gene";
say "slice less 5mb = $size";

# Still fail the run when a large slice has no protein coding gene, but only
# after the statistics have been printed.
if (@slices_without_genes) {
	throw("slice has no protein coding gene: " . join(', ', @slices_without_genes));
}
88 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Funcgen/ACME.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::ACME
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |   my $runnable = Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::ACME->new
23 |      (
24 |          -db       => $db,
25 |          -input_id => 'chromosome::20:1:100000:1',
26 |          -analysis => $analysis,
27 |      );
28 |   $runnable->fetch_input;
29 |   $runnable->run;
30 |   $runnable->write_output;
31 | 
32 | =head1 DESCRIPTION
33 | 
34 | This module provides an interface between the ensembl functional genomics 
35 | database and the Runnable ACME which wraps the R package ACME (for Algorithm 
36 | for Capturing Microarray Enrichment).
37 | 
38 | =head1 AUTHOR
39 | 
40 | Stefan Graf, Ensembl Functional Genomics - http://www.ensembl.org/
41 | 
42 | =head1 CONTACT
43 | 
44 | Post questions to the Ensembl development list: http://lists.ensembl.org/mailman/listinfo/dev
45 | 
46 | =cut
47 | 
48 | package Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::ACME;
49 | 
50 | use strict;
51 | use warnings;
52 | use Data::Dumper;
53 | 
54 | use Bio::EnsEMBL::Analysis::RunnableDB;
55 | use Bio::EnsEMBL::Analysis::RunnableDB::Funcgen;
56 | use Bio::EnsEMBL::Analysis::Runnable::Funcgen::ACME;
57 | 
58 | use Bio::EnsEMBL::Analysis::Config::General;
59 | use Bio::EnsEMBL::Analysis::Config::Funcgen::ACME;
60 | 
61 | use Bio::EnsEMBL::Utils::Exception qw(throw warning);
62 | use vars qw(@ISA); 
63 | 
64 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::Funcgen);
65 | 
66 | =head2 new
67 | 
68 |   Arg [1]     : 
69 |   Arg [2]     : 
70 |   Description : Instantiates new ACME runnabledb
71 |   Returntype  : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::ACME object
72 |   Exceptions  : 
73 |   Example     : 
74 | 
75 | =cut
76 | 
sub new {
    # Trace message, printed before anything else is done.
    print "Analysis::RunnableDB::Funcgen::ACME::new\n";

    # All constructor arguments are forwarded unchanged to the parent class.
    my ($class, @ctor_args) = @_;
    my $self = $class->SUPER::new(@ctor_args);

    # NOTE(review): $CONFIG is not declared in this file - presumably it is
    # exported by one of the Config modules loaded above; confirm.
    $self->read_and_check_config($CONFIG);

    # Run the checks in order: first that we have the correct analysis
    # object, then that we can store the correct feature_set, data_sets
    # and result_sets.
    foreach my $check (qw(check_Analysis check_Sets)) {
        $self->$check();
    }

    return $self;
}
94 | 
95 | 1;
96 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveLoadcDNAs.pm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | #Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveLoadcDNAs;
19 | 
20 | use strict;
21 | use warnings;
22 | 
23 | use Bio::SeqIO;
24 | use Bio::EnsEMBL::Analysis::Tools::PolyAClipping qw(clip_if_necessary);
25 | 
26 | use parent ('Bio::EnsEMBL::Hive::RunnableDB::JobFactory');
27 | 
# Default parameters for this module: the parent class defaults come first so
# that the values listed here take precedence over them.
# Returns a hash reference.
sub param_defaults {
  my ($self) = @_;

  my %defaults = (
    %{$self->SUPER::param_defaults()},
    sequence_biotype    => 'cdna',
    column_names        => ['iid'],
    sequence_table_name => 'cdna_sequences',
    iid_type            => 'db_seq',
    format              => 'fasta',
  );

  return \%defaults;
}
40 | 
41 | 
# Read every sequence of 'cdna_file', optionally clip its polyA tail, then
# either write it to 'output_file' or store it in the 'sequence_table_name'
# table. The cleaned identifiers are saved in the 'inputlist' parameter.
sub fetch_input {
  my ($self) = @_;

  # Reader over the input file, parsed according to the 'format' parameter
  my $parser = Bio::SeqIO->new(-format => $self->param('format'), -file => $self->param_required('cdna_file'));

  # Clipping is opt-in: 'process_polyA' has to be both defined and true
  my $process_polyA = 0;
  $process_polyA = 1 if ($self->param_is_defined('process_polyA') and $self->param('process_polyA'));

  my $biotype = $self->param('sequence_biotype');

  # Any iid_type other than 'db_seq' sends the sequences to a FASTA file
  my $write_to_file = ($self->param('iid_type') eq 'db_seq') ? 0 : 1;
  my ($fasta_writer, $table_adaptor);
  if ($write_to_file) {
    $fasta_writer = Bio::SeqIO->new(-format => 'fasta', -file => '>'.$self->param_required('output_file'));
  }
  else {
    $table_adaptor = $self->db->get_NakedTableAdaptor();
    $table_adaptor->table_name($self->param('sequence_table_name'));
  }

  my @iids;
  while (my $bioseq = $parser->next_seq) {
    # The identifier is read before clipping, the warning below reports it as is
    my $header = $bioseq->id;
    if ($process_polyA) {
      # Only the first element returned by clip_if_necessary is used
      ($bioseq) = clip_if_necessary($bioseq);
      unless ($bioseq) {
        $self->warning('Sequence full of polyA for '.$header);
        next;
      }
    }

    # Remove a leading pair of pipe-terminated fields from the identifier
    $header =~ s/^\w*\|\w*\|//;
    if ($write_to_file) {
      $bioseq->id($header);
      $fasta_writer->write_seq($bioseq);
    }
    else {
      # One row is stored per sequence
      $table_adaptor->store([{
        accession => $header,
        seq       => $bioseq->seq,
        biotype   => $biotype,
      }]);
    }
    push(@iids, $header);
  }

  $self->param('inputlist', \@iids);
}
90 | 
91 | 1;
92 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/ProjectedTranscriptEvidence.pm.example:
--------------------------------------------------------------------------------
 1 | 1;
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | # 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | # 
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | # 
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | =head1 NAME
19 | 
20 | Bio::EnsEMBL::Analysis::Config::GeneBuild::ProjectedTranscriptEvidence
21 | 
22 | =head1 SYNOPSIS
23 | 
24 |     use Bio::EnsEMBL::Analysis::Config::GeneBuild::ProjectedTranscriptEvidence
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | Supplies config for Bio::EnsEMBL::Analysis::RunnableDB::ProjectedTranscriptEvidence,
29 | which is used to align a projected transcript against the original and add the original
30 | transcript as a transcript_supporting_feature of the projected transcript.
31 | 
32 | =head1 CONTACT
33 | 
34 | =cut
35 | 
36 | 
37 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::ProjectedTranscriptEvidence;
38 | 
39 | use strict;
40 | use vars qw( %Config );
41 | 
# Settings stored under the DEFAULT logic name
my %projected_evidence_defaults = (
  # Database to fetch the original/reference genes from
  GENEDB    => 'REFERENCE_DB',
  # Database the projected (transformed) genes were written to; the original
  # transcript will be added there as a transcript_supporting_feature
  OUTGENEDB => 'PROJECT_DB',
  # Options and name of the alignment program
  OPTIONS   => '--model affine:local --bestn 1 --dnahspthreshold 50 -w 1 -s 0',
  PROGRAM   => "exonerate-0.9.0",
);

%Config = (
  PROJECTED_TRANSCRIPT_EVIDENCE_CONFIG_BY_LOGIC => {
    DEFAULT => \%projected_evidence_defaults,
  },
);
56 | 
# Called by "use": aliases the requested %Config entries as package scalars
# in the calling package. Without an explicit list every entry is exported.
# Dies if a requested name has no defined value in %Config.
sub import {
  my ($class, @requested) = @_;
  my ($caller_package) = caller(0);

  my @names = @requested ? @requested : keys(%Config);
  return unless @names;

  # Predeclare the scalars in the calling package
  my $declarations = join(' ', map { '$'.$_ } @names);
  eval "package $caller_package; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@names) {
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables instead of function names
    no strict 'refs';
    *{"${caller_package}::$name"} = \$Config{$name};
  }
}
83 | 
84 | 1;
85 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/ExonerateSolexaLocalAlignment.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::Config::GeneBuild::ExonerateSolexaLocalAlignment
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |     use Bio::EnsEMBL::Analysis::Config::GeneBuild::ExonerateSolexaLocalAlignment
23 | 
24 | =head1 DESCRIPTION
25 | 
26 | This contains the specific configuration for
27 | Bio::EnsEMBL::Analysis::RunnableDB::ExonerateSolexaLocalAlignment
28 | 
29 | =head1 CONTACT
30 | 
31 | =cut
32 | 
33 | 
34 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::ExonerateSolexaLocalAlignment;
35 | 
36 | use strict;
37 | use vars qw( %Config );
38 | 
%Config = (
  EXONERATE_SOLEXA_LOCAL_ALIGNMENT_CONFIG_BY_LOGIC => {
    DEFAULT => {
      # database to fetch genomic alignments from
      GENOMICDB => '',
      # only want to realign partial genomic alignments: max score
      # for alignments to be included, say 60% of read length?
      # The placeholder must be an explicit value: with nothing after "=>"
      # Perl silently takes the next key as the value and shifts every
      # following key/value pair. Set a number before using this config.
      SCORE => undef,
      # logicnames of the reads you want to use blank = all
      LOGIC_NAMES => [],
      # logic name of the refined models to use, blank = all
      REFINED_LN => "",
      # database to fetch refined models from
      REFINED_DB => "",
      # maximum distance between split models before assuming they are
      # 2 separate genes?
      MAX_GAP => 20000,
    },
  }
);
59 | 
# Called by "use": aliases the requested %Config entries as package scalars
# in the calling package. Without an explicit list every entry is exported.
# Dies if a requested name has no defined value in %Config.
sub import {
  my ($class, @requested) = @_;
  my ($caller_package) = caller(0);

  my @names = @requested ? @requested : keys(%Config);
  return unless @names;

  # Predeclare the scalars in the calling package
  my $declarations = join(' ', map { '$'.$_ } @names);
  eval "package $caller_package; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@names) {
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables instead of function names
    no strict 'refs';
    *{"${caller_package}::$name"} = \$Config{$name};
  }
}
86 | 
87 | 1;
88 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HivecDNAManyHits.pm:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 4 | #Copyright [2016-2024] EMBL-European Bioinformatics Institute
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #      http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HivecDNAManyHits;
19 | 
20 | use strict;
21 | use warnings;
22 | use feature 'say';
23 | 
24 | 
25 | use Bio::EnsEMBL::Analysis::Tools::Utilities qw(hrdb_get_dba);
26 | use parent ('Bio::EnsEMBL::Hive::RunnableDB::JobFactory');
27 | 
28 | 
# Default parameters for this module: the parent class defaults come first so
# that the values listed here take precedence over them.
# Returns a hash reference.
sub param_defaults {
  my ($self) = @_;

  my %defaults = (
    %{$self->SUPER::param_defaults},
    threshold                  => 20,
    column_names               => ['iid'],
    # The key spelling is part of the parameter name read in fetch_input
    many_hits_process_threshod => .90,
  );

  return \%defaults;
}
39 | 
# Find the hit names supporting more than 'threshold' transcripts in
# 'target_db' and store them in the 'inputlist' parameter.
# When 'old_db' is defined, only the hit names which do not support more than
# threshold * 'many_hits_process_threshod' transcripts in that database are kept.
# Completes the job early when there is nothing to process.
sub fetch_input {
  my $self = shift;

  my $db = hrdb_get_dba($self->param_required('target_db'));
  my $slice_adaptor = $db->get_SliceAdaptor;

  # Count the transcripts per hit name, using the first supporting feature only
  my %hit_names;
  foreach my $slice (@{$slice_adaptor->fetch_all('toplevel', undef, 1)}) {
    foreach my $transcript (@{$slice->get_all_Transcripts}) {
      my $evidence = $transcript->get_all_supporting_features->[0];
      # A transcript without evidence cannot be attributed to a hit name,
      # calling hseqname on the missing feature would kill the job
      if (!$evidence) {
        $self->warning('Transcript dbID '.$transcript->dbID.' has no supporting feature, it is not counted');
        next;
      }
      ++$hit_names{$evidence->hseqname};
    }
  }

  my $threshold = $self->param('threshold');
  my @many_hits = grep { $hit_names{$_} > $threshold } keys %hit_names;
  if (@many_hits) {
    if ($self->param_is_defined('old_db')) {
      my $old_db = hrdb_get_dba($self->param_required('old_db'));
      my $transcript_adaptor = $old_db->get_TranscriptAdaptor;
      # The limit applied to the old database is a fraction of the threshold;
      # the parameter name spelling is kept as it is the name read by this module
      my $old_db_threshold = $threshold * $self->param('many_hits_process_threshod');
      my @to_process;
      foreach my $hitname (@many_hits) {
        my $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence($hitname, 'dna_align_feature');
        push(@to_process, $hitname) unless (scalar(@$transcripts) > $old_db_threshold);
      }
      if (@to_process) {
        $self->param('inputlist', \@to_process);
      }
      else {
        $self->complete_early(scalar(@many_hits).' cDNAs had more than '.$threshold.' hits but were already in the previous database');
      }
    }
    else {
      $self->param('inputlist', \@many_hits);
    }
  }
  else {
    $self->complete_early("No cDNAs had more than $threshold hits");
  }
}
81 | 
82 | 
83 | 1;
84 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/GeneBuild/Gsnap.pm.example:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::Config::GeneBuild::Gsnap
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |     use Bio::EnsEMBL::Analysis::Config::GeneBuild::Gsnap
23 | 
24 | =head1 DESCRIPTION
25 | 
26 | This contains the specific configuration for
27 | Bio::EnsEMBL::Analysis::RunnableDB::Gsnap 
28 | 
29 | =head1 CONTACT
30 | 
31 | =cut
32 | 
33 | 
34 | package Bio::EnsEMBL::Analysis::Config::GeneBuild::Gsnap;
35 | 
36 | use strict;
37 | use vars qw( %Config );
38 | 
# Settings stored under the DEFAULT logic name
my %gsnap_defaults = (
  # Base path to the fastq
  INDIR         => "/path/to/my/input",
  # Path to the output directory
  OUTDIR        => "/path/to/my/output",
  # Name given to the indexed genome when using gmap build
  GENOMENAME    => "",
  # Directory containing the genome files
  GENOMEDIR     => "/path/to/genome/dir",
  # Alignment options (just for example)
  OPTIONS       => "",
  # Are the reads paired end? (1/0)
  PAIRED        => "0",
  # Path to the samtools binaries
  SAMTOOLS_PATH => "/software/solexa/bin/samtools",
  # Optional header with additional information describing the sample
  HEADER        => "",
);

%Config = (
  GSNAP_CONFIG_BY_LOGIC => {
    DEFAULT => \%gsnap_defaults,
  },
);
67 | 
# Called by "use": aliases the requested %Config entries as package scalars
# in the calling package. Without an explicit list every entry is exported.
# Dies if a requested name has no defined value in %Config.
sub import {
  my ($class, @requested) = @_;
  my ($caller_package) = caller(0);

  my @names = @requested ? @requested : keys(%Config);
  return unless @names;

  # Predeclare the scalars in the calling package
  my $declarations = join(' ', map { '$'.$_ } @names);
  eval "package $caller_package; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@names) {
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables instead of function names
    no strict 'refs';
    *{"${caller_package}::$name"} = \$Config{$name};
  }
}
94 | 
95 | 1;
96 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/Funcgen/Chipotle.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | =head1 NAME
17 | 
18 | Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle
19 | 
20 | =head1 SYNOPSIS
21 | 
22 |   my $runnable = Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle->new
23 |      (
24 |          -db       => $db,
25 |          -input_id => 'chromosome::20:1:100000:1',
26 |          -analysis => $analysis,
27 |      );
28 |   $runnable->fetch_input;
29 |   $runnable->run;
30 |   $runnable->write_output;
31 | 
32 | =head1 DESCRIPTION
33 | 
34 | This module provides an interface between the ensembl database and
35 | the Runnable Chipotle which wraps the program ChIPoTle
36 | 
37 | =head1 AUTHOR
38 | 
39 | Stefan Graf, Ensembl Functional Genomics - http://www.ensembl.org/
40 | 
41 | =head1 CONTACT
42 | 
43 | Post questions to the Ensembl development list: http://lists.ensembl.org/mailman/listinfo/dev
44 | 
45 | =cut
46 | 
47 | package Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle;
48 | 
49 | use strict;
50 | use warnings;
51 | use Data::Dumper;
52 | 
53 | use Bio::EnsEMBL::Analysis::Config::General;
54 | use Bio::EnsEMBL::Analysis::Config::Funcgen::Chipotle;
55 | 
56 | use Bio::EnsEMBL::Analysis::RunnableDB;
57 | use Bio::EnsEMBL::Analysis::RunnableDB::Funcgen;
58 | use Bio::EnsEMBL::Analysis::Runnable::Funcgen::Chipotle;
59 | 
60 | use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
61 | use vars qw(@ISA); 
62 | 
63 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::Funcgen);
64 | 
=head2 new

  Arg [1]     : String - class name
  Arg [2..n]  : List - constructor arguments, handed unchanged to the parent constructor
  Description : Instantiates new Chipotle runnabledb: the object is built by the
                parent class, the configuration is read and checked, then the
                analysis object and the feature/data/result sets are verified
  Returntype  : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle object
  Exceptions  : None raised directly; anything thrown by the parent constructor
                or by the check methods is not caught here
  Example     : my $runnabledb = Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle->new(@args);

=cut

sub new {
    my ($class, @args) = @_;

    # Trace message printed to STDOUT every time the constructor is entered
    print "Analysis::RunnableDB::Funcgen::Chipotle::new\n";

    my $self = $class->SUPER::new(@args);

    # $CONFIG is not declared in this file; presumably it is exported by one
    # of the Config modules loaded above - NOTE(review): confirm
    $self->read_and_check_config($CONFIG);

    # Runnable/program special params for the analysis would be added here

    # First make sure we have the correct analysis object, then make sure we
    # can store the correct feature_set, data_sets, and result_sets
    foreach my $check (qw(check_Analysis check_Sets)) {
        $self->$check();
    }

    return $self;
}
95 | 
96 | 1;
97 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/RunnableDB/ProteinAnnotation/Panther.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 2 | # Copyright [2016-2024] EMBL-European Bioinformatics Institute
 3 | # 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | # 
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | # 
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | 
17 | # Author: Gary Williams (gw3@sanger.ac.uk)
18 | # Copyright (c) Marc Sohrmann, 2001
19 | # You may distribute this code under the same terms as perl itself
20 | #
21 | # You may distribute this module under the same terms as perl itself
22 | #
23 | # POD documentation - main docs before the code
24 | 
25 | =pod 
26 | 
27 | =head1 NAME
28 | 
29 |   Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Panther
30 | 
31 | =head1 SYNOPSIS
32 | 
33 |   my $seg = Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Panther->new(
34 |     -db       => $db,
35 |     -input_id => $input_id,
36 |     -analysis => $analysis );
37 |   $seg->fetch_input;  # gets sequence from DB
38 |   $seg->run;
39 |   $seg->output;
40 |   $seg->write_output; # writes features to the DB
41 | 
42 | =head1 DESCRIPTION
43 | 
44 |   This object wraps Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Panther
45 |   to add functionality to read and write to databases.
46 |   A Bio::EnsEMBL::Analysis::DBSQL::DBAdaptor is required for database access (db).
47 |   The query sequence is provided through the input_id.
48 |   The appropriate Bio::EnsEMBL::Analysis object
49 |   must be passed for extraction of parameters.
50 | 
51 | =head1 CONTACT
52 | 
53 |   Gary Williams
54 | 
55 | =head1 APPENDIX
56 | 
57 |   The rest of the documentation details each of the object methods. 
58 |   Internal methods are usually preceded with a _.
59 | 
60 | =cut
61 | 
62 | package Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation::Panther;
63 | 
64 | use warnings ;
65 | use strict;
66 | use vars qw(@ISA);
67 | 
68 | use Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation;
69 | use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Panther;
70 | 
71 | @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB::ProteinAnnotation);
72 | 
73 | 
74 | 
75 | 
# Let the parent class fetch the input, then register a Panther runnable
# created from this object's query and analysis
sub fetch_input {
	my ($self, @args) = @_;

	$self->SUPER::fetch_input(@args);

	$self->runnable(
		Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Panther->new(
			-query    => $self->query,
			-analysis => $self->analysis,
		)
	);
}
83 | 1;
84 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Config/CollapseAffyProbes.pm.example:
--------------------------------------------------------------------------------
 1 | #
 2 | # package Bio::EnsEMBL::Pipeline::Config::ExonerateTranscript
 3 | # 
 4 | # Cared for by EnsEMBL (http://lists.ensembl.org/mailman/listinfo/dev)
 5 | #
 6 | # Copyright GRL & EBI
 7 | #
 8 | # You may distribute this module under the same terms as perl itself
 9 | 
10 | # POD documentation - main docs before the code
11 | 
12 | =head1 NAME
13 | 
14 | Bio::EnsEMBL::Analysis::Config::CollapseAffyProbes
15 | 
16 | =head1 SYNOPSIS
17 | 
18 |     use Bio::EnsEMBL::Analysis::Config::CollapseAffyProbes;
19 | 
20 | =head1 DESCRIPTION
21 | 
22 | This contains the configuration for step 1 of the 
23 | process which maps Affymetrix probes to the Genome.
24 | 
25 | The layout of the configuration is a set of hashes,
26 | each one keyed by logic name. There is also a DEFAULT hash,
27 | which is used as the default for all logic names (this
28 | was the configuration pattern stolen from Exonerate2Genes,
29 | although in this case it's very unlikely you will need to have
30 | different configs by logic name).
31 | 
32 | =head1 CONTACT
33 | 
34 | =cut
35 | 
36 | 
37 | package Bio::EnsEMBL::Analysis::Config::CollapseAffyProbes;
38 | 
39 | use strict;
40 | use vars qw( %Config );
41 | 
42 | # Hash containing config info
43 | # -- one hashnode per logic name, with a 'DEFAULT' logic name provided
44 | #
45 | 
# Connection details of the DB containing all affy_arrays, affy_probes and
# (next step) affy_features
my %affy_output_db = (
  -dbname => 'vivek_homo_sapiens_test_26_35',
  -host   => 'ecs2',
  -port   => '3362',
  -user   => 'ensadmin',
  -pass   => 'xxxxx',
);

# Settings stored under the DEFAULT logic name
my %affy_defaults = (
  # All input probes must be kept in one huge (possibly redundant) fasta file
  QUERYSEQS                => '/ecs2/work3/vvi/osgb/affy/data/all_probes.fa',
  # The module writes a set of affy probes into the OUTDB.affy_probe table and
  # also writes the nonredundant probes into this fasta file, with the fasta
  # headers keyed with the affy probes' internal id
  NON_REDUNDANT_PROBE_SEQS => '/ecs2/work3/vvi/osgb/affy/data/all_nr_probes.fa',
  OUTDB                    => \%affy_output_db,
);

%Config = (
  AFFY_CONFIG => {
    DEFAULT => \%affy_defaults,
  },
);
66 | 
# Called by "use": aliases the requested %Config entries as package scalars
# in the calling package. Without an explicit list every entry is exported.
# Dies if a requested name has no defined value in %Config.
sub import {
  my ($class, @requested) = @_;
  my ($caller_package) = caller(0);

  my @names = @requested ? @requested : keys(%Config);
  return unless @names;

  # Predeclare the scalars in the calling package
  my $declarations = join(' ', map { '$'.$_ } @names);
  eval "package $caller_package; use vars qw($declarations)";
  die $@ if $@;

  foreach my $name (@names) {
    die "Error: Config: $name not known\n" unless defined $Config{$name};

    # Same job as Exporter, but for scalar variables instead of function names
    no strict 'refs';
    *{"${caller_package}::$name"} = \$Config{$name};
  }
}
93 | 
94 | 1;
95 | 


--------------------------------------------------------------------------------
/modules/Bio/EnsEMBL/Analysis/Hive/RunnableDB/HiveIndexGenome.pm:
--------------------------------------------------------------------------------
 1 | # Copyright [1999-2016] the EMBL-European Bioinformatics Institute
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | package Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveIndexGenome;
16 | 
17 | use strict;
18 | use warnings;
19 | 
20 | use File::Spec;
21 | 
22 | use parent ('Bio::EnsEMBL::Analysis::Hive::RunnableDB::HiveBaseRunnableDB');
23 | 
24 | 
=head2 fetch_input

 Arg [1]    : None
 Description: Create the command to execute genome indexing with STAR and store
              it, as an array reference with one element per command line
              argument, in the 'commandline' parameter.
              The job completes early when a file named SA already exists in
              the directory of 'wide_genome_file'.
              'extra_options' can be a single string, added as one argument,
              or an array reference, added as one argument per element.
 Returntype : None
 Exceptions : Throw if splitpath does not return an existing directory name

=cut

sub fetch_input {
  my ($self) = @_;

  my $genome_file = $self->param('wide_genome_file');
  my (undef, $dirname, $file) = File::Spec->splitpath($genome_file);
  $self->throw("File::Spec->splitpath failed, $dirname does not exist") unless (-e $dirname);
  if (-e "$dirname/SA") {
    $self->complete_early($genome_file.' is already indexed!');
  }
  else {
    # run() uses the list form of system, so each element reaches the program
    # as one argument: an option and its value have to be separate elements
    my @command = ($self->param('wide_short_read_aligner'), '--runMode', 'genomeGenerate');
    push(@command, '--runThreadN', $self->param('use_threading'))
      if ($self->param_is_defined('use_threading') and $self->param('use_threading') > 0);
    push(@command, '--genomeDir', $dirname);
    # NOTE(review): only the file name is given, not its directory, which looks
    # like it relies on the working directory of the job - confirm
    push(@command, '--genomeFastaFiles', $file);
    push(@command, '--sjdbGTFfile', $self->param('annotation_gtf'))
      if ($self->param_is_defined('annotation_gtf'));
    push(@command, '--sjdbOverhang', $self->param('read_length')-1)
      if ($self->param_is_defined('read_length') and $self->param('read_length') > 1);
    if ($self->param_is_defined('extra_options')) {
      my $extra_options = $self->param('extra_options');
      push(@command, ref($extra_options) eq 'ARRAY' ? @$extra_options : $extra_options);
    }
    $self->param('commandline', \@command);
  }
}
57 | 
58 | 
=head2 run

 Arg [1]    : None
 Description: Execute the command stored in the 'commandline' parameter with
              the list form of system, it will generate the indexes
 Returntype : None
 Exceptions : Throws if the command returns a non zero status

=cut

sub run {
  my ($self) = @_;

  my $status = system(@{$self->param('commandline')});
  if ($status) {
    $self->throw('Could not execute: '.join(' ', @{$self->param('commandline')}));
  }
}
74 | 
75 | 
=head2 write_output

 Arg [1]    : None
 Description: Nothing is written by this module, the method only exists to
              override the SUPER method and always returns 1
 Returntype : Integer 1
 Exceptions : None

=cut

sub write_output {
  return 1;
}
90 | 
91 | 1;
92 | 


--------------------------------------------------------------------------------