├── 00_build_index_hisat2.sh
├── 01_lastz_prepare_kmers.sh
├── CopyNumberGen.sh
├── ExtractSCOs.sh
├── ExtractSeq.sh
├── Fastq2CA.sh
├── GI2Taxonomy.sh
├── GOseq_auto.sh
├── GuessEncoding.sh
├── JRC.sh
├── JobBLAST.sh
├── JobP128.sh
├── JobQ.sh
├── JobR.sh
├── JobR_BLAST.sh
├── JobR_condo.sh
├── JobTime.sh
├── JobTime2.sh
├── Khmer_interleave.sh
├── L3.ModuleGen.sh
├── LAS-makeSLURMp.py
├── Lines_based_file_deletion.sh
├── ModuleGen.sh
├── N50Calc.pl
├── PcB_filter_stats.sh
├── Quiver_01.sh
├── Quiver_02.sh
├── README.md
├── Rscript_for_plotting_bayescan_results.sh
├── Rscripts
│   ├── ElementScript.R
│   ├── Elementsource.R
│   ├── QS.R
│   └── gff2gtf.R
├── SAM_to_sortedBAM.sh
├── SOYGO
│   ├── ANML
│   │   ├── ANML.4Kevin
│   │   ├── ANML.BP.final
│   │   ├── ANML.MF.final
│   │   ├── ANML.list
│   │   ├── ANML.list_BP.txt
│   │   ├── ANML.list_BP_fisher.txt
│   │   ├── ANML.list_BP_names.txt
│   │   ├── ANML.list_BP_out.txt
│   │   ├── ANML.list_BP_output.txt
│   │   ├── ANML.list_ERROR.txt
│   │   ├── ANML.list_MF.txt
│   │   ├── ANML.list_MF_fisher.txt
│   │   ├── ANML.list_MF_names.txt
│   │   ├── ANML.list_MF_out.txt
│   │   ├── ANML.list_MF_output.txt
│   │   └── ANML.list~
│   ├── ATH_GO_GOSLIM.022714
│   ├── GOinfoKevinupdate2.pl
│   ├── Gmv2_GODb
│   ├── README_SOYBASE
│   ├── SOYGO.pl
│   ├── Script_Fisher.R
│   ├── SoyGO.sh
│   └── combinefilesbyGO.pl
├── SRAfq2FASTQ.sh
├── a_loghistory_func.sh
├── a_longhistory_func.sh
├── abacas.1.3.1.pl
├── acd_func.sh
├── addFastaHeaders.pl
├── backup
├── bayes_script.sh
├── biostar
├── biostar.cpp
├── blastXML2Tab.py
├── blast_job_gen.sh
├── blast_wrapper.sh
├── bowtie2_se_noclip.sh
├── bwa_map_sort.sh
├── cb
├── cdgit
├── cegma.sh
├── checkBlastStatus.sh
├── clean_trinity.sh
├── combineRawFiles.sh
├── configFiles
│   ├── PcB_subreadfiltering_settings.xml
│   └── vcf_structure.spid
├── connected_graphs.awk
├── count.sh
├── count_fastq.sh
├── create_sam.sh
├── decison_tree_for_picking_common_strata_for_orthogroups.sh
├── dos2unix
├── downloadSRA_ebi.sh
├── edit_sam_files.sh
├── end_time.sh
├── extract_seq.sh
├── extract_seq.sh.save
├── fasta-splitter.pl
├── fasta2fastq.py
├── fastaMulti2singleLine.pl
├── fastaSortByName.pl
├── fasta_distribution.pl
├── fasta_length.py
├── fastarange
├── fastarange.c
├── fastasplitn
├── fastasplitn.c
├── fastq-splitter.pl
├── fastq2fasta.sh
├── fastqc_parse.py
├── fastqc_stats.sh
├── fasttrans
├── fasttrans.c
├── filecount
├── filter_parllel_log.sh
├── firstInstanceOf.awk
├── fna_qual2fastq.py
├── formatOut2Tab.sh
├── fq2bam.sh
├── genome-gaps-as-bed.py
├── get_GitHub_file
│   ├── get_GitHub_file.sh
│   └── get_GitHub_folder.sh
├── get_ip
├── get_taxanomy.sh
├── gff2fasta.pl
├── gitgrep
├── gmap_cdna.sh
├── gsize.sh
├── gsnap_pe.sh
├── gsnap_pe2.sh
├── gsnap_pe_clip.sh
├── gsnap_pe_clip_final.sh
├── gsnap_pe_noclip.sh
├── gsnap_pe_noclip_final.sh
├── gsnap_se.sh
├── gsnap_stats.sh
├── gtf2gff3.pl
├── guess_encoding.py
├── header_replace_fasta.pl
├── histogram.awk
├── htseq_count.sh
├── interleave_PE_fastq.sh
├── intervalBins.awk
├── join.pl
├── join_files.sh
├── joinr.sh
├── khmer_pe.sh
├── mac2unix
├── makeLocalSLURMp.py
├── makeLocalSLURMs.py
├── makeNovaSLURMs.py
├── makePBSp.py
├── makePBSs.py
├── makeSLURM_bridges.py
├── makeSLURMp.py
├── makeSLURMp_ceres.py
├── makeSLURMs.py
├── makeSLURMs_ceres.py
├── mayday.sh
├── md
├── mismatch-counter.sh
├── mpblast.pl
├── nanoVersions.sh
├── newModuleGen.sh
├── new_Assemblathon.pl
├── noTabCompletion.sh
├── numberPatMatch_byLine
├── parse.sh
├── pathadd
├── pb_errc.sh
├── pb_errc2.sh
├── prepare_genome_modules.old
├── qcMarkdownGenerator.sh
├── qiime_config.sh
├── qn
├── qourum_ec.sh
├── quedel.sh
├── readme.md
├── removeRedundantScaffolds.sh
├── removeSeqsFromReads.sh
├── renamed_results.sh
├── reorder_fasta.py
├── rowsums
├── rsem_analyize_dge.sh
├── rsem_dge.sh
├── rsem_estm_matrix.sh
├── runAugustus.sh
├── runBUSCO.sh
├── runBWAcrossspp.sh
├── runBayesScan.sh
├── runBlobtools.sh
├── runBraker.sh
├── runDigiNorm_pe.sh
├── runGenomeScope.sh
├── runGenomeScope_arun.sh
├── runGmap.sh
├── runGuidenceAA.sh
├── runGuidenceNT.sh
├── runHISAT2.sh
├── runIRF.sh
├── runLTRfinder.sh
├── runMaSuRCA.sh
├── runMegablast.sh
├── runMinimap2-cDNA.sh
├── runPlatanus.sh
├── runRAxML.sh
├── runRepeatExplorer.sh
├── runRepeatModeler.sh
├── runSNPHYLO.sh
├── runSOAP.sh
├── runSPAdes.sh
├── runSTAR.sh
├── runSpliceSites.sh
├── runStringtie.sh
├── runSynteny.sh
├── runTabix.sh
├── runTasselGWAS.sh
├── runTraning.sh
├── run_gmap.sh
├── rundipSPAdes.sh
├── sample_fastq.py
├── scaffold2contig.pl
├── scores2dokuwiki.sh
├── seprate_paired_reads.sh
├── soap_config_file.txt
├── split_submit.sh
├── sr_config_MaSuRCA.txt
├── sr_config_MaSuRSOAP.txt
├── star_build.sh
├── sub
├── summary.sh
├── taxadump_gen.sh
├── taxaids2division.sh
├── taxaids2lineage.sh
├── taxid_ranks.py
├── template.slurm
├── test_bioperl.pl
├── tophat_se.sh
├── transpose.awk
├── trim_pe.sh
├── trim_se.sh
├── trimmomatic_pe.sh
├── trimmomatic_se.sh
├── unix2dos
├── unix2mac
├── validate_features.pl
├── vcf-subset.py
└── walltime

--------------------------------------------------------------------------------
/00_build_index_hisat2.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#if [ "$#" -ne 2 ] ; then
#echo "please provide:"
#echo -e "\t\t(1) name for index, preferably the NAM line name"
#echo -e "\t\t(2) genome sequences, use only scaffolds"
#echo "";
#echo "./00_build_index_hisat2 <index name> <genome file>" ;
#echo "";
#exit 0;
#fi

NAM=$(basename $(dirname $(pwd)))
file=$1

module load hisat2
hisat2-build ${file} $NAM
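
# Usage sketch (hypothetical paths): the index prefix is taken from the parent
# of the working directory, so running from a directory such as
# /work/NAM/B73/hisat2_index:
#   ./00_build_index_hisat2.sh B73.scaffolds.fasta
# is equivalent to: hisat2-build B73.scaffolds.fasta B73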

--------------------------------------------------------------------------------
/01_lastz_prepare_kmers.sh:
--------------------------------------------------------------------------------
#!/bin/bash
############
# files needed:
# 1. Genome file to which the sequences need to be aligned
# 2. Output folder where all the temp and final results will be written
# 3. Draft assembly sequences
# Enter them in that order for this script.
# Note that for all the files you need to enter the full path.
############

## define where the scripts are located (for add_contig_ID_to_seg.pl & merge_kmer_ranges.pl)
BINDIR="/data003/GIF/arnstrm/20150506_Hufford_maker/alignment/raw_files/kmer_align"
CPU=48 # number of parallel jobs
KMER=23 # kmer size
TASSEL="/home/arnstrm/tassel/dist"
MEM=1500 # total system memory in GB
MPJ=$((${MEM} / ${CPU})) # per-job memory in GB (integer division)

# modules required
module load parallel
module load java
module load tassel/5.2.16
module load perl
module load python

# easy names
TARGETDIR="$2"
REF="$1"
ASSEMBLY="$3"
OUTNAME=$(basename "${ASSEMBLY%.*}") # output prefix; never set in this copy of the script, so it is derived from the assembly file name here

# set-up for analyses
mkdir -p ${TARGETDIR} && cd ${TARGETDIR}
rm -f tasks_java tasks_sort tasks_merge

# generate job lists
for f in $(echo {A,T,G,C}{A,T,G,C}{A,T,G,C}); do
  echo "java -Xmx${MPJ}g -cp ${TASSEL}/tassel5-active.jar net.maizegenetics.ed.t5.KmerAnalysisVB1 $KMER ${f} ${OUTNAME}_${f} ${REF} ${ASSEMBLY}" >> tasks_java
  echo "sort -S ${MPJ}G -k 1,1 -k 2,2n ${OUTNAME}_${f}.seg >& ${OUTNAME}_${f}.seg.srt12" >> tasks_sort
  echo "${BINDIR}/merge_kmer_ranges.pl ${OUTNAME}_${f}.seg.srt12 >& ${OUTNAME}_${f}.seg.srt12.mrg" >> tasks_merge
done

parallel --jobs ${CPU} --joblog java_tasks.log --workdir $PWD < tasks_java
parallel --jobs ${CPU} --joblog java_sort.log --workdir $PWD < tasks_sort
parallel --jobs ${CPU} --joblog java_merge.log --workdir $PWD < tasks_merge
cat ${OUTNAME}_???.seg.srt12.mrg | sort -S ${MEM}G --parallel=${CPU} -k 1,1 -k 2,2n >& ${OUTNAME}.mrg.srt12
${BINDIR}/merge_kmer_ranges.pl ${OUTNAME}.mrg.srt12 >& ${OUTNAME}.mrg.srt12.mrg
${BINDIR}/add_contig_ID_to_seg.pl ${OUTNAME}.mrg.srt12.mrg ${REF} ${ASSEMBLY} | sort -k 8,8n -k 7,7r -k 5,5n - | cut -d " " -f 8 --complement > ${OUTNAME}.mrg.qsrt
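
# Usage sketch (hypothetical full paths, in the order the script expects:
# reference genome, output folder, draft assembly):
#   ./01_lastz_prepare_kmers.sh /work/ref/genome.fasta /work/kmer_out /work/asm/draft.fasta
# The per-prefix .seg files, their sorted/merged intermediates, and the final
# kmer table (<prefix>.mrg.qsrt) are all written to /work/kmer_out.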

--------------------------------------------------------------------------------
/CopyNumberGen.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# This is a bash script that generates a table of the number of orthologs present in each species.
# It takes the output generated by "orthomclMclToGroups" and converts ids to numbers.
# By default it prints to stdout.

# Arun Seetharam

scriptName="${0##*/}"

function printUsage() {
cat <<EOF
[...] > ${file}.temp # separate gene ids from the species identifier
names=`head -n 500 ${file}.temp | tr -s " " "\n" | sed '/^".*/d' | sed '/^OG.*/d' | sort | uniq | tr -s "\n" " "; echo ""` # array of all species names
echo -en "OG_name\t" # print the header line
for name in ${names[@]}; do
  echo -en "$name\t";
done
echo "";
while read line; do # count frequency
  ogroup=$(echo $line | cut -d " " -f 1);
  echo -en "$ogroup\t";
  for name2 in ${names[@]}; do
    freq=`echo $line | awk -F "$name2" '{print NF-1}'`;
    echo -ne "$freq\t";
  done
  echo "";
done <${file}.temp;
rm ${file}.temp; # delete temp file

--------------------------------------------------------------------------------
/ExtractSCOs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# This is a bash script that extracts all ortholog groups having a single copy gene in different species.
# It takes the output generated by "CopyNumberGen.sh" and prints single copy orthologs.
# By default it prints to stdout.

# Arun Seetharam

scriptName="${0##*/}"
declare -i DEFAULT_COPY=1
declare -i copynum=DEFAULT_COPY

function printUsage() {
cat <<EOF
[...]

--------------------------------------------------------------------------------
/ExtractSeq.sh:
--------------------------------------------------------------------------------
#!/bin/bash
[...]

scriptName="${0##*/}"
outdir=$(pwd)
function printUsage() {
cat <<EOF
[...] >> ${outdir}/${myArray[0]}.fa;
done <${file}
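
# The three scripts above are meant to be chained; a sketch with hypothetical
# file names (exact options may differ, since the usage blocks are truncated
# in this copy):
#   ./CopyNumberGen.sh groups.txt > counts.txt    # per-species ortholog counts
#   ./ExtractSCOs.sh counts.txt > sco_groups.txt  # keep single-copy groups
#   ./ExtractSeq.sh sco_groups.txt                # write one fasta per orthogroup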

--------------------------------------------------------------------------------
/Fastq2CA.sh:
--------------------------------------------------------------------------------
#!/bin/bash
R1=$2
R2=$3
LB=$1
OUTFILE=$(basename ${R1} | sed 's/_1.fq//g');
fastqToCA -insertsize 500 100 -libraryname $LB -technology illumina -type sanger -innie -mates ${R1},${R2} > ${OUTFILE}.frg

--------------------------------------------------------------------------------
/GI2Taxonomy.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# download nodes.dmp, names.dmp and gi_taxid_nucl.dmp, and provide their location here
# run it as:
# parallel "sh GI2Taxonomy.sh {}" :::: gi_ids.file
#

NAMES="names.dmp"
NODES="nodes.dmp"
GI_TO_TAXID="gi_taxid_nucl.dmp"
TAXONOMY=""
GI="${1}"

# Obtain the name corresponding to a taxid, or the taxid of the parent taxa
get_name_or_taxid()
{
  grep --max-count=1 "^${1}"$'\t' "${2}" | cut --fields="${3}"
}

# Get the taxid corresponding to the GI number
TAXID=$(get_name_or_taxid "${GI}" "${GI_TO_TAXID}" "2")

# Loop until you reach the root of the taxonomy (i.e. taxid = 1)
while [[ "${TAXID}" -gt 1 ]] ; do
  # Obtain the scientific name corresponding to the taxid
  NAME=$(get_name_or_taxid "${TAXID}" "${NAMES}" "3")
  # Obtain the taxid of the parent taxa
  PARENT=$(get_name_or_taxid "${TAXID}" "${NODES}" "3")
  # Build the taxonomy path
  TAXONOMY="${NAME};${TAXONOMY}"
  TAXID="${PARENT}"
done

echo -e "${GI}\t${TAXONOMY}"

exit 0

--------------------------------------------------------------------------------
/GOseq_auto.sh:
--------------------------------------------------------------------------------
#!/bin/bash

module unload R
module load R/3.1.2

# define the GO annotation file
export goAnnot="go_annotation.sorted.uniq.Mapped"
# define the gene lengths file
export geneLen="gene.lengths.txt"

# create a list of your genes of interest, one per line:
# geneID1
# geneID2
# etc
# GOI = Genes of Interest
#export GOI="GOI.txt"
export GOI=$1

# create a factor label for the GOI
#export labelgoi="incOverTm"
export labelgoi=$1

# create the factor labeled file:
# GOI geneID1
# GOI geneID2
# GOI etc
#awk 'BEGIN{OFS="\t"} {print "'$labelgoi'",$1}' $GOI > GOI_factorFile.txt

# create a list of all your genes
export Allgenes="Allgenes.txt"

# create a factor label for all genes
export labelallg="Allgenes"

# create the factor labeled file:
# seriola geneID1
# seriola geneID2
# seriola etc
#awk 'BEGIN{OFS="\t"} {print "'$labelallg'",$1}' $GOI > Allgenes_factorFile.txt

#awk '{print $1}' GOI_factorFile.txt Allgenes_factorFile.txt
cat $GOI $Allgenes | sort | uniq -c | awk 'BEGIN{OFS="\t"} ($1==1) {print "'$labelallg'",$2} ($1==2) {print "'$labelgoi'",$2}' >factor_file_$GOI

run_GOseq.pl --factor_labeling factor_file_$GOI --GO_assignments $goAnnot --lengths $geneLen

mkdir factor_file
mv factor_file_* factor_file
mkdir GO_OUT
mv *enriched GO_OUT/
mv *depleted GO_OUT/
mkdir transcriptsIDs
mv *.txt transcriptsIDs/
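
# Usage sketch (hypothetical gene list): with go_annotation.sorted.uniq.Mapped,
# gene.lengths.txt and Allgenes.txt in the working directory,
#   ./GOseq_auto.sh upregulated_ids.txt
# writes factor_file_upregulated_ids.txt, runs run_GOseq.pl, and moves the
# *.enriched / *.depleted GOseq tables into GO_OUT/.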

--------------------------------------------------------------------------------
/GuessEncoding.sh:
--------------------------------------------------------------------------------
#!/bin/bash
INFILE="$1"
head -n 100000 ${INFILE} | awk '{if(NR%4==0) printf("%s",$0);}' | od -A n -t u1 | awk 'BEGIN{min=100;max=0;}{for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}}END{if(max<=74 && min<59) print "Phred+33"; else if(max>73 && min>=64) print "Phred+64"; else if(min>=59 && min<64 && max>73) print "Solexa+64"; else print "Unknown score encoding!";}'

--------------------------------------------------------------------------------
/JRC.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Script to create a submission script that can then easily be appended to.
# 05/23/2016
# Andrew Severin

function printUsage () {
cat <<EOF
[...]
cat <<JOBHEAD > $1.sub
#!/bin/bash
#PBS -l nodes=1:ppn=16
#PBS -l walltime=$2:00:00
#PBS -N $1
#PBS -o \${PBS_JOBNAME}.o\${PBS_JOBID} -e \${PBS_JOBNAME}.e\${PBS_JOBID}
#PBS -m ae -M $netid@iastate.edu
cd \$PBS_O_WORKDIR
ulimit -s unlimited
chmod g+rw \${PBS_JOBNAME}.[eo]\${PBS_JOBID}
module use /shared/modulefiles
module load LAS/parallel/20150922

JOBHEAD

--------------------------------------------------------------------------------
/JobBLAST.sh:
--------------------------------------------------------------------------------
#!/bin/bash
rm cmds
pre=$(pwd)
for file in trinity_all.part-???.fasta; do
  num=$(echo ${file%.*}| cut -d "-" -f 2);
  echo "blastx -query ${file} -db ${pre}/DATABASE/uniref90.fasta -num_threads 4 -max_target_seqs 1 -outfmt 6 > uniref90.blastx.outfmt6.${num}" >> cmds
done

split -d -l 8 cmds blast_cmds_

for file in blast_cmds_*; do
  num=$(echo ${file} | cut -d "_" -f 3);
  cat <<SUB > blast_${num}.sub # "SUB" delimiter assumed; lost in this copy
#!/bin/bash
#PBS -l vmem=256Gb,pmem=8Gb,mem=256Gb
#PBS -l nodes=1:ppn=32:ib
#PBS -l walltime=48:00:00
#PBS -N blast_${num}
#PBS -o \${PBS_JOBNAME}.o\${PBS_JOBID} -e \${PBS_JOBNAME}.e\${PBS_JOBID}
#PBS -m ae -M arnstrm@gmail.com
cd \$PBS_O_WORKDIR
ulimit -s unlimited
chmod g+rw \${PBS_JOBNAME}.[eo]\${PBS_JOBID}
module use /data004/software/GIF/modules
module load ncbi-blast
module load parallel
parallel <<EOF
SUB
  cat ${file} >> blast_${num}.sub
  echo "EOF" >> blast_${num}.sub
  echo "qstat -f \"\$PBS_JOBID\"" >> blast_${num}.sub
done

--------------------------------------------------------------------------------
/JobP128.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Script to generate PBS sub files reading a command file
# 01/26/2015
# Andrew Severin

function printUsage () {
cat <<EOF
[...]

Description

This is a bash script that will create a sub file to subdivide a single file and parallelize a bash script
across 128 cpus to run on the subdivisions of the file.
The submission file is formatted to run on the Condo with 1 hour of walltime on the default queue.
The output will be named with the bash SCRIPT and the jobID.

-h, --help
	Brings up this help page
