├── .gitignore
├── ColLink.pl
├── GBS
    ├── ext_rd_withAPEKI.pl
    └── sep_rd_by_keyFile.pl
├── LICENSE
├── MyPM
    ├── ConfigSunhh.pm
    ├── LogInforSunhh.pm
    ├── ReadInAlnSunhh.pm
    ├── ReadInSeqSunhh.pm
    ├── SNP_tbl.pm
    ├── SNP_tbl.pm_bk
    ├── SeqAlnSunhh.pm
    ├── fastaSunhh.pm
    ├── fileSunhh.pm
    ├── fromBraker.pm
    ├── gffSunhh.pm
    ├── mathSunhh.pm
    ├── mcsSunhh.pm
    ├── plotSunhh.pm
    └── wm97Sunhh.pm
├── README.md
├── annot_tools
    ├── PASA
    │   ├── Launch_PASA_pipeline.pl.edit
    │   └── build_comprehensive_transcriptome.dbi
    ├── add_tag_to_gffID.pl
    ├── ahrd
    │   ├── annot.cfg
    │   ├── cmd_list
    │   ├── generate_ahrd_yml.sh
    │   ├── generate_ahrd_yml_fromCDS.sh
    │   ├── pipe_for_functional_annotation.pl
    │   └── trim_orphan_right_brack.pl
    ├── annot_mapEst2genome.sh
    ├── annot_mapProt2genome.sh
    ├── annot_mapRNA2genome.sh
    ├── augustus.accuracy_calculator.pl
    ├── b2g_graph_software
    │   └── fmt_b2g_enrich.pl
    ├── blast_xml_parse.py
    ├── clean_pasa_med_files.pl
    ├── cmd_list_run_b2g
    ├── cnvt_infernal_tbl.pl
    ├── cnvt_maker2aug_gff3.pl
    ├── cnvt_spaln2makerAln_prot_gff3.pl
    ├── cnvt_uniprot_dat2fa.pl
    ├── copy_species.pl
    ├── createAugustusJoblist.pl
    ├── evm_tools
    │   ├── chk_pcTE_ahrd
    │   ├── cmd_list
    │   ├── cmd_list_finalize_gene_ID
    │   ├── cmd_list_old
    │   ├── cnvt_maker2evmProtGff3.pl
    │   ├── evm_weight.txt
    │   ├── filter_cds2Bad_bn6.pl
    │   ├── fit_evm_pred.pl
    │   ├── get_longCDS_in_gff.pl
    │   ├── good_desc
    │   ├── list.gnPref_mkPref_gnFa_gff
    │   ├── list.toAnnot
    │   ├── param.cfg
    │   ├── param_list
    │   ├── pipe_revise_byEVM.pl
    │   ├── run_evm_21TJS6_wiEVMpipe.sh
    │   ├── run_evm_21TJS6_woEVMpipe.sh
    │   └── run_evm_wiEVMpipe.sh
    ├── example_pipes
    │   ├── bad_prot_IDs
    │   ├── cmd_list_protAln_generate
    │   ├── cmd_list_protAln_single_1
    │   ├── cmd_list_protAln_single_1add
    │   ├── cmd_list_transAln_generate
    │   └── cmd_list_transAln_single_1
    ├── find_complete_prot_byBlastp.pl
    ├── fitGff_aug2maker.pl
    ├── fix_1bpLoc_by_zff2Gb.pl
    ├── genemark
    │   └── get_intron_hints_fromGff.pl
    ├── get_gff_byScfID.pl
    ├── get_maker_result.sh
    ├── get_sameCDSGff.pl
    ├── gff3_preds2models
    ├── gff3_select
    ├── go
    │   ├── cnvt_GOobo_to_detail_list.pl
    │   ├── generate_topGO_gene2GO_list.pl
    │   └── mk_GAF2.0.pl
    ├── good_model_from_gff3.pl
    ├── intron2exex.pl
    ├── iprscan
    │   ├── cmd_list
    │   ├── cnt_TEIPRacc.pl
    │   ├── cnvt_iprJson2tbl.pl
    │   ├── converter_iprV4.pl
    │   ├── in.ipr.tsv
    │   ├── list_IPRacc.pl
    │   └── potential_TE_IPRacc
    ├── join_b2g_annot.pl
    ├── keep_nonRedundant_list.pl
    ├── kegg
    │   ├── 1_extract_KeggMapRes.pl
    │   ├── 2_join_mapIDs.pl
    │   ├── cnvt_keggPWYReconstruction_to_tab.pl
    │   ├── list1.keggMap.txt
    │   ├── list1.keggMap.txt.tbl.comb
    │   └── record
    ├── liftoff_tools
    │   ├── blk2bed.pl
    │   ├── blk2gff.pl
    │   ├── chk_only_pan.pl
    │   ├── cnt_R2Q_liftoff_info.pl
    │   ├── cnvt_genePAV_to_grpPAV.pl
    │   ├── cnvt_genemap_to_QlocBlk.pl
    │   ├── cnvt_gff_to_cdsBed.pl
    │   ├── filter_R2Q_liftoff_tbl.pl
    │   ├── fit_gff_4igv.pl
    │   ├── fmt_gff_trim2CDS.pl
    │   ├── fmt_grp_by_spec.pl
    │   ├── grp2single.pl
    │   ├── info_bedtools_intersect.pl
    │   ├── info_liftoffGff.pl
    │   ├── prepare_gff3_to_blk.pl
    │   ├── prepare_input.pl
    │   ├── psl_to_geneLoc.pl
    │   ├── remove_ovl_loc.pl
    │   ├── ret_good_model_from_liftoff_gff3.pl
    │   ├── retrieve_QlocSeq.pl
    │   ├── retrieve_QlocSeq_fromBlk.pl
    │   ├── retrieve_liftoff_pairs.pl
    │   ├── rm_head_partial_frame_inGff.pl
    │   ├── run_liftoff_FromTo.pl
    │   ├── run_liftoff_FromTo_pan.pl
    │   ├── run_liftoff_FromTo_pan_para.pl
    │   ├── simple_group_pairs.pl
    │   └── slct_best_psl.pl
    ├── maker
    │   ├── add_ipr.sh
    │   ├── ret_ipr_nonovl_genes.pl
    │   ├── ret_makerGff_fromAbGff.pl
    │   ├── ret_maker_abinit_gff3.pl
    │   └── rm_maker_fasta.pl
    ├── merge_blast_xml.pl
    ├── mkCmd_blast2Nr.sh
    ├── pasa
    │   ├── alignAssembly.config
    │   └── pair_ovlp.pl
    ├── pasa_gff_to_alnGff.pl
    ├── pipe_get_complete_models.pl
    ├── predictByAug_rna2genome.pl
    ├── protein
    │   ├── cnvt_spalnGff_to_infoTab.pl
    │   └── trimProt4spaln.pl
    ├── rename_by_GffJnLoc.pl
    ├── repAnno_tools
    │   ├── LTR_dist_est.mao
    │   ├── ProtExcluder1.1
    │   │   ├── GCcontent.pl
    │   │   ├── Installer.pl
    │   │   ├── ProtExcluder.npl
    │   │   ├── ProtExcluder.pl
    │   │   ├── blastformatProt.pl
    │   │   ├── countaanu.pl
    │   │   ├── fasta-reformat.pl
    │   │   ├── getanycolumnuni.pl
    │   │   ├── matchtract.pl
    │   │   ├── mergequeryBF.pl
    │   │   ├── mergeunmatchedregion.pl
    │   │   ├── mspesl-sfetch.npl
    │   │   ├── mspesl-sfetch.pl
    │   │   ├── rmlistedseq.pl
    │   │   ├── rmlowcomfromBF.pl
    │   │   ├── rmlowcomplexitymathc.pl
    │   │   ├── rmshortseq_noN.pl
    │   │   └── unmatchedregionBF.pl
    │   ├── add_repClass.pl
    │   ├── build_Examplar_byFa.pl
    │   ├── build_Examplar_byFa.pl_bak
    │   ├── ch_gff_to_tab.pl
    │   ├── ch_seqID.pl
    │   ├── cmd_list_forFinalRepDB
    │   ├── cnt_ltr_dist.pl
    │   ├── detect_centromere
    │   │   ├── cnt_CLfreqInChr.pl
    │   │   └── get_candidate_cent_from_whole.pl
    │   ├── filter_RepMsk_out.pl
    │   ├── filter_flank.pl
    │   ├── filter_gff.pl
    │   ├── filter_tab_byPBSPPT.pl
    │   ├── fit_rmOut_to_makerGff.pl
    │   ├── get_LTR_wi_Termi.pl
    │   ├── lis_masked_RepMsk_out.pl
    │   ├── mk_outID.pl
    │   ├── muscle3.8.31_i86linux64
    │   ├── name_from_tab.pl
    │   ├── path.conf
    │   ├── pipe_ltr85.pl
    │   ├── pipe_ltr99.pl
    │   ├── pipe_trim85.pl
    │   ├── pipe_trim99.pl
    │   ├── rmOutToGFF3_with_TEclass.pl
    │   ├── rm_geneFrag.pl
    │   ├── run_MITE.sh
    │   ├── run_repClass.pl
    │   ├── run_repClass_ltr.sh
    │   ├── run_repClass_mite.sh
    │   ├── run_repeatmasker.sh
    │   ├── run_rm_GF.sh
    │   ├── run_rpmd.sh
    │   └── seqID_to_scaf.pl
    ├── replace_blast_asn_db.pl
    ├── rmRedunt_inputNucl.pl
    ├── rmRedunt_inputProt.pl
    ├── rmRepGff_withBadTarget.pl
    ├── rmShrtExon_spaln_prot2genom.pl
    ├── rm_ExactDup_gene_model.pl
    ├── rm_ovlap_gene_model.pl
    ├── run_maker.sh
    ├── run_spaln_prot2genom.pl
    ├── satisfied_prot.sh
    ├── set_stopCodonFreq.pl
    ├── simplify_gff3.pl
    ├── slct_maker_gff3.pl
    ├── snap_good_wrn_by_valid.pl
    ├── tRNA
    │   ├── cnvt_trnascan_to_table.pl
    │   ├── cnvt_trnascanse_to_boxStat.pl
    │   ├── filter_trnascanSE_result.pl
    │   └── stat_trnaFreq.pl
    ├── train_augustus_goodTrain.sh
    ├── train_augustus_goodTrain_v2.sh
    ├── train_snap1.sh
    ├── trim_orphan_right_brack.pl
    └── zff2augustus_gbk.pl
├── assemble_tools
    ├── LAI
    │   ├── cmd_list
    │   ├── pepper_genome.fa.chrID
    │   ├── pepper_genome.fa.out.LAI_unlock.gz
    │   ├── pepper_genome.fa.out.LAI_unlock_distr_byChr.pdf
    │   ├── pepper_genome.fa.out.LAI_wiDefault
    │   ├── pepper_genome.fa.tbl
    │   ├── plot_LAI_id85.R
    │   ├── scrn.para_LAI.gz
    │   └── scrn.prepare_genometools
    ├── add_1bp_ctg_toAGP_jcviPlot.pl
    ├── add_dep_to_chopInf.pl
    ├── add_tag_to_fsa.pl
    ├── bionano
    │   ├── rename_xmap.pl
    │   └── sv_related_xmap.pl
    ├── brk_fas_by_FRBadWind_for15kb.pl
    ├── busco
    │   ├── cmd_list
    │   ├── geneCopyN_busco_full_table.pl
    │   ├── rm_busco_intermediate_files.pl
    │   └── summary_busco_full_table.pl
    ├── calc_ident_from_sam2pairwise.pl
    ├── classify_tools
    │   ├── classify_region_byBn6.pl
    │   ├── cmd_list
    │   ├── cnt_In_bp.pl
    │   ├── get_Ex_region.pl
    │   ├── line_bn6_query.pl
    │   ├── recog_organelle_rDNA_from_classJn.pl
    │   ├── run_seg_blastn.pl
    │   └── temporary_prepare_rmCont.pl
    ├── clip_scaf_end.pl
    ├── cmd_batch_for_mugsy
    ├── cnvt_loc_fromAGP_toAGP_forLoci.pl
    ├── cnvt_loci_97_v1_to_vRILs.pl
    ├── cnvt_quast_unaligned_info_to_tbl.pl
    ├── ctgBn6_to_scfCov.pl
    ├── deduplicate_ncpu.pl
    ├── draw_aln_from_bam.pl
    ├── estimate_gap_size.pl
    ├── extract_ctg_from_scaf.pl
    ├── fill_SingleNgap.pl
    ├── filter_dropGap.pl
    ├── format_maf_forMugsy.pl
    ├── funcs_for_compare_genome.r
    ├── get_frag_cov.pl
    ├── get_paired_maf.pl
    ├── get_rep_loc_fromPileup.pl
    ├── good_link_fromMaf.pl
    ├── grp_maf_byLinkage.pl
    ├── hifi_hic
    │   ├── cnvt_gfa2fa.pl
    │   ├── cnvt_num2tigID.pl
    │   ├── convert_3ddna_assembly_to_agp.pl
    │   └── get_HiCanu_ctg.pl
    ├── high_tandem_repeat
    │   └── count_kmer_distr.pl
    ├── hist_plot_ins.R
    ├── join_link.pl
    ├── kmer
    │   ├── cmd_list
    │   ├── cnvt_qualFa_to_wind_avgTbl.pl
    │   ├── extract_genomescope_summary.pl
    │   ├── get_kmer_by_seq.pl
    │   ├── get_kmer_by_seq_summary.pl
    │   └── get_seq_by_kmer.pl
    ├── lift_bed_jcviPlot.pl
    ├── link_scf2chr.pl
    ├── link_seq_by_agp.pl
    ├── list_run_last2scaf.pl
    ├── loc_repeat.pl
    ├── maf2fasta.pl
    ├── map_ctg_to_scf
    │   ├── cmd_list
    │   ├── simple_fill.pl
    │   └── stat_ragtag_agp.pl
    ├── mk_bed_from_agp.pl
    ├── mk_hapmap_from_SNPtbl.pl
    ├── mk_wind_for_INScnt.pl
    ├── order_scf_1.pl
    ├── pacBio_tools
    │   └── infor_PBjelly_assembly_chunk_err.pl
    ├── plot_kmer_prop_along_chr.r
    ├── resize_N_in_agp.pl
    ├── rmRed_byIdentCov_InCtg.pl
    ├── rm_redundant_loci.pl
    ├── run_last_to_scaffold.sh
    ├── run_mugsy_MP.pl
    ├── scf2LG_to_AGP.pl
    └── slct_pe.pl
├── bp0_2_bp6.pl
├── calc_est_in_psl.pl
├── cmd_ctrl
    ├── kill_by_ppid.pl
    ├── log_func.sh
    ├── rm_list.pl
    ├── run_cmd_in_batch.sh
    ├── split_scrn_time.pl
    ├── wait_for_pid.pl
    └── wrap_sh.pl
├── cnvrt_embl_to_fasta.pl
├── deal_augustus.pl
├── deal_bnx.pl
├── deal_fasta.pl
├── deal_fastq.pl
├── deal_iprscan.pl
├── deal_table.pl
├── drop_dup_both_end.pl
├── drop_dup_single.pl
├── enrich
    ├── README.md
    ├── cmd_list
    ├── example_data
    │   ├── KEGG_PWY.txt
    │   ├── P1Genom_V1p2.ipr_background
    │   ├── combined_KO_pwy.txt.tbl.ma.bg
    │   ├── example-GOen-testLg10.svg
    │   ├── example-GOen.svg
    │   ├── example-GOen.tsv
    │   ├── goslim_plant.obo.20181129
    │   ├── goslim_plant.obo.20181129.tab
    │   ├── in_geneID-IPRen-testLg10.svg
    │   ├── in_geneID-IPRen.svg
    │   ├── in_geneID-IPRen.tsv
    │   ├── in_geneID.list
    │   ├── in_whole_genome_gene-ghostKID.tsv
    │   ├── in_whole_genome_gene.annot
    │   ├── in_whole_genome_gene.annot-GOinEnrich
    │   ├── keggPWYByKO_KOenrich_bg
    │   ├── keggPWYByKO_enrich_bg
    │   ├── pub-go.obo.tab
    │   └── synFam-ipr-GOinEnrich
    └── scripts
    │   ├── GOenrich_topGO.r
    │   ├── cnvt_GOobo_to_tab.pl
    │   ├── cnvt_GOobo_to_tab.r
    │   ├── enrich_IPR.pl
    │   ├── enrich_keggPWY.pl
    │   ├── enrichment_mine_fit.pl
    │   ├── extend_GOannot_for_GOenrich.pl
    │   ├── extend_IPRannot_for_IPRenrich.pl
    │   ├── hs_enrich.pl
    │   ├── hs_enrich_old.pl
    │   ├── mk_bg_fromKeggPWY.pl
    │   ├── prepare_enrich.pl
    │   ├── run_GOenrich.r
    │   ├── run_IPRenrich.r
    │   ├── run_KEGGenrich.r
    │   ├── split_jnGO.pl
    │   ├── stat_goslim.pl
    │   └── test_enrichGO.pl
├── evolution_tools
    ├── SV_detection
    │   ├── add_geneAHRD.pl
    │   ├── add_rdCov2vcf.pl
    │   ├── align_2seq_by_query_segments.pl
    │   ├── assemblytics_scripts
    │   │   ├── Assemblytics
    │   │   ├── Assemblytics_ori
    │   │   ├── Assemblytics_within_alignment.py
    │   │   └── Assemblytics_within_alignment.py_ori
    │   ├── callsv_AW_NDF.sh
    │   ├── cmd_list_example_to_detect_sv
    │   ├── cnvt_anchors_to_tbl.pl
    │   ├── cnvt_ndfGff2vcf_snps.pl
    │   ├── cnvt_ndfGff2vcf_struct.pl
    │   ├── cnvt_ndfGff2vcf_struct_v1.pl
    │   ├── cnvt_vcf_sub2ins.pl
    │   ├── cnvt_vcf_sub2insdel.pl
    │   ├── filter_maf_by_tab.pl
    │   ├── filter_rdCov_vcf.pl
    │   ├── find_nonOvlCDS.pl
    │   ├── fix_sam_cigarID.pl
    │   ├── fmt_paf.pl
    │   ├── get_qryLoc_by_refLoc_inBam.py
    │   ├── get_qry_ref_shared_var_nucdiff.pl
    │   ├── get_shrt_or_ident_mafTab.pl
    │   ├── get_sv_affected_genes.pl
    │   ├── get_sv_inVCF.pl
    │   ├── gff2bed.pl
    │   ├── if_needIDfix.pl
    │   ├── join_maf_blk.pl
    │   ├── join_samAln.pl
    │   ├── make_fakeCDS_fromPAF.pl
    │   ├── mmp2Aln_anchorsInMafTab.pl
    │   ├── nucdiff_modification
    │   │   ├── class_nucmer.py_revised
    │   │   ├── cmd_list
    │   │   ├── initial_preparation.py_revised
    │   │   └── nucdiff.py_revised
    │   ├── remove_gap_var.pl
    │   ├── restore_sam_position.pl
    │   ├── rmNvar_inVCF.pl
    │   ├── rm_0span_maf.pl
    │   ├── rmdup_fromNormVcf.pl
    │   ├── run_mm2nucdiff.sh
    │   ├── run_mm2paftool.sh
    │   ├── run_ndf.sh
    │   ├── sam2delta.py
    │   ├── select_var.pl
    │   ├── summary_svs.pl
    │   └── view_anchors.pl
    ├── compare_assemblies
    │   ├── byMUMmer
    │   │   ├── cmd_list
    │   │   ├── stat1_mergeCov.pl
    │   │   ├── stat2_chk_gapCover.pl
    │   │   └── stat3_addGeneTag.pl
    │   ├── cnt_ovl_fromBeds.pl
    │   ├── cnvt_ploidy_inVCF.pl
    │   └── mcscanTab_to_dupTxt.pl
    ├── copy_number_var
    │   ├── README.md
    │   ├── compare_gene_expansion.r
    │   └── get_CLV_expansion.pl
    ├── cvt_mscGff_scf2chr.pl
    ├── draw_syn_dotplot.pl
    ├── expansion_tools
    │   ├── 01.clean_nwk.pl
    │   ├── 01.prepare_cafe_tab.pl
    │   ├── 01.prepare_ortho_to_tbl.pl
    │   ├── 02.cafe_to_grp.pl
    │   ├── 03.replace_geneID_in_orthomcl.pl
    │   ├── 04.get_expansion_tab.pl
    │   ├── 05.add_desciption_to_OGcsv.pl
    │   └── jn_gene_byIPR.pl
    ├── follow_sibelia.pl
    ├── mummer_tools
    │   ├── join_coords.pl
    │   └── mummerplot
    ├── ortho_tools
    │   ├── 01.ortho_list_from_orthoOut.pl
    │   ├── 02.list_run_muscle.pl
    │   ├── cdsAln2bppAln.pl
    │   ├── cnvt_fa2nex.pl
    │   ├── combine_tree_for_treeAnnot.pl
    │   ├── filter_bp6_byTopScore.pl
    │   ├── fmt_data_for_orthoAln.pl
    │   ├── join_cdsFmt_faAln.pl
    │   ├── mk_sep_blastp_shell.sh
    │   ├── ortho_cnt_c1_cmn.pl
    │   ├── replace_all_blast_file.sh
    │   ├── run_ete3_genetree.pl
    │   ├── run_positive_selection.pl
    │   ├── sep_alnFas.pl
    │   └── trim_faAln.pl
    ├── plot_color_tree.r
    ├── plot_label_color_tree.r
    ├── plot_syn.pl
    ├── plot_syn.pl_bak
    ├── plot_syn_bk.pl
    ├── prepare_SynChro.pl
    ├── rbh_byBp6.pl
    ├── rbh_inBlock.pl
    ├── structure
    │   ├── 00.run_Structure.pl
    │   ├── 03.rm_Nmiss_maf.pl
    │   ├── cnvt_clumppOut_to_tab.pl
    │   ├── collect_rand_result.pl
    │   ├── extraparams
    │   ├── get_LnPD.pl
    │   ├── get_structure_input.pl
    │   ├── get_time_from_structScrn.pl
    │   ├── mainparams
    │   ├── mv_result_files.pl
    │   ├── new_mainparams.pl
    │   ├── order_ClumppIndFileOut_byIndID.pl
    │   ├── order_structureIndv_byIndID.pl
    │   ├── prepare_structure_input.pl
    │   ├── rand_small_position.pl
    │   ├── result_report.pl
    │   ├── shrt_col0.pl
    │   ├── structure
    │   └── structure_Temple
    │   │   └── extraparams
    └── vcf_tab
    │   ├── add_ref_as_indv_in_vcfTab.pl
    │   ├── cnt_allele_withPop.pl
    │   ├── cols2vcfTab.pl
    │   ├── cvt_snp_to_itayNeed.pl
    │   ├── extract_dp_per_indv.pl
    │   ├── slct_sites_fromVCF.pl
    │   └── tab2vcf.pl
├── extract_fq_by_list.pl
├── file_type_based
    ├── Proc_Reads
    │   ├── adaptors.fa
    │   ├── chk_INS_byChlo.sh
    │   ├── cleanPE_byTrimmo.sh
    │   ├── cleanSE_byTrimmo.sh
    │   ├── illumina_adapters.fa
    │   ├── polyAT_adp.fa
    │   ├── run_bowtie.pl
    │   ├── run_bowtie2.pl
    │   ├── run_bwaAln.pl
    │   ├── run_bwaAln.sh
    │   ├── run_fastqC.pl
    │   ├── run_ndupB.sh
    │   ├── run_rmRRNA.pl
    │   ├── run_rmRRNA.sh
    │   ├── run_tophat2.pl
    │   ├── run_trimmoPE.pl
    │   ├── run_trimmoSE.pl
    │   ├── run_trimmomatic.pl
    │   ├── srch_barcode.pl
    │   ├── trimmomatic
    │   │   ├── AUTHORS.jbzip2
    │   │   ├── FastqRecord.java
    │   │   ├── LICENCE.jbzip2
    │   │   ├── META-INF
    │   │   │   └── MANIFEST.MF
    │   │   ├── SlidingWindowTrimmer.java
    │   │   ├── SlidingWindowTrimmer.java.version1.bk
    │   │   ├── org
    │   │   │   ├── itadaki
    │   │   │   │   └── bzip2
    │   │   │   │   │   ├── BZip2BlockCompressor.class
    │   │   │   │   │   ├── BZip2BlockDecompressor.class
    │   │   │   │   │   ├── BZip2Constants.class
    │   │   │   │   │   ├── BZip2DivSufSort$PartitionResult.class
    │   │   │   │   │   ├── BZip2DivSufSort$StackEntry.class
    │   │   │   │   │   ├── BZip2DivSufSort$TRBudget.class
    │   │   │   │   │   ├── BZip2DivSufSort.class
    │   │   │   │   │   ├── BZip2HuffmanStageDecoder.class
    │   │   │   │   │   ├── BZip2HuffmanStageEncoder.class
    │   │   │   │   │   ├── BZip2InputStream.class
    │   │   │   │   │   ├── BZip2OutputStream.class
    │   │   │   │   │   ├── BitInputStream.class
    │   │   │   │   │   ├── BitOutputStream.class
    │   │   │   │   │   ├── CRC32.class
    │   │   │   │   │   ├── HuffmanAllocator.class
    │   │   │   │   │   └── MoveToFront.class
    │   │   │   └── usadellab
    │   │   │   │   └── trimmomatic
    │   │   │   │       ├── Pairomatic.class
    │   │   │   │       ├── TrimStats.class
    │   │   │   │       ├── Trimmomatic.class
    │   │   │   │       ├── TrimmomaticPE.class
    │   │   │   │       ├── TrimmomaticSE.class
    │   │   │   │       ├── fasta
    │   │   │   │           ├── FastaParser.class
    │   │   │   │           ├── FastaRecord.class
    │   │   │   │           └── FastaSerializer.class
    │   │   │   │       ├── fastq
    │   │   │   │           ├── FastqParser.class
    │   │   │   │           ├── FastqRecord.class
    │   │   │   │           ├── FastqRecord.java
    │   │   │   │           └── FastqSerializer.class
    │   │   │   │       ├── threading
    │   │   │   │           ├── BlockOfRecords.class
    │   │   │   │           ├── BlockOfWork.class
    │   │   │   │           ├── ParserWorker.class
    │   │   │   │           ├── SerializerWorker.class
    │   │   │   │           ├── TrimLogRecord.class
    │   │   │   │           ├── TrimLogWorker.class
    │   │   │   │           └── TrimStatsWorker.class
    │   │   │   │       ├── trim
    │   │   │   │           ├── AbstractSingleRecordTrimmer.class
    │   │   │   │           ├── AvgQualTrimmer.class
    │   │   │   │           ├── BarcodeSplitter.class
    │   │   │   │           ├── CropTrimmer.class
    │   │   │   │           ├── HeadCropTrimmer.class
    │   │   │   │           ├── IlluminaClippingTrimmer$1.class
    │   │   │   │           ├── IlluminaClippingTrimmer$IlluminaClippingSeq.class
    │   │   │   │           ├── IlluminaClippingTrimmer$IlluminaLongClippingSeq.class
    │   │   │   │           ├── IlluminaClippingTrimmer$IlluminaMediumClippingSeq.class
    │   │   │   │           ├── IlluminaClippingTrimmer$IlluminaPrefixPair.class
    │   │   │   │           ├── IlluminaClippingTrimmer$IlluminaShortClippingSeq.class
    │   │   │   │           ├── IlluminaClippingTrimmer.class
    │   │   │   │           ├── LeadingTrimmer.class
    │   │   │   │           ├── MaximumInformationTrimmer.class
    │   │   │   │           ├── MinLenTrimmer.class
    │   │   │   │           ├── SlidingWindowTrimmer.class
    │   │   │   │           ├── SlidingWindowTrimmer.java
    │   │   │   │           ├── ToPhred33Trimmer.class
    │   │   │   │           ├── ToPhred64Trimmer.class
    │   │   │   │           ├── TrailingTrimmer.class
    │   │   │   │           ├── Trimmer.class
    │   │   │   │           └── TrimmerFactory.class
    │   │   │   │       └── util
    │   │   │   │           ├── ConcatGZIPInputStream$1.class
    │   │   │   │           ├── ConcatGZIPInputStream$GZIPHelperInputStream.class
    │   │   │   │           ├── ConcatGZIPInputStream.class
    │   │   │   │           └── PositionTrackingInputStream.class
    │   │   └── trimmomatic.jar
    │   ├── using_subfunc.R
    │   ├── using_subfunc.R.bk
    │   ├── using_subfunc.R.cmd
    │   ├── using_subfunc.R.cmd.R
    │   ├── using_subfunc.R.rm_polyAT.R
    │   └── using_subfunc.R.rm_polyAT_useRight.R
    └── Proc_Sam
    │   ├── get_required_sam.pl
    │   └── trim_rdEnd_inSam.pl
├── follow_mcscan.pl
├── gm_tools
    ├── abh_to_qtlCsv.pl
    ├── cnt_allele_withPop.pl
    ├── cnvt_abhJn_to_abh.pl
    ├── cnvt_snp_to_itayNeed.pl
    ├── cnvt_snp_to_itayNeed_cnt.pl
    ├── cvt_snp_to_itayNeed.pl
    ├── rnaseq_snp_tools.pl
    └── tomato_loc_V2p4_to_V2p5.pl
├── log_tools
    ├── fastqC_data_summary.pl
    ├── filt_bwa_log.pl
    ├── form_cleanQ20_tbl.pl
    └── infor_ndupB.pl
├── maker_pm_edit
    ├── PhatHit_utils.pm_edit
    ├── PhatHit_utils.pm_raw
    ├── auto_annotator.pm_edit
    ├── auto_annotator.pm_raw
    ├── shadow_AED.pm_edit
    └── shadow_AED.pm_raw
├── pcr_tools
    ├── cmd_list
    ├── generate_kaspLGC_template.pl
    ├── get_priming_loc_bnV2_2_24.pl
    ├── retrieve_template_forSNP.pl
    ├── run_primer3_general.pl
    ├── run_primer_forSNP.pl
    └── site.lis
├── plotting
    ├── README.md
    ├── example_data
    │   ├── 65K_DEL-3class
    │   ├── FleshBrix_19YQ_1
    │   ├── FleshBrix_19YQ_2
    │   ├── FleshBrix_22HN_1
    │   ├── FleshBrix_22HN_2
    │   └── in.table-for_grouped_barplot_with_SD
    └── scripts
    │   ├── plot_barplot.r
    │   ├── plot_barplot_wiSD_twoGroups.r
    │   ├── plot_boxplot.r
    │   ├── plot_upset.r
    │   └── plot_venn.r
├── project
    └── watermelon_pan_phaseI
    │   ├── cmd_list
    │   ├── cnt_CDS_dup.pl
    │   ├── cnt_TEIPRacc.pl
    │   ├── cnt_gene_PAV_byDepCov.pl
    │   ├── cnt_gene_PAV_byDepCov_fromISbed.pl
    │   ├── cnvt_ext2nov_to_agp.pl
    │   ├── comb_gene_PAV_byDepCov.pl
    │   ├── convert_PAVtab_to_rmOlapTab.pl
    │   ├── dvd_grp_bySyn.pl
    │   ├── extract_N50.pl
    │   ├── extract_map_ratio_from_PEfstat_comb.pl
    │   ├── extract_novelOriFlankSeq.pl
    │   ├── list_IPRacc.pl
    │   ├── map_accDedup_to_asm.pl
    │   ├── map_dedup_to_novel.pl
    │   ├── pipe_for_functional_annotation.pl
    │   ├── ret_maker_abinit_gff3.pl
    │   ├── ret_maker_abinit_gff3_simpleCDS.pl
    │   ├── rm_Qloc_only_groups.pl
    │   ├── rm_Qloc_ovl2pred.pl
    │   ├── rm_overlap_OGs.pl
    │   ├── slct_OG_gene_pairs.pl
    │   ├── summary_covBp_byRdDep.pl
    │   ├── summary_gene_PAV_byDepCov.pl
    │   └── trim_gff_to_novel_ctg_region.pl
├── relate_loc_1to1
    ├── add_loc_fromNewP.pl
    ├── cmd_list
    ├── relate_pos.pl
    ├── scfP_to_chrP.pl
    └── sumP_in_bed_wiCDS.pl
├── reseq_tools
    ├── C_exe
    │   ├── maskClose_in_1col
    │   ├── maskClose_in_1col.c
    │   ├── rmSameSite
    │   ├── rmSameSite.c
    │   └── self_functions.c
    ├── LD_ana
    │   ├── bin_LD_cnt.pl
    │   └── pipe_cnt_LD.pl
    ├── README.md
    ├── SNP_effect.pl
    ├── SNP_effect_edit.pl
    ├── SNP_in_region.pl
    ├── add_SNP1col_to_basic.pl
    ├── basic_snp_infor_bySite.pl
    ├── basic_snp_infor_bySite_inVcfTab.pl
    ├── bsa
    │   ├── README.md
    │   ├── example_data
    │   │   └── template-QTLseqr_result.xlsx
    │   └── scripts
    │   │   ├── ana_bsa_Gprime.pl
    │   │   ├── cnvt_var2tab.pl
    │   │   ├── filter_vcfTab_forBsaParent.pl
    │   │   ├── get_pval_for_Gprime.R
    │   │   ├── plot_pipeResult.R
    │   │   ├── plot_pipeResult_minCnt.R
    │   │   ├── run_QTLseqr.r
    │   │   ├── slct_sites_by_windows.pl
    │   │   └── slct_sites_forBsa.pl
    ├── class_SNPeffect_tbl.pl
    ├── cnt_NHH_byIndv.pl
    ├── cnt_Nmiss_ratio_heteNotN.pl
    ├── cnt_diff_inTbl.pl
    ├── cnt_genotype_in_1col.pl
    ├── cnt_genotype_inpileup.pl
    ├── cnt_homo_hete_ratio.pl
    ├── cnt_maf_ratio.pl
    ├── cnt_mapRdN_inBam.pl
    ├── cnt_pileup_depC.pl
    ├── cnvt_agp2chain.pl
    ├── cnvt_tools
    │   ├── cols2LD.pl
    │   ├── cols2fas.pl
    │   ├── cols2fstat.pl
    │   ├── cols2meg.pl
    │   ├── cols2pca.pl
    │   ├── cols2ped.pl
    │   ├── cols2phy.pl
    │   ├── cols2tab.pl
    │   ├── cols2vcf.pl
    │   ├── fas2meg.pl
    │   ├── fas2phyml.pl
    │   ├── qopt2shrtStructure.pl
    │   ├── tab2cols.pl
    │   ├── tab2gl.pl
    │   ├── tbl2LD.pl
    │   ├── tbl2fas.pl
    │   ├── tbl2meg.pl
    │   └── tbl2phy.pl
    ├── cols2ped.pl
    ├── cols2vcf.pl
    ├── detect_mix
    │   ├── class_grpCnt_in_mjTab.pl
    │   ├── class_mjAL.pl
    │   └── class_snpTab_by_mjAL.pl
    ├── draw_SNP_dist.pl
    ├── example_data
    │   ├── list.long_deletions
    │   ├── list.sample_bam
    │   ├── out_geno-mat.tab
    │   └── out_geno-melt.tab
    ├── extract_pileup.pl
    ├── extract_sites_by_list.pl
    ├── extract_top1_vcfPI_wind.R
    ├── filter_tools
    │   └── cnt_depth
    │   │   └── sumDepBySite.pl
    ├── for_yb63
    │   └── spec_inGrp.pl
    ├── fst
    │   ├── cnvt_tbl2fstat.pl
    │   ├── cols2fstat.pl
    │   ├── extract_top1_fst_site.R
    │   ├── extract_top1_vcfFst_wind.R
    │   ├── get_stat.pl
    │   ├── join_fst_siteChrPos.pl
    │   ├── pipe_get_fst.pl
    │   ├── run_hierfstat.pl
    │   └── snpTbl_sepByWind.pl
    ├── gatk
    │   ├── CatVariants.pl
    │   ├── conf_pipe_gatk
    │   ├── est_depth_inBam.pl
    │   ├── gatk_dvd_step8_combineGVCF_interval.pl
    │   ├── gatk_dvd_step9_gvcf2var.pl
    │   ├── gatk_dvd_step9_gvcf2var_fromGVCF.pl
    │   ├── get_pass_vcf.pl
    │   ├── pipe_gatk.pl
    │   ├── pipe_gatk_conf
    │   ├── pipe_gatk_inFqList.pl
    │   ├── pipe_gatk_singleAlnBam.pl
    │   ├── pipe_maoSNP.pl
    │   ├── pipe_sbSNP.pl
    │   ├── pref_lmyPM
    │   └── revert_alnBam_to_uBam.pl
    ├── get_set2_varOnlyHete.pl
    ├── get_set3_varWiIndel.pl
    ├── lcnt_to_represent_allele.pl
    ├── mao_exe
    │   ├── combine2PileFiles
    │   ├── reSeqPrintRefChr
    │   ├── reSeqPrintSample.indel.fast
    │   ├── reSeqPrintSample.indel.fast.strAssign
    │   ├── reSeqPrintSample.indel.fast.strAssign.moreHeter
    │   ├── rmRedunSam2
    │   └── rmRedunSam3
    ├── maskClose_in_1col.pl
    ├── mask_vcf_geno_byGQ.pl
    ├── mask_weiredSNP.pl
    ├── mk_wind_from_noNlis.pl
    ├── pca
    │   ├── cmd_list_set01
    │   ├── cnvt_pcaEvec_to_tbl.pl
    │   ├── cnvt_snp2pca.pl
    │   └── plot_EVs.R
    ├── phylo_tools
    │   ├── generate_dataset.pl
    │   ├── infer_NJ_nucleotide_bt500.mao
    │   └── replace_ID_in_nwk.pl
    ├── rand_site_wiWind.pl
    ├── rdNum_in_bam.pl
    ├── rename_plink_map.pl
    ├── rm_Nmiss_sites.pl
    ├── rm_adjacent_sites.pl
    ├── rm_same_site.pl
    ├── rm_same_site_hete2N.pl
    ├── scripts
    │   ├── cntRdMismatch_inSam.pl
    │   ├── cntRd_spanJunctionSite_inBam.pl
    │   ├── cnvt_melt_to_matrix.pl
    │   ├── cnvt_msaFa_to_sam.pl
    │   └── genotype_longDEL_byListBam.pl
    ├── slct_sweep
    │   ├── get_mean.pl
    │   ├── merge_wind.pl
    │   ├── merge_wind_pos.pl
    │   ├── ret_annot_by_loc.pl
    │   ├── rm_overlap_wind.pl
    │   ├── rod_from_PIavg.pl
    │   ├── slct_sweep_wind.pl
    │   └── wind_in_region_list.pl
    ├── slim_SNP_sites.pl
    ├── snpTbl_stats.pl
    ├── snpeff_data
    │   ├── extract_coding.pl
    │   ├── simple_sv_class
    │   └── simplify_tbl1.pl
    ├── tassel
    │   └── cnvt_col_to_TasselTaxaList.pl
    ├── vcf_simplify_addRef.pl
    └── xpclr
    │   ├── chk_nonsyn.pl
    │   ├── cluster_xpclrscore.pl
    │   ├── cluster_xpclrscore.pl_ori
    │   ├── cumPos.pl
    │   ├── get_uniq_cM.pl
    │   ├── pipe_xpclr_forWM97.pl
    │   ├── pipe_xpclr_fromSNPtbl.pl
    │   ├── pipe_xpclr_fromSNPtbl_cpu.pl
    │   ├── plot_manhattan
    │       ├── chrLen
    │       ├── chrLen_cum
    │       └── plot_xpclr_cum.R
    │   ├── prepare_xpclr_input_wiGmP.pl
    │   ├── sep_run_xpclr.pl
    │   ├── set_GMpos_to_SNP.pl
    │   └── xpclr_wind_cmd_wiGmP.pl
├── rnaseq_tools
    ├── DEG_byList.pl
    ├── DEGbyEdgeR_exactTest.pl
    ├── DEGtool_withSizeFactor.pl
    ├── FET_getCnt.pl
    ├── R_cmd_list
    ├── add_sizefactor.pl
    ├── cntRdInGene_wiHTSeq.pl
    ├── cnt_rd_in_bam.pl
    ├── cnt_uniqMap_in_bam.pl
    ├── cnvt_cnt_to_TPM.pl
    ├── coexp
    │   ├── add_abs_toKME.pl
    │   ├── cmd_list
    │   ├── dist_of_twoKME.pl
    │   ├── down_phenoAssoc_binary.r
    │   ├── down_phenoAssoc_pearson.r
    │   ├── heatmap_by_mod_dist.r
    │   ├── input
    │   │   ├── dat1_pheno
    │   │   └── dat1_rpkmMean.gz
    │   ├── redo_pheno_ass.r
    │   ├── run_wgcna_signed.r
    │   └── run_wgcna_unsigned.r
    ├── combine_samCnt.pl
    ├── combine_samCnt_by_Pref.pl
    ├── combine_samCnt_by_Pref_antiSense.pl
    ├── compare_SensAnti.pl
    ├── draw_SNP.pl
    ├── extract_samAln_by_fq.pl
    ├── fix_SRAfqID.pl
    ├── fix_excelV.pl
    ├── fromOthers
    │   ├── record
    │   └── run_TMM_scale_matrix.pl
    ├── get_DESeqNormCnt.pl
    ├── get_MPCnt.pl
    ├── get_meanTPM.pl
    ├── graft
    │   ├── find_transmit_step1.pl
    │   ├── find_transmit_step2.pl
    │   ├── find_transmit_step3.pl
    │   ├── find_transmit_stepX1.pl
    │   ├── find_transmit_stepX2.pl
    │   ├── get_alnBam_by_src2tgt_rdList.pl
    │   └── simple_pipe_find_graft_rd_SE.pl
    ├── join_samCnt.pl
    ├── map_to_genome
    │   ├── README.md
    │   ├── cnvt_featureCounts_to_tpm.r
    │   ├── cnvt_gene2group_DEGlabel.pl
    │   ├── cnvt_gene2group_val.pl
    │   ├── combine_DEGs.pl
    │   ├── fix_NHnum.pl
    │   ├── label_DEGs.pl
    │   ├── runHisat2_with2pass.pl
    │   └── run_deseq2_tpm.r
    ├── map_to_transcriptome
    │   ├── README.md
    │   ├── cnvt_synOGgrp_to_trans2gene.pl
    │   ├── get_salmon_gene_quant_batch.r
    │   └── run_deseq2_salmon.r
    ├── plot_expr_heatmap.r
    ├── plot_heatmap_by_geneList.pl
    ├── rmRRNA_in_fqFiles.pl
    ├── sep_reads_by_toRef.pl
    └── summary_ht2_log.pl
├── rscript_examples
    └── ggplot_manhattan.r
├── run_clean_mp_all.R
├── run_cmd_in_batch.pl
├── run_reapr.sh
├── sam_filter.pl
├── sam_flag_chk.pl
├── sample_scripts
    ├── PG1_GC.cfg
    ├── PG1_ctg.cfg
    ├── PG1_scf.cfg
    ├── check_pm_version.pl
    ├── cmd_list_trinity_denovo
    ├── run_jf2.sh
    ├── run_soapd2_R02.sh
    ├── run_trinity_guided.sh
    ├── sumV_inWind.pl
    └── svg2png.pl
├── save_mate_fq.pl
├── save_single_fq.pl
├── self_interest
    ├── list_all_dir.pl
    └── price_by_subway.pl
├── sepRun_cmd.pl
├── site_search
    ├── cmd_list
    ├── parse_fimoTSV.pl
    ├── setup_keySite.pl
    └── yyt_motif.meme
├── software_fix
    ├── anchorwave
    │   └── fix_awMAF.pl
    └── last
    │   └── v869
    │       ├── last-dotplot
    │       ├── last-map-probs
    │       ├── last-postmask
    │       ├── last-train
    │       ├── maf-convert
    │       ├── maf-join
    │       └── maf-swap
├── solQ2phredQ.pl
└── temp
    ├── README.md
    ├── abh_to_rate.pl
    ├── cds2prot.pl
    ├── chk_gff3_avg_cds.pl
    ├── cmd_list_busco
    ├── cmd_list_cegma
    ├── cmd_list_tassel4
    ├── cmd_list_tassel5
    ├── cnvt_pairwise_to_tab.pl
    ├── deal_gff3.pl
    ├── detect_syn_dots.pl
    ├── forRonan
        ├── addID_to_loci.pl
        ├── depC_cutoff_by_dep_stat.pl
        ├── filter_sam.pl
        ├── map_region_by_bn6.pl
        ├── merge_tbls.pl
        └── samDep_to_loci.pl
    ├── get_cds_from_gff3.pl
    ├── good_pos.pl
    ├── ncbi_esearch.pl
    ├── plot_boxplot_wi_points.r
    ├── reformat_tabHit.pl
    ├── replace_unicode.pl
    ├── rm_gff_byLis.pl
    ├── scripts
        └── cnvt_pdf_to_tiff.pl
    ├── simple_gff3_to_gtf.pl
    ├── simple_sort_gff3.pl
    ├── slct_gff_byLis.pl
    ├── temp_fix_gff3
        ├── Grif_1614.fix.gff3.gz
        ├── Grif_1614.gff3.gz
        ├── cmd_list
        └── fix_gff.pl
    └── temp_process_ONT
        ├── cdna_classifier_report.pdf
        ├── cdna_classifier_report.tsv
        ├── cmd_list
        ├── correction_pipeline.sh
        ├── raw_rdN.tbl
        ├── test_SIRV
            ├── cdna_classifier_report.pdf
            ├── cdna_classifier_report.tsv
            ├── cmd_list
            ├── correction_pipeline.sh
            ├── scrn.SIRV_test.gz
            └── scrn.corr
        ├── test_small
            └── cmd_list
        └── tools
            └── correction_pipeline.sh


/.gitignore:
--------------------------------------------------------------------------------
 1 | blib/
 2 | .build/
 3 | _build/
 4 | cover_db/
 5 | inc/
 6 | Build
 7 | !Build/
 8 | Build.bat
 9 | .last_cover_stats
10 | Makefile
11 | Makefile.old
12 | MANIFEST.bak
13 | META.yml
14 | MYMETA.yml
15 | nytprof.out
16 | pm_to_blib
17 | 


--------------------------------------------------------------------------------
/GBS/ext_rd_withAPEKI.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep FASTQ reads whose sequence starts with <barcode>CAGC or <barcode>CTGC
 3 | # and that carry no N within the first (64 + barcode length) characters.
 4 | # Kept reads are written to STDOUT as four-line FASTQ records.
 5 | use strict;
 6 | use warnings;
 7 | use LogInforSunhh; # tsmsg() is not defined in this script, so it is expected to come from this module
 8 | 
 9 | !@ARGV and die "perl $0 barcode input.fastq\n\nI use CAGC/CTGC as the required restricted site.\nPlease note that I will ignore read if meeting Ns within the first 64bp.\n\n";
10 | 
11 | my $barC = shift;
12 | my $addCut1 = "${barC}CAGC";
13 | my $addCut2 = "${barC}CTGC";
14 | my $ll = length($barC);
15 | 
16 | my $n = 0;
17 | while (my $id = <>) {
18 | 	$n ++;
19 | 	$n % 1e6 == 1 and &tsmsg("[Msg] $n reads treated.\n");
20 | 	my $seq  = <>;
21 | 	my $plus = <>;
22 | 	my $qual = <>;
23 | 	# A FASTQ record is four lines. Stop on a truncated final record instead of
24 | 	# matching against undef or printing a record without its quality line.
25 | 	unless (defined $seq and defined $plus and defined $qual) {
26 | 		warn "[Wrn] Incomplete FASTQ record (read $n); stop here.\n";
27 | 		last;
28 | 	}
29 | 	# The barcode is fixed for the whole run, hence the /o flag.
30 | 	if ($seq =~ m/^(?:$addCut1|$addCut2)/o) {
31 | 		my $sub_seq = substr($seq, 0, 64+$ll);
32 | 		$sub_seq =~ m/N/ and next;
33 | 		# The third line of the record is always written back as a bare "+".
34 | 		print STDOUT "$id$seq+\n$qual";
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Frequently-used-tools-for-data-processing
2 | =========================================
3 | 
4 | Tool set for processing fasta/fastq/table formatted data. Most of the tools are Perl scripts. 
5 | 


--------------------------------------------------------------------------------
/annot_tools/add_tag_to_gffID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Prepend a tag to feature IDs in a GFF3 file:
 3 | #   protein_match, dispersed_repeat : tag is inserted right after the leading "ID="
 4 | #   match_part                      : tag is inserted right after "Parent="
 5 | # Blank lines and '#' lines are echoed; any other feature type aborts the run.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | !@ARGV and die "perl $0 tag in.fmt.gff3\n";
10 | 
11 | my $tag = shift;
12 | 
13 | while (my $line = <>) {
14 | 	chomp $line;
15 | 	if ($line =~ m/^(\s*$|#)/) {
16 | 		print "$line\n";
17 | 		next;
18 | 	}
19 | 	my @col = split(/\t/, $line);
20 | 	if ($col[2] =~ m!^(?:protein_match|dispersed_repeat)$!i) {
21 | 		# Top-level features carry their own ID as the first attribute.
22 | 		$col[8] =~ s!^ID=!ID=$tag!;
23 | 	} elsif ($col[2] =~ m!^match_part$!i) {
24 | 		# Child features only point to their parent.
25 | 		$col[8] =~ s!(^|\s|;)Parent=!${1}Parent=$tag!;
26 | 	} else {
27 | 		die "$line\n";
28 | 	}
29 | 	print join("\t", @col)."\n";
30 | }
31 | 


--------------------------------------------------------------------------------
/annot_tools/ahrd/cmd_list:
--------------------------------------------------------------------------------
 1 | perl pipe_for_functional_annotation.pl -onlyAHRD annot.cfg
 2 | mkdir -p 04.AHRD/v1/
 3 | cd 04.AHRD/
 4 | bash ../generate_ahrd_yml.sh ../evmMerged.p.fa ./v1/ahrd_output_v1.csv > ahrd_in_v1.yml
 5 | java -Xmx10G -jar /data/Sunhh/src/annotation/ahrd/AHRD/dist/ahrd.jar ahrd_in_v1.yml
 6 | perl ../trim_orphan_right_brack.pl ./v1/ahrd_output_v1.csv > ./v1/ahrd_output_v1.csv_trim
 7 | deal_table.pl -column 0-5 ./v1/ahrd_output_v1.csv_trim > ./v1/ahrd_output_v1.final.csv
 8 | deal_table.pl -column 0,3 ./v1/ahrd_output_v1.final.csv > ./v1/ahrd_output_v1.final.csv.2col
 9 | 
10 | cat ./v1/ahrd_output_v1.final.csv.2col | deal_table.pl -col_repCount 1 | tail -n +2 | deal_table.pl -col_sort 0 | deal_table.pl -reverse > ./v1/ahrd.rcnt
11 | cat ./v1/ahrd.rcnt | perl -e 'while (<>) { chomp; m!retro|reverse|transpos|gag\b|polyprotein!i and print "$_\n";}' | less -S
12 | 
13 | # False positives - functions that are not TE-related but match the regular expression pattern:
14 | #   AT3G25590: Micronuclear linker histone polyprotein-like protein
15 | 
16 | 


--------------------------------------------------------------------------------
/annot_tools/augustus.accuracy_calculator.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print one weighted accuracy value per AUGUSTUS test output file
 3 | # as "<file name>\t<value>".
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | !@ARGV and die "perl $0 augusts_test1.stdout augusts_test2.stdout > all_accuracy.tbl\n";
 8 | 
 9 | for my $fn (@ARGV) {
10 | 	my $v1 = &accuracy_calculator($fn);
11 | 	print STDOUT "$fn\t$v1\n";
12 | }
13 | 
14 | # calculate the result of testing AUGUSTUS on genbank files in a single number
15 | # Copied from braker.pl
16 | #
17 | # Input : path of a file holding the stdout of an AUGUSTUS test run.
18 | # Return: (3*nuc_sens + 2*nuc_spec + 4*exon_sens + 3*exon_spec
19 | #          + 2*gene_sens + 1*gene_spec) / 15
20 | # Dies when the file cannot be opened. Values missing from the file are
21 | # counted as 0 and reported once on STDERR.
22 | sub accuracy_calculator{
23 |     my $aug_out=shift;
24 |     my ($nu_sen, $nu_sp, $ex_sen, $ex_sp, $gen_sen, $gen_sp);
25 |     # Three-argument open on a lexical handle: the name is never interpreted
26 |     # as a mode or a pipe, and the handle is closed before returning.
27 |     open(my $aug_fh, '<', $aug_out) or die ("Could not open $aug_out!\n");
28 |     while(<$aug_fh>){
29 |         if(/^nucleotide level\s*\|\s*(\S+)\s*\|\s*(\S+)/){
30 |             $nu_sen=$1;
31 |             $nu_sp=$2;
32 |         }
33 |         # On the exon and gene rows the last two '|'-separated fields are used.
34 |         if(/^exon level\s*\|.*\|.*\|.*\|.*\|.*\|\s*(\S+)\s*\|\s*(\S+)/){
35 |             $ex_sen=$1;
36 |             $ex_sp=$2;
37 |         }
38 |         if(/^gene level\s*\|.*\|.*\|.*\|.*\|.*\|\s*(\S+)\s*\|\s*(\S+)/){
39 |             $gen_sen=$1;
40 |             $gen_sp=$2;
41 |         }
42 |     }
43 |     close($aug_fh);
44 |     my $is_warned = 0;
45 |     for my $val ($nu_sen, $nu_sp, $ex_sen, $ex_sp, $gen_sen, $gen_sp) {
46 |         defined $val and next;
47 |         $is_warned == 0 and warn "[Wrn] Incomplete accuracy table in $aug_out; missing values are counted as 0.\n";
48 |         $is_warned = 1;
49 |         $val = 0; # $val aliases the variable itself, so this fills the gap
50 |     }
51 |     my $target=(3*$nu_sen+2*$nu_sp+4*$ex_sen+3*$ex_sp+2*$gen_sen+1*$gen_sp)/15;
52 |     return $target;
53 | }
54 | 


--------------------------------------------------------------------------------
/annot_tools/b2g_graph_software/fmt_b2g_enrich.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reformat a b2g enrichment table: keep columns 1-5, then report the summed
 3 | # counts described in the column legend at the end of this file.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 3.Locule.Up.full.txt > 3.Locule.Up.full.txt.tab\n";
 8 | 
 9 | while (my $row = <>) {
10 |   chomp $row;
11 |   my @fld = split(/\t/, $row);
12 |   if ($fld[0] eq 'Tags') {
13 |     # Header row: check the expected layout, then write the new header.
14 |     die "Bad columns\n" unless $fld[10] eq 'TestSet Sequences';
15 |     print join("\t", @fld[1..5], 'Total DEGs', 'DEGs annotated', 'Genes annotated', 'Total genes', "Genes")."\n";
16 |     next;
17 |   }
18 |   my ($nr_test, $nr_ref, $non_test, $non_ref) = @fld[6..9];
19 |   my $ttl_degN  = $nr_test + $non_test;
20 |   my $ttl_geneN = $nr_test + $nr_ref + $non_test + $non_ref;
21 |   my $gene_list = $fld[10] // "";
22 |   print join("\t", @fld[1..5], $ttl_degN, $nr_test, $nr_ref, $ttl_geneN, $gene_list)."\n";
23 | }
24 | 
25 | # Input columns (0-based):
26 | # 0   Tags
27 | # 1   GO ID
28 | # 2   GO Name : description of the category;
29 | # 3   GO Category: category of the function;
30 | # 4   FDR
31 | # 5   P-Value
32 | # 6   Nr Test : number of transcripts in the sample with that function
33 | # 7   Nr Reference: number of transcripts in the reference transcriptome with that function
34 | # 8   Non Annot Test: number of transcripts without that function in the sample.
35 | # 9   Non Annot Reference: number of transcripts without that function in the reference.
36 | # 10  TestSet Sequences
37 | 


--------------------------------------------------------------------------------
/annot_tools/evm_tools/evm_weight.txt:
--------------------------------------------------------------------------------
1 | PROTEIN	spliced_protein_alignments	1
2 | 


--------------------------------------------------------------------------------
/annot_tools/evm_tools/filter_cds2Bad_bn6.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print the first hit of each query (column 0) that passes both cutoffs:
 3 | #   column 2 >= $min_ident, and
 4 | #   (column 7 - column 6 + 1) >= $min_percCov percent of column 12.
 5 | # NOTE(review): column 12 looks like a query length appended to the usual
 6 | # 12-column tabular hit table - confirm against the producer of the .bn6 file.
 7 | use strict;
 8 | use warnings;
 9 | 
10 | -t and !@ARGV and die "perl $0 evmMerged.c.fa.toBad.bn6 > evmMerged.c.fa.toBad.bn6.toRM\n";
11 | 
12 | my $min_ident = 97;
13 | my $min_percCov  = 95; #
14 | 
15 | my %is_reported;
16 | 
17 | while (my $hit = <>) {
18 |   chomp $hit;
19 |   my @fld = split(/\t/, $hit);
20 |   my $qry_id = $fld[0];
21 |   next if defined $is_reported{$qry_id};
22 |   next unless $fld[2] >= $min_ident;
23 |   my $aln_span = $fld[7] - $fld[6] + 1;
24 |   next unless $aln_span >= $fld[12] * $min_percCov / 100;
25 |   $is_reported{$qry_id} = 1;
26 |   print STDOUT "$hit\n";
27 | }
28 | 


--------------------------------------------------------------------------------
/annot_tools/evm_tools/good_desc:
--------------------------------------------------------------------------------
1 | Micronuclear linker histone polyprotein-like protein
2 | 


--------------------------------------------------------------------------------
/annot_tools/evm_tools/param_list:
--------------------------------------------------------------------------------
1 | C31	C38	0.97	0.95	0.97
2 | C31	C39	0.97	0.95	0.97
3 | C38	C37	0.97	0.95	0.97
4 | C38	C39	0.97	0.95	0.97
5 | C39	C37	0.97	0.95	0.97
6 | C39	C38	0.97	0.95	0.97
7 | 


--------------------------------------------------------------------------------
/annot_tools/example_pipes/bad_prot_IDs:
--------------------------------------------------------------------------------
1 | CmaCh14G019040.1
2 | CmoCh01G021080.1
3 | CmUC00G223800.1
4 | AT1G44191.1
5 | sp|P12978|EBNA2_EBVB9
6 | 


--------------------------------------------------------------------------------
/annot_tools/example_pipes/cmd_list_protAln_single_1add:
--------------------------------------------------------------------------------
 1 | # Re-align on SWIFT server.
 2 | cd /data/Sunhh/src/align/spaln/spaln2.1.4.linux64/seqdb; ln -s /data/Sunhh/wmhifi/analysis/gene_prediction/db/in_genome/22CEXU3.chr.fa ./22CEXU3.mfa; ./makeidx.pl -inp 22CEXU3.mfa; cd -;
 3 | 
 4 | deal_fasta.pl CmoshV1.p.fa -nres CmoCh01G021080.1 > CmoshV1.p_fixed.fa
 5 | ls CmoshV1.p_fixed.fa > lis_prot_r2
 6 | perl /home/Sunhh/tools/github/NGS_data_processing/annot_tools/run_spaln_prot2genom.pl  -aln_type prot2genome  -db 22CEXU3  -inFaLis lis_prot_r2  -para_spaln " -t1 -M4 -Q7 -O0 -LS "  -cpuN 100 -cnvt2maker -pl_cnvt2maker /home/Sunhh/tools/github/NGS_data_processing/annot_tools/cnvt_spaln2makerAln_prot_gff3.pl
 7 | 
 8 | mv CmoshV1.p_fixed.fa.spaln.gff3 CmoshV1.p.fa.spaln.gff3
 9 | perl /home/Sunhh/tools/github/NGS_data_processing/annot_tools/rmShrtExon_spaln_prot2genom.pl CmoshV1.p.fa.spaln.gff3 > CmoshV1.p.fa.s1.gff3
10 | perl /home/Sunhh/tools/github/NGS_data_processing/annot_tools/cnvt_spaln2makerAln_prot_gff3.pl -protKLfile protein_kl -trimOverflow -scafKLfile scaffold_kl CmoshV1.p.fa.s1.gff3 -outFile CmoshV1.p.fa.s2.gff3
11 | perl /home/Sunhh/tools/github/NGS_data_processing/annot_tools/pasa_gff_to_alnGff.pl -notPasa -addTag "3.9:prot:" CmoshV1.p.fa.s2.gff3 > CmoshV1.p.fa.s2.4maker.gff3
12 | 


--------------------------------------------------------------------------------
/annot_tools/fix_1bpLoc_by_zff2Gb.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Rewrite every single-position location "complement(N)" as the range "N..N".
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | while (my $line = <>) {
 7 | 	# The replacement text cannot form a new "complement(N)", so one global
 8 | 	# pass changes the same places as substituting repeatedly.
 9 | 	$line =~ s/complement\((\d+)\)/$1..$1/g;
10 | 	print $line;
11 | }
12 | 


--------------------------------------------------------------------------------
/annot_tools/get_gff_byScfID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Extract the GFF3 lines of one scaffold into cur<suff>.gff3 (with Target=
 3 | # attributes removed) and its sequence into cur<suff>.fasta.
 4 | use strict;
 5 | use warnings;
 6 | use LogInforSunhh; # exeCmd() is not defined in this script, so it is expected to come from this module
 7 | 
 8 | use Getopt::Long;
 9 | my %opts;
10 | GetOptions(\%opts,
11 | 	"help!",
12 | 	"faF:s", "gffF:s",
13 | 	"scfID:s", "suff:s",
14 | );
15 | sub usage {
16 | 	print <<HH;
17 | ################################################################################
18 | # perl $0 -scfID scfID -suff ''
19 | # 
20 | # -faF    [input.scf.fa]
21 | # -gffF   [all.gff3]
22 | ################################################################################
23 | HH
24 | 	exit 1;
25 | }
26 | 
27 | $opts{'help'} and &usage();
28 | 
29 | my $faF  = $opts{'faF'} // 'PG1All_v2_Scf.unmsk.fa';
30 | my $gffF = $opts{'gffF'} // 'r2_all.gff3';
31 | 
32 | # The scaffold ID may also be given as the first positional argument.
33 | my $id = $opts{'scfID'} // shift;
34 | my $add = $opts{'suff'} // '';
35 | 
36 | defined $id or &usage();
37 | 
38 | open(my $gff_fh, '<', $gffF) or die "[Err] Failed to open [$gffF]: $!\n";
39 | open(my $out_fh, '>', "cur$add.gff3") or die "[Err] Failed to write [cur$add.gff3]: $!\n";
40 | # Match the ID literally: an ID may contain regex metacharacters such as '.'
41 | # or '|', which would otherwise select lines of other scaffolds.
42 | my $id_re = qr/^\Q$id\E\t/;
43 | while (<$gff_fh>) {
44 | 	$_ =~ $id_re or next;
45 | 	chomp;
46 | 	my @ta = split(/\t/, $_);
47 | #	unless ($ta[1] =~ m/^pred_gff:augustus|maker|pred_gff:augustus_masked|pred_gff:snap_masked$/) {
48 | #		$ta[8] =~ s!Name=[^;\s]+;?!!g;
49 | 		$ta[8] =~ s!Target=[^;\s]+;?!!g;
50 | #	}
51 | 	print {$out_fh} join("\t", @ta)."\n";
52 | }
53 | close($out_fh) or die "[Err] Failed to finish writing [cur$add.gff3]: $!\n";
54 | close($gff_fh);
55 | exeCmd( "deal_fasta.pl $faF -res $id > cur$add.fasta" );
56 | 


--------------------------------------------------------------------------------
/annot_tools/get_maker_result.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | function exe_cmd {
 3 | 	echo "[$(date)][CMD] $1"
 4 | 	eval "$1"
 5 | 	echo "[$(date)][Rec] Done."
 6 | }
 7 | 
 8 | function tsmsg {
 9 | 	echo "[$(date)]$1"
10 | }
11 | 
12 | roundN=4
13 | 
14 | path_gffMerge=gff3_merge
15 | path_fasMerge=fasta_merge
16 | 
17 | db_idx=PG1All_v2_Scf.unmsk_master_datastore_index.log
18 | 
19 | exe_cmd "mkdir ../result/r$roundN"
20 | exe_cmd "$path_gffMerge -d $db_idx -g -o ../result/r${roundN}/r${roundN}_maker.gff3"
21 | exe_cmd "$path_gffMerge -d $db_idx -n -o ../result/r${roundN}/r${roundN}_all.gff3"
22 | exe_cmd "$path_fasMerge -d $db_idx -o ../result/r${roundN}/r${roundN}"
23 | 
24 | 


--------------------------------------------------------------------------------
/annot_tools/iprscan/cmd_list:
--------------------------------------------------------------------------------
1 | # Remove TE-associated proteins according to InterPro IDs (iprscan).
2 | ### The file 'potential_TE_IPRacc' is updated on [2/1/2022]
3 | awk -F "\t" '$12 ~ /^IPR/' in.ipr.tsv > in.ipr.tsv.IPRacc
4 | perl list_IPRacc.pl in.ipr.tsv.IPRacc > in.ipr.tsv.IPRacc.line
5 | perl cnt_TEIPRacc.pl  potential_TE_IPRacc in.ipr.tsv.IPRacc.line > in.ipr.tsv.IPRacc.line.cnt
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/annot_tools/iprscan/cnvt_iprJson2tbl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | # Convert InterPro search-result JSON (one whole "[ {...}, {...} ]" array per input line) into a tab-delimited table.
 5 | -t and !@ARGV and die "perl $0 ipr.SearchResults-transposases.json > ipr.SearchResults-transposases.json.tbl\n";
 6 | # Output header; the data rows below use the same column order.
 7 | print STDOUT join("\t", qw/id source source_database name description/)."\n";
 8 | while (<>) {
 9 |   m!^\s*(#|$)! and next; # skip blank lines and lines starting with '#'
10 |   chomp;
11 |   my @lines = &parse_iprJson($_);
12 |   for my $a1 (@lines) {
13 |     for my $k1 (keys %$a1) {
14 |       $a1->{$k1} //= "NA"; # e.g. an empty "description":[] is stored as undef by the parser
15 |     }
16 |     print STDOUT join("\t", @{$a1}{qw/id source source_database name description/})."\n";
17 |   }
18 | }
19 | # parse_iprJson($text): return a list of hash refs with keys id, source, description, name, source_database; dies if any text cannot be parsed.
20 | sub parse_iprJson {
21 |   my ($txt1) = @_;
22 |   $txt1 =~ s!^\s*\[(.*)\]\s*$!$1! or die "Err 1:$txt1\n"; # the text must be enclosed in [ ]; keep only the inside
23 |   my @back;
24 |   while ($txt1 =~ s!^\s*\{ "id":"(\S+?)", "source":"([^"]+)", "fields":\{ "description":\[(?:"(.*?)")?\], "name":\["([^"]+)"\], "source_database":\["([^"]+)"\] \} \s* \}\s*,*!!x) { # /x: blanks in the pattern are ignored, so keys must follow each other without spaces and in this order
25 |     my %h;
26 |     @h{qw/id source description name source_database/} = ($1, $2, $3, $4, $5);
27 |     push(@back, \%h);
28 |   }
29 |   $txt1 =~ m!^\s*$! or die "Err 2: |$txt1|\n"; # each record was cut off the front; anything left over is unparsable
30 |   return(@back);
31 | }# parse_iprJson()
32 | # One record, shown on several lines for readability:
33 | # {
34 | #"id":"PTHR22955",
35 | #"source":"interpro7_family",
36 | #"fields":{
37 | #  "description":[],
38 | #  "name":["RETROTRANSPOSON"],
39 | #  "source_database":["PANTHER"]
40 | #}
41 | #}
42 | 
43 | 


--------------------------------------------------------------------------------
/annot_tools/iprscan/list_IPRacc.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Collapse a table to one line per ID (column 0): the distinct values of
 3 | # column 11 (sorted) and their matching column-12 values, each joined by ";;".
 4 | # When one ID/column-11 pair occurs repeatedly, the last column-12 value wins.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | -t and !@ARGV and die "perl $0 ipr_all6_tsv.TEprot.IPRacc > ipr_all6_tsv.TEprot.IPRacc.line\n";
 9 | 
10 | my %desc_of; # {ID}{column 11} => column 12
11 | while (my $row = <>) {
12 |   chomp $row;
13 |   my @fld = split(/\t/, $row);
14 |   $desc_of{$fld[0]}{$fld[11]} = $fld[12];
15 | }
16 | for my $prot_id (sort keys %desc_of) {
17 |   my @acc  = sort keys %{ $desc_of{$prot_id} };
18 |   my @desc = map { $desc_of{$prot_id}{$_} } @acc;
19 |   print join("\t", $prot_id, join(";;", @acc), join(";;", @desc))."\n";
20 | }
21 | 


--------------------------------------------------------------------------------
/annot_tools/keep_nonRedundant_list.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Read a pairwise hit table and print the IDs chosen for removal, each with
 3 | # the value from its length column.
 4 | # Columns used (0-based): 0,1 = IDs of the pair; 12,13 = the values the code
 5 | # calls len1/len2; 10 = primary sort key (ascending).
 6 | # For each pair the member with the smaller length is marked for removal,
 7 | # unless the other member has already been marked.
 8 | use strict;
 9 | use warnings;
10 | 
11 | my %h; # "id1\tid2" => [ column 12, column 13, column 10 ]
12 | while (<>) {
13 | 	chomp;
14 | 	my @ta = split(/\t/, $_);
15 | 	my $tk = "$ta[0]\t$ta[1]";
16 | 	$h{$tk} = [ @ta[12,13,10] ];
17 | }
18 | 
19 | my %skip; # pairs already handled, in either orientation
20 | my %rm;   # ID marked for removal => its length
21 | # The final "$a cmp $b" makes the order of tied pairs reproducible; without
22 | # it ties follow Perl's randomized hash order and the result can change
23 | # from run to run.
24 | for my $tk ( sort {$h{$a}[2] <=> $h{$b}[2] || $h{$b}[0] <=> $h{$a}[0] || $a cmp $b} keys %h ) {
25 | 	defined $skip{$tk} and next;
26 | 	my ($id1, $id2) = split(/\t/, $tk);
27 | 	my ($len1, $len2) = @{$h{$tk}}[0,1];
28 | 	if ( $len2 > $len1 ) {
29 | 		defined $rm{$id2} or $rm{$id1} = $len1;
30 | 	} else {
31 | 		defined $rm{$id1} or $rm{$id2} = $len2;
32 | 	}
33 | 	$skip{"$tk"} = 1;
34 | 	$skip{"$id2\t$id1"} = 1;
35 | }
36 | for (sort keys %rm) {
37 | 	print "$_\t$rm{$_}\n";
38 | }
39 | 


--------------------------------------------------------------------------------
/annot_tools/kegg/1_extract_KeggMapRes.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copy the returned text from kegg mapper - search pathway (https://www.kegg.jp/kegg/tool/map_pathway1.html)
 3 | # Run this script to get a table with repeated KO IDs.
 4 | # Output columns: KO ID, KO description, map ID, map description.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | my ($map_id, $map_desc); # the pathway that the following "ko:" lines belong to
 9 | while (my $line = <>) {
10 |   chomp $line;
11 |   next if $line =~ m!^\s*$!;
12 |   if ($line =~ m!^map\d+!) {
13 |     # Pathway header such as "map01100 <description> (<number>)".
14 |     ($map_id, $map_desc) = (undef, undef);
15 |     $line =~ m!^(map\d+) (.+) \(\d+\)$! or die "$line\n";
16 |     ($map_id, $map_desc) = ($1, $2);
17 |   } elsif ($line =~ m!^\s\sko:(K\d+) (.+)$!) {
18 |     # Member line, indented by exactly two whitespace characters.
19 |     print STDOUT join("\t", $1, $2, $map_id, $map_desc)."\n";
20 |   } else {
21 |     die "$line\n";
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/annot_tools/kegg/2_join_mapIDs.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Merge rows sharing column 0 into one line:
 3 | #   ID, its single description (column 1), and all "mapID__(mapDesc)" labels
 4 | #   (columns 2 and 3) sorted and joined by ";;".
 5 | # Dies with the ID when one ID has more than one description.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | my %info;
10 | while (my $row = <>) {
11 |   chomp $row;
12 |   my @fld = split(/\t/, $row);
13 |   my $rec = ($info{$fld[0]} //= {});
14 |   $rec->{'desc'}{$fld[1]}++;
15 |   $rec->{'map'}{"$fld[2]__($fld[3])"} ++;
16 |   # Line number of the first occurrence; stored but not used for the output.
17 |   $rec->{'rank'} //= $.;
18 | }
19 | 
20 | for my $id (sort keys %info) {
21 |   my @descs = keys %{ $info{$id}{'desc'} };
22 |   die "$id\n" unless scalar(@descs) == 1;
23 |   my @maps = sort keys %{ $info{$id}{'map'} };
24 |   print STDOUT join("\t", $id, $descs[0], join(";;", @maps))."\n";
25 | }
26 | 


--------------------------------------------------------------------------------
/annot_tools/kegg/record:
--------------------------------------------------------------------------------
 1 | KO annotation : 
 2 | Use BlastKOALA (Version 2.1, https://www.kegg.jp/blastkoala/) to annotate KO definitions for genes (~5k genes each time/batch). 
 3 |  Enter taxonomy group of your genome              : Using taxonomy ID : 3653  (Citrullus genus)
 4 |  Enter KEGG GENES database file to be searched    : genus_eukaryotes
 5 |  Enter your email address : sunhonghe_1984@163.com (Only one job can run at a time for the same email address).
 6 | 
 7 | Download KO_definition (details: "View"=>Download) from the resultant web-link. The output table's header is 'Gene_ID \t KO \t Definition \t Score \t Second-KO \t Second-Score'. 
 8 | Save "Reconstruct Pathway" webpage for pathway table construction. 
 9 | 
10 | Reconstruct pathway : 
11 | Use "KEGG Mapper – Reconstruct Pathway" tool : https://www.kegg.jp/kegg/tool/map_pathway.html
12 | Different 'KEGG Mapper' versions may give slightly different pathway mapping results, so I recommend reconstructing the maps for all data sets of an analysis at the same time, and recording the 'KEGG Mapper' version used. 
13 |  Input genelist.txt file : 
14 |    format example : 
15 |      geneID \t KO_ID 
16 |      geneID \t KO_ID
17 |      ...
18 |  Click 'Show all objects' to expand all list; 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/blk2bed.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Turn each "start,end" block of column 3 (blocks separated by ';') into one
 3 | # BED line: column 1, start-1, end, "<column 0>_HS<block number>".
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 output/CX.cds.blk.CL > output/CX.cds.blk.CL.bed\n";
 8 | 
 9 | while (my $row = <>) {
10 |   chomp $row;
11 |   my @fld = split(/\t/, $row);
12 |   my @blocks = split(/;/, $fld[3]);
13 |   for my $idx (0 .. $#blocks) {
14 |     my ($beg, $end) = $blocks[$idx] =~ m!^(\d+)\,(\d+)$! or die "$blocks[$idx]\n";
15 |     # BED starts are 0-based, hence start-1; block numbers begin at 1.
16 |     print join("\t", $fld[1], $beg-1, $end, "$fld[0]_HS" . ($idx+1))."\n";
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/blk2gff.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert each row of a block table into GFF3 lines: one gene, one mRNA and
 3 | # one CDS per "start,end" block of column 3.
 4 | # Columns used (0-based): 0 = ID, 1 = sequence ID, 2 = strand, 3 = blocks.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | -t and !@ARGV and die "perl $0 output/CX.cds.blk.CL > output/CX.cds.blk.CL.gff3\n";
 9 | 
10 | while (my $row = <>) {
11 |   chomp $row;
12 |   my @fld = split(/\t/, $row);
13 |   my ($id, $chr, $str, $blk_txt) = @fld[0..3];
14 |   my (@cds_lines, $min_beg, $max_end);
15 |   for my $blk (split(/;/, $blk_txt)) {
16 |     $blk =~ m!^(\d+)\,(\d+)$! or die "$blk\n";
17 |     my ($beg, $end) = ($1, $2);
18 |     push(@cds_lines, join("\t", $chr, "blk", "CDS", $beg, $end, ".", $str, ".", "Parent=$id"));
19 |     # Track the overall span for the gene and mRNA lines.
20 |     if (!defined $min_beg or $min_beg > $beg) { $min_beg = $beg; }
21 |     if (!defined $max_end or $max_end < $end) { $max_end = $end; }
22 |   }
23 |   print join("\t", $chr, "blk", "gene", $min_beg, $max_end, ".", $str, ".", "ID=$id-G")."\n";
24 |   print join("\t", $chr, "blk", "mRNA", $min_beg, $max_end, ".", $str, ".", "ID=$id;Parent=$id-G")."\n";
25 |   print "$_\n" for @cds_lines;
26 | }
27 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/chk_only_pan.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Replace each member column (column 7 onwards) of a group table by a count:
 3 | # the number of ';'-separated members that are neither a locus string
 4 | # ("seq:start-end:strand") nor an ID matching /^C.Pan/.
 5 | # The first input line is the header and is printed unchanged.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | -t and !@ARGV and die "perl $0 comb.grp2.novl_loc.wiRepre.fmt > comb.grp2.novl_loc.wiRepre.fmt.ifOnlyPan\n";
10 | 
11 | while (<>) {
12 |   chomp;
13 |   $. == 1 and do { print "$_\n"; next; };
14 |   my @ta=split(/\t/, $_);
15 |   # At least four count columns are always written; rows with more member
16 |   # columns get one zero-initialized count per column, so that no slot is
17 |   # left undefined when the counts are joined.
18 |   my $cntN = scalar(@ta) - 7;
19 |   $cntN < 4 and $cntN = 4;
20 |   my @ifpan = (0) x $cntN;
21 |   for (my $i=7; $i<@ta; $i++) {
22 |     for my $tb (split(/;/, $ta[$i])) {
23 |       $tb =~ m!^\S+:\d+\-\d+:[+-]$! and next;
24 |       $tb =~ m!^C.Pan! and next;
25 |       $ifpan[$i-7] ++;
26 |     }
27 |   }
28 |   print join("\t", @ta[0..6], @ifpan)."\n";
29 | }
30 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/cnvt_gff_to_cdsBed.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Write the CDS features of a GFF3 file as BED lines, plus-strand features to
 3 | # <out_prefix>.p.CDS.bed and minus-strand features to <out_prefix>.m.CDS.bed.
 4 | # BED columns: seq ID, start-1, end, "<Parent>_HS<n>", Parent; <n> counts the
 5 | # CDS lines of that Parent in input order.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | my $htxt = <<HH;
10 | 
11 | perl $0 out_prefix in.gff3
12 | 
13 | # Result files are:
14 |   out_prefix.p.CDS.bed
15 |   out_prefix.m.CDS.bed
16 | 
17 | HH
18 | 
19 | !@ARGV and die "$htxt";
20 | # perl tools/cnvt_gff_to_cdsBed.pl $_.gff3 > $_.CDS.bed
21 | 
22 | my $opref = shift;
23 | 
24 | # One output handle per strand symbol.
25 | my %ofn = ('+' => "$opref.p.CDS.bed", '-' => "$opref.m.CDS.bed");
26 | my %ofh;
27 | for my $str (sort keys %ofn) {
28 |   open($ofh{$str}, '>', $ofn{$str}) or die "[Err] Failed to write file [$ofn{$str}]: $!\n";
29 | }
30 | my %h; # Parent ID => number of CDS lines seen so far
31 | while (<>) {
32 |   m!^\s*(#|$)! and next;
33 |   chomp;
34 |   my @ta=split(/\t/, $_);
35 |   $ta[2] =~ m!^CDS$!i or next;
36 |   $ta[8] =~ m!Parent=([^\s;]+)! or die "$_\n";
37 |   my $pid = $1;
38 |   $h{$pid} ++;
39 |   defined $ta[6] and defined $ofh{$ta[6]} or die "[Err] Bad str line: $_\n";
40 |   print {$ofh{$ta[6]}} join("\t", $ta[0], $ta[3]-1, $ta[4], "${pid}_HS$h{$pid}", $pid)."\n";
41 | }
42 | # Closing explicitly reports write errors (e.g. a full disk) that would
43 | # otherwise go unnoticed at exit.
44 | for my $str (sort keys %ofh) {
45 |   close($ofh{$str}) or die "[Err] Failed to finish writing [$ofn{$str}]: $!\n";
46 | }
47 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/filter_R2Q_liftoff_tbl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Filter a liftoff summary table read from STDIN or the files on the command
 3 | # line. The header row (first column 'trans_R_ID') is always printed.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | while (<>) {
 8 |   chomp;
 9 |   my @ta=split(/\t/, $_);
10 |   $ta[0] eq 'trans_R_ID' and do { print "$_\n"; next; };
11 |   # (1) Require coverage (column 1) >= 0.9;
12 |   $ta[1] >= 0.9 or next;
13 |   # (2) Keep the row if column 9 is '.' (no overlapping gene), or if the overlap
14 |   #     (column 3) is at least 100 bp, or at least 50% of column 7 or of column 10.
15 |   $ta[9] eq '.' or $ta[3] >= 100 or $ta[3] >= 0.5 * $ta[7] or $ta[3] >= 0.5 * $ta[10] or next;
16 |   print "$_\n";
17 | }
18 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/fit_gff_4igv.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep only the mRNA and CDS lines of a GFF3 file and remove the first
 3 | # Name=... attribute from each kept line.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 in.gff3 > in.rmGene.gff3\n";
 8 | 
 9 | while (<>) {
10 |   m!^\s*(#|$)! and next;
11 |   chomp;
12 |   my @ta=split(/\t/, $_);
13 |   scalar(@ta) > 6 or next;
14 |   $ta[2] =~ m!^(mRNA|CDS)$!i or next;
15 |   # Also remove the ';' that follows the attribute, so that no empty
16 |   # attribute (";;") is left in the middle of the attribute column.
17 |   s!Name=[^\s;]+;?!!;
18 |   print "$_\n";
19 | }
20 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/grp2single.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Expand a group table to one "member<TAB>group ID" line per member.
 3 | # Column 0 is the group ID, column 1 is skipped, members start at column 2.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | while (my $row = <>) {
 8 |   chomp $row;
 9 |   my ($grp_id, undef, @members) = split(/\t/, $row);
10 |   print "$_\t$grp_id\n" for @members;
11 | }
12 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/info_bedtools_intersect.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [3/25/2022] The column number may be different!!!
 3 | # [9/28/2022] In some bedtools version, unmapped is presented as '-1' instead of '.'.
 4 | # [2/13/2025] Allow empty file in .m. bedtools intersections.
 5 | #
 6 | # Sum the overlap length (column $cN_ovlLen) for every pair of IDs found in
 7 | # columns $cN_mID_1 and $cN_mID_2, and print the pairs by decreasing overlap.
 8 | use strict;
 9 | use warnings;
10 | 
11 | -t and !@ARGV and die "perl $0 bedtools_intersect_wao.out > bedtools_intersect_wao.out.tbl\n";
12 | 
13 | # 0-based column indices of the input table.
14 | my $cN_mID_1 = 4;
15 | my $cN_mID_2 = 9;
16 | my $cN_ovlLen = 10;
17 | 
18 | my %h;            # {ID 1}{ID 2} => summed overlap length
19 | my %trans_cdsLen; # {ID 1} => overlap length summed over all rows of that ID
20 | while (<>) {
21 |   chomp;
22 |   my @ta=split(/\t/, $_);
23 |   if (scalar(@ta) == $cN_ovlLen - 1) {
24 |     # Short rows: insert two '.' placeholders before the last field so that
25 |     # the last field lands on column $cN_ovlLen.
26 |     splice(@ta, $#ta, 0, '.', '.');
27 |   }
28 |   $ta[$cN_mID_2] eq '-1' and $ta[$cN_mID_2] = '.';
29 |   $h{$ta[$cN_mID_1]}{$ta[$cN_mID_2]} += $ta[$cN_ovlLen];
30 |   $trans_cdsLen{$ta[$cN_mID_1]} += $ta[$cN_ovlLen];
31 | }
32 | my @o1;
33 | for my $g1 (keys %h) {
34 |   my @tg2 = keys %{$h{$g1}};
35 |   for my $g2 (@tg2) {
36 |     # The '.' (no target) entry is only reported when it is the sole entry.
37 |     if ($g2 eq "." and scalar(@tg2) > 1) {
38 |       next;
39 |     }
40 |     push(@o1, [$g1, $g2, $h{$g1}{$g2}]); # [mID_1, mID_2, overlap_size]
41 |   }
42 | }
43 | print STDOUT join("\t", qw/trans_mrnaID tgt_mrnaID trans_cdsLen trans_ovlLen/)."\n";
44 | # Ties in overlap size are ordered by the two IDs; without this the order of
45 | # tied rows follows Perl's randomized hash order and changes between runs.
46 | for my $a1 (sort {$b->[2] <=> $a->[2] || $a->[0] cmp $b->[0] || $a->[1] cmp $b->[1]} @o1) {
47 |   print STDOUT join("\t", $a1->[0], $a1->[1], $trans_cdsLen{$a1->[0]}, $a1->[2])."\n";
48 | }
49 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/prepare_gff3_to_blk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert a .JnLoc table into a block table with the columns:
 3 | #   "<tag>:<column 0>", column 2, column 5, column 9, '.', summed block length
 4 | # Column 9 holds "start,end" blocks separated by ';'. The row whose first
 5 | # column is 'mrnaID' (header) is skipped.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | my $htxt = "perl $0  CLpan  CLpan.trim2CDS.gff3.JnLoc > CLpan.trim2CDS.blk\n";
10 | !@ARGV and die $htxt;
11 | 
12 | my $tag = shift;
13 | my $fn_JnLoc = shift;
14 | # Both arguments are required; show the usage instead of failing inside open().
15 | defined $fn_JnLoc or die $htxt;
16 | 
17 | # An empty tag means "no prefix"; otherwise the tag is followed by ':'.
18 | $tag ne '' and $tag .= ':';
19 | 
20 | open(my $in_fh, '<', $fn_JnLoc) or die "[Err] Failed to open [$fn_JnLoc]: $!\n";
21 | while (<$in_fh>) {
22 |   chomp;
23 |   my @ta=split(/\t/, $_);
24 |   $ta[0] eq 'mrnaID' and next;
25 |   my @se = split(/;/, $ta[9]);
26 |   my $cdsL = 0;
27 |   for my $tb (@se) {
28 |     $tb =~ m!^(\d+)\,(\d+)$! or die "[Err] bad blk format |$tb|\n";
29 |     $cdsL += ($2-$1+1);
30 |   }
31 |   print STDOUT join("\t", "$tag$ta[0]", @ta[2,5,9], '.', $cdsL)."\n";
32 | }
33 | close($in_fh);
34 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/remove_ovl_loc.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For every group line, drop locus members ("seq:start-end:strand") that
 3 | # overlap a longer locus of the same sequence and strand already kept.
 4 | # Output: column 0, number of remaining members, the non-locus members, then
 5 | # the kept loci ordered by decreasing span.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | !@ARGV and die "perl $0 comb.grp2 > comb.grp2.novl_loc\n";
10 | 
11 | while (my $row = <>) {
12 |   chomp $row;
13 |   my @fld = split(/\t/, $row);
14 |   my (@gene_ids, @spans);
15 |   for my $item (@fld[2..$#fld]) {
16 |     if (my ($chr, $beg, $end, $str) = $item =~ m!^(\S+):(\d+)\-(\d+):([+-])$!) {
17 |       push(@spans, { 'chr' => $chr, 'beg' => $beg, 'end' => $end, 'str' => $str, 'len' => $end-$beg+1 });
18 |     } else {
19 |       push(@gene_ids, $item);
20 |     }
21 |   }
22 |   # Longest loci first, so that a shorter locus never displaces a longer one.
23 |   my @kept;
24 |   for my $cand (sort { $b->{'len'} <=> $a->{'len'} } @spans) {
25 |     my $clashN = grep {
26 |       $cand->{'chr'} eq $_->{'chr'}
27 |         and $cand->{'str'} eq $_->{'str'}
28 |         and !($cand->{'beg'} > $_->{'end'})
29 |         and !($cand->{'end'} < $_->{'beg'})
30 |     } @kept;
31 |     $clashN == 0 and push(@kept, $cand);
32 |   }
33 |   my @loc_txt = map { "$_->{'chr'}:$_->{'beg'}-$_->{'end'}:$_->{'str'}" } @kept;
34 |   print STDOUT join("\t", $fld[0], scalar(@gene_ids)+scalar(@kept), @gene_ids, @loc_txt)."\n";
35 | }
36 | 


--------------------------------------------------------------------------------
/annot_tools/liftoff_tools/retrieve_QlocSeq_fromBlk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [3/28/2022] Retrieve CDS sequences if they have not been predicted yet.
 3 | use strict;
 4 | use warnings;
 5 | use fastaSunhh;
 6 | my $fs_obj = fastaSunhh->new();
 7 | # Block-table columns used below (0-based): 0 = element ID, 1 = sequence ID, 2 = strand, 3 = "start,end" blocks joined by ';'.
 8 | !@ARGV and die "perl $0 genomic.fa  output/Qcds.CX.blk.CL > output/Qcds.CX.blk.CL.fa\n";
 9 | # The first argument is the genome FASTA; the block table is read from the remaining arguments or STDIN.
10 | my $fnFas = shift;
11 | my %seq = %{ $fs_obj->save_seq_to_hash( 'faFile' => $fnFas ) };
12 | for (keys %seq) { $seq{$_}{'seq'} =~ s!\s!!g; $seq{$_}{'len'} = length($seq{$_}{'seq'}); } # whitespace is removed so that substr() offsets are base positions; 'len' is not read again in this script
13 | 
14 | my %h; # element IDs already written
15 | while (<>) {
16 |   chomp;
17 |   my @ta=split(/\t/, $_);
18 |   my $ele_id = $ta[0];
19 |   $ele_id eq 'Q_ID' and next; # header row
20 |   # $ta[0] =~ m!^C\S:\S+:\d+\-\d+:[+-]$! or die "$_\n";
21 |   defined $h{$ta[0]} and next; # only the first row of each element ID is used
22 |   $h{$ele_id} = 1;
23 |   my $chr_id  = $ta[1];
24 |   my $chr_str = $ta[2];
25 |   my $blks    = $ta[3];
26 |   my $cds_seq = '';
27 |   defined $seq{$chr_id} or die "$chr_id\n";
28 |   for my $tb (split(/;/, $blks)) {
29 |     $tb =~ m!^(\d+)\,(\d+)$! or die "$tb\n";
30 |     $cds_seq .= substr($seq{$chr_id}{'seq'}, $1-1, $2-$1+1); # 1-based inclusive start,end -> 0-based offset and length; blocks are joined in the listed order
31 |   }
32 |   if ($chr_str eq '-') {
33 |     &fastaSunhh::rcSeq(\$cds_seq, 'rc'); # NOTE(review): presumably reverse-complements $cds_seq in place through the reference - confirm in fastaSunhh.pm
34 |   }
35 |   print STDOUT ">$ele_id\n$cds_seq\n";
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/annot_tools/maker/rm_maker_fasta.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copy a GFF3 file up to, but not including, its "##FASTA" line.
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | -t and !@ARGV and die "perl $0 r1_maker_wiFa.gff3 > r1_maker_woFa.gff3\n";
 7 | 
 8 | while (my $line = <>) {
 9 | 	# Any number of leading '#' is accepted and the match ignores case.
10 | 	last if $line =~ m!^\s*#+FASTA\s*$!i;
11 | 	print $line;
12 | }
13 | 


--------------------------------------------------------------------------------
/annot_tools/mkCmd_blast2Nr.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Echo a command with a timestamp and run it through eval.
 3 | function exe_cmd {
 4 | 	echo "[$(date)][CMD] $1"
 5 | 	# Quoted, so the command string reaches eval without prior word splitting
 6 | 	# or glob expansion.
 7 | 	eval "$1"
 8 | 	echo "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Echo a message with a timestamp.
12 | function tsmsg {
13 | 	echo "[$(date)]$1"
14 | }
15 | 
16 | dbFa='/share/nas2/xigua/sunhonghe/database/db_blast/ncbi/nr'
17 | dbTag='toNr'
18 | 
19 | qryFa=$1
20 | # Without a query file the printed blastp command would be incomplete.
21 | if [ -z "$qryFa" ]; then
22 | 	echo "bash $0 query_prot.fasta > cmd_blast2Nr" >&2
23 | 	exit 1
24 | fi
25 | cpuN=10
26 | 
27 | # Only print the blastp command; it is not executed here.
28 | echo "blastp -outfmt 11 -db $dbFa -evalue 1e-3 -num_alignments 20 -seg yes -num_threads $cpuN -query $qryFa -out $qryFa.$dbTag.asn.1"
29 | 
30 | # blastp -outfmt 5 -db nr -evalue 1e-3 -num_alignments 20 -seg yes -num_threads 20 -query wcgP_cutted/wcgP_00033.fasta -out wcgP_cutted/wcgP_00033.fasta.xml
31 | 


--------------------------------------------------------------------------------
/annot_tools/pasa/alignAssembly.config:
--------------------------------------------------------------------------------
 1 | 
 2 | ## templated variables to be replaced exist as <__var_name__>
 3 | 
 4 | # MySQL settings
 5 | MYSQLDB=P1denovoAndGG_pasa
 6 | 
 7 | 
 8 | #######################################################
 9 | # Parameters to specify to specific scripts in pipeline
10 | # create a key = "script_name" + ":" + "parameter" 
11 | # assign a value as done above.
12 | 
13 | #script validate_alignments_in_db.dbi
14 | validate_alignments_in_db.dbi:--MIN_PERCENT_ALIGNED=95
15 | validate_alignments_in_db.dbi:--MIN_AVG_PER_ID=98
16 | validate_alignments_in_db.dbi:--NUM_BP_PERFECT_SPLICE_BOUNDARY=3
17 | validate_alignments_in_db.dbi:--MAX_INTRON_LENGTH=50000
18 | 
19 | 
20 | #script subcluster_builder.dbi
21 | subcluster_builder.dbi:-m=50
22 | 
23 | 


--------------------------------------------------------------------------------
/annot_tools/pasa/pair_ovlp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | 
 5 | # Intervals keyed by column 2 of the tab-separated input; each entry is
 6 | # [ column 1, column 3, column 4 ], used below as [ id, start, end ].
 7 | my %by_group; 
 8 | while (my $row = <>) {
 9 | 	chomp $row; 
10 | 	my ($id, $group, $start, $end) = (split(/\t/, $row))[0 .. 3]; 
11 | 	push @{ $by_group{$group} }, [ $id, $start, $end ]; 
12 | }
13 | 
14 | for my $group ( sort keys %by_group ) {
15 | 	# Order by start, then by end, so the forward scan below can stop early.
16 | 	my @iv = sort { $a->[1] <=> $b->[1] || $a->[2] <=> $b->[2] } @{ $by_group{$group} }; 
17 | 	for my $i ( 0 .. $#iv ) {
18 | 		my $n_hit = 0; 
19 | 		for my $j ( $i+1 .. $#iv ) {
20 | 			# First later interval that starts beyond our end: stop scanning.
21 | 			last if $iv[$i][2] < $iv[$j][1]; 
22 | 			# Here end_i >= start_j, so the pair must overlap; if the other
23 | 			# bound disagrees the two records are reported and the run stops.
24 | 			$iv[$i][1] <= $iv[$j][2] or die "@{$iv[$i]}\n@{$iv[$j]}\n"; 
25 | 			print "$iv[$i][0]\t$iv[$j][0]\n"; 
26 | 			$n_hit ++; 
27 | 		}
28 | 		# No later partner was found: print the id with an empty 2nd column.
29 | 		$n_hit == 0 and print "$iv[$i][0]\t\n"; 
30 | 	}
31 | } 
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/annot_tools/protein/trimProt4spaln.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | 
 6 | # Both the input and the output file are required; with a single argument
 7 | # the shell command below would be built with an undefined output path.
 8 | @ARGV >= 2 or die "perl $0 in_prot.fa out_prot.fa\n"; 
 9 | 
10 | my $fin = shift; 
11 | my $fout = shift; 
12 | 
13 | # Chain four deal_fasta.pl passes (-rmTailX_prot, -frag/-frag_width/-frag_head,
14 | # -chopKey, -rmDefinition) over $fin and write the result to $fout.
15 | &exeCmd_1cmd("cat $fin | deal_fasta.pl -rmTailX_prot | deal_fasta.pl -frag 0-0 -frag_width 100 -frag_head | deal_fasta.pl -chopKey ':1\\-\\d+' | deal_fasta.pl -rmDefinition > $fout"); 
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/Installer.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl 
 2 | 
 3 | $usage = "Installer.pl -m hmmerDIR -p ProtexcluderDIR\n";
 4 | 
 5 | # to install Protexcluder:
 6 | # every *.npl template in the current directory is copied to a *.pl script
 7 | # in which the hmmer and ProtExcluder path placeholders are replaced by the
 8 | # values of -m and -p (an omitted option substitutes an empty string).
 9 | 
10 | use Getopt::Std;
11 | 
12 | # Stop with the usage on an unknown option or a missing option value
13 | # instead of silently continuing.
14 | getopts("m:p:") or die $usage;
15 | 
16 | $hmmerDIR = defined $opt_m ? $opt_m : "";
17 | 
18 | $prexcDIR = defined $opt_p ? $opt_p : "";
19 | 
20 | @Raw_Files = glob "*.npl";
21 | # A named loop variable keeps the inner while(<RF>) from overwriting the
22 | # array element that $_ would otherwise alias.
23 | foreach $NPL (@Raw_Files) {
24 |         open(RF, "<", $NPL)||die"Cannot read $NPL: $!\n";
25 |         $PL = $NPL;
26 |         # Rewrite only the trailing extension, not an earlier ".npl" in the name.
27 |         $PL =~ s/\.npl$/.pl/;
28 |         open(PL, ">", $PL)||die"Cannot write $PL: $!\n";
29 |         while(<RF>) {
30 |                 chomp;
31 |                 $Line = $_;
32 | 
33 |                 # /g: replace every placeholder on the line, not just the first.
34 |                 $Line =~ s/_hmmer_/$hmmerDIR/g;
35 |                 $Line =~ s/_prexc_/$prexcDIR/g;
36 | 
37 |                 print(PL "$Line\n");
38 |         }
39 |         close(RF);
40 |         close(PL)||die"Cannot finish writing $PL: $!\n";
41 | 
42 |         # Built-in chmod: no shell involved, so unusual file names are safe.
43 |         # A failure is reported but does not abort the remaining files.
44 |         chmod(0755, $PL)||warn"Cannot chmod $PL: $!\n";
45 | }
46 | 
47 | print "Install finished!\n";
48 | print "If you input the wrong path, you can do it again with corrected paths\n";
49 | print "--------------------------- Have a nice day! -------------------------\n\n\n";
50 | 
51 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/fasta-reformat.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 | 
 3 | $usage = "fasta-reformat.pl input-fasta-file number-of-positions-per-line\n";
 4 | 
 5 | # to reformat fasta file so that each line containing number of letters given by the user
 6 | 
 7 | if (@ARGV < 2) {die $usage;}
 8 | if ($ARGV[1] < 1) {die $usage;}
 9 | 
10 | # Line width as an integer (the modulus used previously also worked on the
11 | # integer part of the argument).
12 | $width = int($ARGV[1]);
13 | 
14 | open(FA, "$ARGV[0]") || die $usage;
15 | 
16 | $seq = "";
17 | while (<FA>) {
18 |     if (/>\s*(.+)/) {
19 | 	# New header: flush the sequence collected for the previous record.
20 | 	print_wrapped($seq, $width);
21 | 	printf ">%s\n", $1;
22 | 	$seq = "";
23 |     } else {
24 | 	chomp;
25 | 	$seq .= $_;
26 |     }
27 | }
28 | close FA;
29 | 
30 | # Flush the last record.
31 | print_wrapped($seq, $width);
32 | 
33 | # Print $residues in lines of at most $per_line characters.
34 | # Nothing is printed for an empty string. The test is on length, so a
35 | # sequence whose whole text is "0" is still written out.
36 | sub print_wrapped {
37 |     my ($residues, $per_line) = @_;
38 |     my $total = length $residues;
39 |     for (my $pos = 0; $pos < $total; $pos += $per_line) {
40 | 	print substr($residues, $pos, $per_line), "\n";
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/getanycolumnuni.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 | 
 3 | $usage = "getanycolumnuni.pl file column wanted\n";
 4 | 
 5 | # to get a list from a given column in a text file
 6 | # redundancy is excluded if the same items are next to each other
 7 | 
 8 | if (@ARGV < 2) {die "$usage";}
 9 | 
10 | # Columns are 1-based. Reject 0, negative or non-numeric values, which would
11 | # otherwise become a negative index and silently select a field counted
12 | # from the END of every line.
13 | if ($ARGV[1] !~ /^\d+$/ || $ARGV[1] < 1) {die "$usage";}
14 | # The index does not change between lines, so compute it once.
15 | $i = $ARGV[1] - 1;
16 | 
17 | open(MSP, "$ARGV[0]") || die "Can not open the input MSP file $ARGV[0]\n$usage";
18 | 
19 | $lquery = "";
20 | 
21 | while (<MSP>) {
22 |     @line = split;
23 |     # A line too short to contain the column counts as an empty item.
24 |     $item = defined $line[$i] ? $line[$i] : "";
25 |     if ($item ne $lquery) {
26 | 	printf "%s\n",$item;
27 |     }
28 |     $lquery = $item;
29 | }
30 | close MSP;
31 | 
32 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/matchtract.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | 
 3 | $usage = "matchtract.pl blastx_output_file\n";
 4 | 
 5 | # to extract matched amino acids in blastx file
 6 | 
 7 | if (@ARGV < 1) {die "$usage";}
 8 | # A 2nd and 3rd argument used to be stored as score/identity cutoffs but
 9 | # were never consulted; they are still accepted and still ignored.
10 | open(BLT, "$ARGV[0]") || die "Can not open BLAST output $ARGV[0].\n$usage";
11 | 
12 | $query = "";	# name taken from the most recent "Query=" line
13 | $take = 0;	# set when the previous line was a "Query" alignment row
14 | while (<BLT>) {
15 |     if (/^Query=\s+(\S+)/) {
16 | 	$query = $1;
17 |     }
18 |     elsif (/^>\s+(\S+)/) {
19 | 	# Subject header: emitted as a FASTA-style ">subject query" line.
20 | 	$subject = $1;
21 | 	printf ">%s %s\n",$subject,$query;
22 |     }
23 |     elsif (/^Query\s+(\S+)/) {
24 | 	$take = 1;
25 |     }
26 |     elsif ($take) {
27 | 	# The line right after a "Query" row is printed unchanged.
28 | 	print;
29 | 	$take = 0;
30 |     }
31 | }
32 | close BLT;
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/mergequeryBF.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 |  
 3 | $usage = "mergequeryBF.pl BF maximum gap to merge\n";
 4 | 
 5 | # to merge matched region in query if they are within given distance
 6 | 
 7 | 
 8 | if (@ARGV < 2) {die "$usage";}
 9 | 
10 | # Sort into "<input>s" by column 6, then numerically by column 3. sort is run
11 | # without a shell, so characters in the file name cannot alter the command,
12 | # and a failed sort stops the run instead of leaving a stale or partial
13 | # file to be parsed below.
14 | system("sort", "-k", "6,6", "-k", "3,3n", "-o", "$ARGV[0]s", $ARGV[0]) == 0
15 |     or die "sort failed on $ARGV[0]\n$usage";
16 | 
17 | open(MSP, "$ARGV[0]s") || die $usage;
18 | 
19 | # $lTE stays undefined until the first record has been read. This keeps an
20 | # empty, all-zero region from being printed before the first real one and
21 | # when the input holds no matching record at all.
22 | undef $lTE;
23 | 
24 | while (<MSP>) {
25 |   
26 | if   (/^\s*\d+\s+\d+\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+\d+\s+\d+\s+\d+\s+(\S+)\s*/)  {
27 |     if (!defined $lTE) {
28 | 	# First record: open the first region.
29 | 	$start = $1;
30 | 	$end = $2;
31 |     }
32 |     elsif ($4 ne $lTE || ($1 - $end) > $ARGV[1]) {
33 | 	# New column-6 name, or a gap larger than allowed: flush and restart.
34 | 	printf "%-30s %06d %06d %06d %s\n", $lTE, $start,$end,$llen,$lsubj;
35 | 	$start = $1;
36 | 	$end = $2;
37 |     }
38 |     elsif ($2 > $end) {
39 | 	# Within the gap limit: extend the open region.
40 | 	$end = $2;
41 |     }
42 |     $llen = $3;
43 |     $lTE = $4;
44 |     $lsubj = $5;
45 |     }
46 | }
47 | 
48 | close MSP;
49 | 
50 | # Flush the last open region, if any record was read.
51 | if (defined $lTE) {
52 |     printf "%-30s %06d %06d %06d %s\n", $lTE, $start,$end,$llen,$lsubj;
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/mergeunmatchedregion.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 |  
 3 | $usage = "mergeunmatchedregion.pl seqfile\n";
 4 | 
 5 | # to merge multiple pieces from a single sequence into one piece
 6 |  
 7 | open(DB, "$ARGV[0]") || die "Can not open the seqfile $ARGV[0]\n$usage";
 8 |  
 9 | # Name of the previous header with its trailing "<char>from-to" part removed.
10 | $previous = "";
11 | while ($line = <DB>) {
12 |     # Anything that is not a ">name<char>from-to ..." header passes through.
13 |     unless ($line =~ /^>(\S+)\D\d+-\d+\s*(.*)$/) {
14 | 	print $line;
15 | 	next;
16 |     }
17 |     ($name, $rest) = ($1, $2);
18 |     # Consecutive pieces of one sequence keep only their first header, so
19 |     # their sequence lines run together under it.
20 |     print ">$name\t $rest\n" if $name ne $previous;
21 |     $previous = $name;
22 | }
23 | 
24 | close DB;
25 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/mspesl-sfetch.npl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 | 
 3 | $usage = "mspesl-sfetch.pl database msp_file integer output_file\nwhere integer is how far away you'd like to extend from the match region\n";
 4 | 
 5 | # to fetch sequences from a list 
 6 | 
 7 | # call esl-sfetch from hmmer-3.1
 8 | 
 9 | die "$usage" if @ARGV < 4;
10 | 
11 | ($db, $msp, $flank, $out) = @ARGV[0 .. 3];
12 | 
13 | # Path of the fetch tool; the leading placeholder is filled in by Installer.pl.
14 | $sfetch = "_hmmer_binaries/esl-sfetch";
15 | 
16 | # Opening and closing the database only verifies that it is readable.
17 | open(TEST, "$db") || die "Can not open the database $db\n$usage";
18 | close TEST;
19 | open(MSP, "$msp") || die "Can not open the input MSP file $msp\n$usage";
20 | 
21 | # Start from a clean output file, then index the database.
22 | `rm -f $out`;
23 | `$sfetch --index $db`;
24 | 
25 | $failure = 0;
26 | while ($rec = <MSP>) {
27 |     @line = split ' ', $rec;
28 |     # Columns 3/4 decide the orientation; columns 6/7 are widened by the flank.
29 |     $forward = ($line[2] < $line[3]);
30 |     if ($forward) {
31 | 	$from = $line[5] - $flank;
32 | 	$to   = $line[6] + $flank;
33 |     }
34 |     else {
35 | 	$from = $line[6] + $flank;
36 | 	$to   = $line[5] - $flank;
37 |     }
38 |     $from = 1 if $from < 1;
39 |     $to   = 1 if $to < 1;
40 | 
41 |     # The non-forward case adds -r; the fetched record is appended to the output.
42 |     $rc_opt = $forward ? "" : "-r ";
43 |     `$sfetch -c $from..$to $rc_opt$db $line[7] >> $out`;
44 | 
45 |     # A non-zero exit status of the fetch is reported and counted.
46 |     if ($?) {
47 | 	print $rec, "failure\n\n";
48 | 	$failure++;
49 |     }
50 | }
51 | close MSP;
52 | # Make sure the output file exists even when nothing was fetched.
53 | -f $out or `echo "" > $out`; 
54 | 
55 | print "Total failed case: ", $failure if $failure;
56 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/rmlistedseq.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl -w
 2 | 
 3 | $usage = "rmlistedseq.pl namelist fasta file \n";
 4 | 
 5 | # This is for removing a subset of sequences from a seqfile
 6 | 
 7 | if (@ARGV < 2) {die $usage;}
 8 | 
 9 | open(RM, "$ARGV[0]") || die $usage;
10 | 
11 | # Names to drop, stored as hash keys so that each FASTA header needs a
12 | # single lookup instead of a scan over the whole list.
13 | %listed = ();
14 | 
15 | while (<RM>) {
16 |     # First whitespace-delimited token of the line, minus any leading '>'.
17 |     if (/^>*(\S+)\s*/) {
18 | 	$listed{$1} = 1;
19 |     }
20 | }
21 | close RM;
22 | 
23 | open(FA, "$ARGV[1]") || die $usage;
24 | 
25 | # Lines before the first header belong to no record and are not printed.
26 | $take = 0;
27 | 
28 | while (<FA>) {
29 |     if (/^>(\S+)\s*/) {
30 | 	# A header switches printing off for listed names and on for the rest.
31 | 	$take = exists $listed{$1} ? 0 : 1;
32 |     }
33 |     if ($take) {
34 | 	print;
35 |     }
36 | }
37 | 
38 | close FA;
39 | 
40 | 
41 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/rmlowcomfromBF.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | 
 5 | my $usage = "rmlowcomfromMSP.pl mtca BFfile\n";
 6 | 
 7 | if (@ARGV < 2) {die $usage;}
 8 | 
 9 | # Print the records of file two whose (column 6, column 10) pair is listed
10 | # in file one as a ">first second" header: column 6 has to equal the
11 | # header's second token and column 10 its first token.
12 | 
13 | open(RM, "$ARGV[0]") || die "Cannot open $ARGV[0]";
14 | 
15 | # Listed pairs keyed as "second<TAB>first". Both tokens are matched with \S+,
16 | # so they cannot contain the tab used as separator. A hash gives one lookup
17 | # per record instead of a scan over every header.
18 | my %wanted; 
19 | while (<RM>) {
20 | 	if (/^>(\S+)\s+(\S+)\s*/) {
21 | 		$wanted{"$2\t$1"} = 1; 
22 | 	}
23 | }
24 | close RM; 
25 | 
26 | open(MSP, "$ARGV[1]") || die "Cannot open $ARGV[1]";
27 | while (<MSP>) {
28 | 	if (/^\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\S+)\s+\d+\s+\d+\s+\d+\s+(\S+)\s*/) {
29 | 		if (exists $wanted{"$1\t$2"}) {
30 | 			print;
31 | 		}
32 | 	}
33 | }
34 | close MSP;
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/rmlowcomplexitymathc.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 | 
 3 | $usage = "rmlowcomplexitymathc.pl gc3 file aa number  minimum percent\n";
 4 | 
 5 | # Print the lines for which the sum of the first N count columns (starting at
 6 | # column 4), as a percentage of column 3, is below the given minimum.
 7 | 
 8 | if (@ARGV < 3) {die $usage;}
 9 | 
10 | 
11 | open(MSP, "$ARGV[0]") || die $usage;
12 |  
13 | while (<MSP>){
14 |     @line = split;
15 |     $total = 0;
16 |     for ($i = 3; $i < 3 + $ARGV[1]; $i++) {
17 | 	$total = $total + $line[$i];
18 |     }
19 |     # The rate is recomputed for every line. When column 3 is zero or missing
20 |     # it is 0, not the value left over from the previous line.
21 |     if ($line[2]) {
22 | 	$rate = $total*100/$line[2];
23 |     }
24 |     else {
25 | 	$rate = 0;
26 |     }
27 |     if ($rate < $ARGV[2]) {
28 | 	print;
29 |     }
30 | 
31 | }
32 | 
33 | close MSP;
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/rmshortseq_noN.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 |  
 3 | $usage = "rmshortseq.pl  stfile fastafile minmusize\n";
 4 | 
 5 | # to delete sequences short than given size
 6 | 
 7 | if (@ARGV < 3) {die "$usage";}
 8 | 
 9 | open(LENGTH, "$ARGV[0]") || die $usage;
10 | 
11 | # Names whose four count columns (columns 2-5) add up to at least the
12 | # minimum size, mapped to that sum.
13 | %len = ();
14 | while (<LENGTH>){
15 |     if (/^\s*(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s/) {
16 | 	$nonN = $2 + $3 + $4 + $5;
17 | 	if ($nonN >= $ARGV[2]) {
18 | 	    $len{$1} = $nonN;
19 | 	}
20 |     }
21 | }
22 | close LENGTH;
23 | 
24 | open(FASTA, "$ARGV[1]") || die $usage;
25 | 
26 | # Lines before the first header belong to no record and are not printed.
27 | $take = 0;
28 | while (<FASTA>) {
29 |     if (/^>(\S+)\s*/) {
30 | 	# One hash lookup per header replaces a scan over every stored name.
31 | 	$take = exists $len{$1} ? 1 : 0;
32 |     }
33 |     if ($take) {
34 | 	print;
35 |     }
36 | }
37 | close FASTA;
38 | 
39 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/ProtExcluder1.1/unmatchedregionBF.pl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/perl
 2 |  
 3 | $usage = "unmatchedregionBF.pl BFm50 bpflanking\n";
 4 | 
 5 | # to extract the unmatched portion of the sequence
 6 | 
 7 | die "$usage" if @ARGV < 2;
 8 | 
 9 | open(BF, "$ARGV[0]") || die $usage;
10 | 
11 | $flank = $ARGV[1];
12 | 
13 | # Values remembered from the previous record: name, match end, total length.
14 | ($lTE, $lend, $llen) = ("", 0, 0);
15 | 
16 | while ($rec = <BF>) {
17 |     next unless $rec =~ /^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+\S+\s*/;
18 |     ($name, $mstart, $mend, $len) = ($1, $2, $3, $4);
19 | 
20 |     # Where the unmatched stretch after the previous match begins.
21 |     $head = $lend + $flank;
22 |     if ($name ne $lTE) {
23 | 	# New sequence: report the tail of the previous one, then the part
24 | 	# of the new one lying before its first match.
25 | 	emit_region($head, $llen, $lTE) if $head < $llen;
26 | 	if ($mstart > $flank) {
27 | 	    printf "1000 85 1 200 query 000001 %06d %s\n", ($mstart-$flank),$name;
28 | 	}
29 |     }
30 |     elsif ($head < ($mstart - $flank)) {
31 | 	# Same sequence: report the gap between two consecutive matches.
32 | 	emit_region($head, $mstart - $flank, $name);
33 |     }
34 |     ($lTE, $lend, $llen) = ($name, $mend, $len);
35 | }
36 | 
37 | close BF;
38 | 
39 | # Tail of the last sequence.
40 | $head = $lend + $flank;
41 | emit_region($head, $llen, $lTE) if $head < $llen;
42 | 
43 | # Print one output record for the region from..to of the named sequence.
44 | sub emit_region {
45 |     my ($from, $to, $id) = @_;
46 |     printf "1000 85 1 200 query %06d %06d %s\n", $from, $to, $id;
47 | }
48 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/add_repClass.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | 
 6 | unless (@ARGV) {
 7 | 	die "perl $0 RepClass in.fa\n"; 
 8 | }
 9 | 
10 | my $tag = shift; 
11 | &tsmsg("[Rec] Add [$tag] to repeat name.\n"); 
12 | while (my $line = <>) {
13 | 	# Header lines get "#<tag>" appended to the sequence ID; a header whose
14 | 	# '>' is not in the first column fails the substitution and is reported
15 | 	# through stopErr.
16 | 	if ($line =~ m/^\s*>/) {
17 | 		$line =~ s/^>(\S+)/>$1#$tag/ or &stopErr("[Err] $line"); 
18 | 	}
19 | 	print STDOUT $line; 
20 | }
21 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/cmd_list_forFinalRepDB:
--------------------------------------------------------------------------------
1 | cat allLTR.lib.noProtFinal.classified use_MITE.lib.noProtFinal.classified ModelerID.lib.noProtFinal Modelerunknown.lib.noProtFinal | deal_fasta.pl -frag_head -frag 0-0 -frag_width 100 | deal_fasta.pl -chopKey ':\d+\-\d+$' | perl -e ' while (<>) { s!\t! !g; print; } ' > allRepeats_v1.lib
2 | cat allRepeats_v1.lib | deal_fasta.pl -nres 'Unknown$' > KnownRepeats_v1.lib
3 | 
4 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/detect_centromere/get_candidate_cent_from_whole.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | # Two leading values are required before the table (a file name or STDIN);
 6 | # with only one, the length filter below would compare against undef.
 7 | @ARGV >= 2 or die "perl $0 num_chr min_monomer_len whole/after_clusterin_summary_whole.txt.cntChr > whole/after_clusterin_summary_whole.txt.cntChr.slct\n";
 8 | 
 9 | my $nChr = shift;
10 | my $min_monomer_len = shift;
11 | 
12 | while (<>) {
13 |   chomp;
14 |   my @ta=split(/\t/, $_);
15 |   # Skip the header row.
16 |   $ta[0] eq "CL_ID" and next;
17 |   # Keep rows whose column 5 equals num_chr and whose column 3 reaches the
18 |   # minimum length.
19 |   $ta[4] == $nChr or next;
20 |   $ta[2] >= $min_monomer_len or next;
21 |   # Columns 6 and 7 hold ';'-separated lists that are walked in parallel.
22 |   my @a1 = split(/;/, $ta[5]);
23 |   my @a2 = split(/;/, $ta[6]);
24 |   for (my $i=0; $i<@a1; $i++) {
25 |     # Each column-7 item is itself split on the literal "<*>" delimiter; its
26 |     # 3rd and 4th parts become the last two output columns.
27 |     my @a3 = split(/\<\*\>/, $a2[$i]);
28 |     print STDOUT join("\t", $a2[$i], "$a1[$i]__$ta[0]", $a1[$i], $ta[0], $a3[2], $a3[3])."\n";
29 |   }
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/filter_RepMsk_out.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | 
 6 | unless (@ARGV) {
 7 | 	die "perl $0 in_RepMsk.out\n"; 
 8 | }
 9 | 
10 | while (my $line = <>) {
11 | 	# Only lines that start with a number (after optional blanks) are records.
12 | 	next unless $line =~ m/^\s*\d+/; 
13 | 	chomp $line; 
14 | 	$line =~ s/^\s+//; 
15 | 	$line =~ s/\s+$//; 
16 | 	my @ta = split(/\s+/, $line); 
17 | 	my ($name1, $name2) = @ta[4, 9]; 
18 | 
19 | 	# Column 5: RR<n>_<seq>_<start>_<end>_INN_<scaffold>
20 | 	my ($in_seq, $in_s, $in_e, $in_scf) = $name1 =~ m/^RR\d+_(seq\d+)_(\d+)_(\d+)_INN_(\S+)$/ 
21 | 		or die "name1=$name1\n"; 
22 | 	# Column 10: <scaffold>:<start>-<end>:<F|R>
23 | 	my ($hit_scf, $hit_s, $hit_e, $hit_dir) = $name2 =~ m/^([^\s:]+):(\d+)\-(\d+):([FR])$/ 
24 | 		or die "$name2\n"; 
25 | 
26 | 	# A hit on the same scaffold that ends right before the inner region or
27 | 	# starts right after it is dropped; every other record is printed.
28 | 	my $is_adjacent = ( $in_scf eq $hit_scf 
29 | 		and ( $in_s-1 == $hit_e or $in_e+1 == $hit_s ) ); 
30 | 	print STDOUT "$line\n" unless $is_adjacent; 
31 | }
32 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/mk_outID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | # seqID   LTR1_S  LTR1_E  LTR2_S  LTR2_E  Inner_S Inner_E PBS_S   PBS_E   Strand  scfID
 5 | # seq10   785209  785369  788589  788749  785370  788588  785373  785384  +       S400016_pilon
 6 | 
 7 | # Append one extra column to every line: columns 1, 2, 5 and 11 joined by '_'.
 8 | while (my $row = <>) {
 9 | 	chomp $row; 
10 | 	my @col = split(/\t/, $row); 
11 | 	my $out_id = join('_', @col[0, 1, 4, 10]); 
12 | 	print STDOUT "$row\t$out_id\n"; 
13 | }
14 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/muscle3.8.31_i86linux64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/annot_tools/repAnno_tools/muscle3.8.31_i86linux64


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/name_from_tab.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | 
 6 | if ( -t and !@ARGV ) {
 7 | 	die "perl $0 dgt.tab\n"; 
 8 | }
 9 | 
10 | # eleID   eleS    eleE    Str     seqID   LTR1_S  LTR1_E  LTR2_S  LTR2_E  Inner_S Inner_E PBS_S   PBS_E   PPT_S   PPT_E   scfID
11 | # RR1     98334   105001  ?       seq10   98339   99198   104137  104996  99199   104136  -1      -1      -1      -1      S400016_pilon
12 | # RR2     785204  788754  +       seq10   785209  785369  788589  788749  785370  788588  785373  785384  -1      -1      S400016_pilon
13 | 
14 | while (my $row = <>) {
15 | 	chomp $row; 
16 | 	my @ta = split(/\t/, $row); 
17 | 	# The header row is not converted.
18 | 	next if $ta[0] eq 'eleID'; 
19 | 
20 | 	my ($eleID, $seqID, $scfID) = @ta[0,4,15]; 
21 | 	# One name per region type, each built from a start and an end column: 
22 | 	#   ELE : element region   (eleS   .. eleE,   with TSD)
23 | 	#   LTR : LTR region       (LTR1_S .. LTR2_E, without TSD)
24 | 	#   INN : internal region  (Inner_S .. Inner_E, without ltr region)
25 | 	my @regions = ( 
26 | 		[ 'ELE', $ta[1], $ta[2]  ], 
27 | 		[ 'LTR', $ta[5], $ta[8]  ], 
28 | 		[ 'INN', $ta[9], $ta[10] ], 
29 | 	); 
30 | 	for my $r (@regions) {
31 | 		my ($type, $s, $e) = @$r; 
32 | 		print STDOUT "${eleID}_${seqID}_${s}_${e}_${type}_$scfID\t${eleID}_\t${eleID}\n"; 
33 | 	}
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/path.conf:
--------------------------------------------------------------------------------
 1 | dir2	/home/Sunhh/tools/github/NGS_data_processing
 2 | pl_deal_fasta	__dir2__/deal_fasta.pl
 3 | pl_deal_table	__dir2__/deal_table.pl
 4 | 
 5 | dir1	/home/Sunhh/tools/github/NGS_data_processing/repAnno_tools
 6 | pl_ch_gff_to_tab	__dir1__/ch_gff_to_tab.pl
 7 | pl_ch_seqID	__dir1__/ch_seqID.pl
 8 | pl_filter_tab_byPBSPPT	__dir1__/filter_tab_byPBSPPT.pl
 9 | pl_name_from_tab	__dir1__/name_from_tab.pl
10 | pl_filter_flank	__dir1__/filter_flank.pl
11 | pl_filter_RepMsk_out	__dir1__/filter_RepMsk_out.pl
12 | pl_build_Examplar_byFa	__dir1__/build_Examplar_byFa.pl
13 | pl_lis_masked_RepMsk_out	__dir1__/lis_masked_RepMsk_out.pl
14 | pl_get_LTR_wi_Termi	__dir1__/get_LTR_wi_Termi.pl
15 | 
16 | exe_RepeatMasker	/data/Sunhh/src/Annot/repeatmasker/RepeatMasker/RepeatMasker
17 | exe_gt	/data/Sunhh/src/Annot/genometools/gt-1.5.3-complete/bin/gt
18 | exe_makeblastdb	/usr/local/bin/makeblastdb
19 | exe_blastn	/usr/local/bin/blastn
20 | exe_mv	/usr/bin/mv
21 | 
22 | eu_tRNA	/data/Sunhh/P1_repeat/db/eukaryotic-tRNAs.fa
23 | ref_dbLTR_ltr99	LTR99_named.lib
24 | ref_dbLTR_trim99	TRIM99_named.lib
25 | refFa	P1Genom_Gt5h.scf.fa
26 | refIdx	P1GenomeGt5hScf
27 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/rmOutToGFF3_with_TEclass.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | @ARGV >= 1 or die "Usage: $0 <RepeatMasker.out file>\n";
 6 | 
 7 | my $path = $ARGV[0];
 8 | open my $fh, '<', $path or die "Cannot open $path: $!\n";
 9 | 
10 | while (my $line = <$fh>) {
11 |   # Only lines that begin with a number (after optional blanks) are records.
12 |   next if $line =~ /^#/;
13 |   next unless $line =~ /^\s*\d+/;
14 | 
15 |   my @col = split ' ', $line;
16 |   my ($score, $div, $query, $q_start, $q_end, $repeat, $class) = @col[0, 1, 4, 5, 6, 9, 10];
17 | 
18 |   # Columns 12-14 hold the repeat coordinates. For a "C" record the start is
19 |   # read from column 14 and the end from column 13; otherwise columns 12 and 13.
20 |   my ($strand, $t_start, $t_end);
21 |   if ($col[8] eq "C") {
22 |     $strand = "-";
23 |     ($t_start, $t_end) = @col[13, 12];
24 |   } else {
25 |     $strand = "+";
26 |     ($t_start, $t_end) = @col[11, 12];
27 |   }
28 | 
29 |   # Identity is reported as 100 minus the divergence column, one decimal.
30 |   my $sim = sprintf("%0.1f", 100-$div);
31 |   my $attributes = "Target=$repeat $t_start $t_end;Classification=$class;Identity=$sim";
32 | 
33 |   print join("\t", $query, "RepeatMasker", "dispersed_repeat", $q_start, $q_end, $score, $strand, ".", $attributes), "\n";
34 | }
35 | 
36 | close $fh;
37 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_MITE.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command with a timestamp, run it through eval, then log completion.
 3 | function exe_cmd {
 4 | 	echo "[$(date)][CMD] $1"
 5 | 	eval "$1"
 6 | 	echo "[$(date)][Rec] Done."
 7 | }
 8 | 
 9 | # Print a timestamped message.
10 | function tsmsg {
11 | 	echo "[$(date)]$1"
12 | }
13 | 
14 | pl_dealFa=$HOME/tools/github/NGS_data_processing/deal_fasta.pl
15 | pl_addClass=$HOME/tools/github/NGS_data_processing/repAnno_tools/add_repClass.pl
16 | 
17 | pl_MITE=$HOME/src/Annotation/MITE_Hunter/MITE_Hunter_blast216/MITE_Hunter_manager.pl
18 | 
19 | refFa='P1All.scf.fa'
20 | outG='P1AllGt5hScf'
21 | cpuN=10
22 | grpN=10
23 | 
24 | tsmsg "Start."
25 | 
26 | # exe_cmd "mkdir running/ Step8/"
27 | 
28 | [ -d "running" ] || exe_cmd "mkdir running"
29 | [ -d "Step8" ] || exe_cmd "mkdir Step8"
30 | 
31 | # Stop if the working directory cannot be entered; otherwise every command
32 | # below, and the final "cd ../", would act on the wrong directory.
33 | cd running || { tsmsg "[Err] Cannot enter running/"; exit 1; }
34 | # Link the reference only when it is not already there, so a rerun does not
35 | # fail on the existing link and an existing file is never replaced.
36 | [ -e "$refFa" ] || ln -s "../$refFa" .
37 | 
38 | exe_cmd "perl $pl_MITE -c $cpuN -n $grpN -S 12345678 -i $refFa -g $outG"
39 | exe_cmd "cp -p *_Step8*.fa ../Step8/"
40 | exe_cmd "cat ${outG}_Step8*.fa > ../MITE_raw.lib"
41 | exe_cmd "perl $pl_dealFa ../MITE_raw.lib -frag_head -frag_width 80 -frag 0-0 | perl $pl_dealFa -chopKey ':\\d+-\\d+\$' | perl $pl_addClass MITE > ../MITE_named.lib"
42 | 
43 | cd ../
44 | 
45 | tsmsg "All done."
46 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_repClass_ltr.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command with a timestamp, run it through eval, then log completion.
 3 | # "$1" is quoted so the command string reaches eval unmodified (no word
 4 | # splitting or glob expansion before eval re-parses it).
 5 | function exe_cmd {
 6 | 	echo "[$(date)][CMD] $1"
 7 | 	eval "$1"
 8 | 	echo "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print a timestamped message.
12 | function tsmsg {
13 | 	echo "[$(date)]$1"
14 | }
15 | 
16 | exe_cmd "/share/app/Annotation/repeatmodeler/RepeatModeler/RepeatClassifier -consensi allLTR_rmGen_chop.fa -engine ncbi"
17 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_repClass_mite.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command with a timestamp, run it through eval, then log completion.
 3 | # "$1" is quoted so the command string reaches eval unmodified (no word
 4 | # splitting or glob expansion before eval re-parses it).
 5 | function exe_cmd {
 6 | 	echo "[$(date)][CMD] $1"
 7 | 	eval "$1"
 8 | 	echo "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print a timestamped message.
12 | function tsmsg {
13 | 	echo "[$(date)]$1"
14 | }
15 | 
16 | exe_cmd "/share/app/Annotation/repeatmodeler/RepeatModeler/RepeatClassifier -consensi MITE_rmGen_chop.fa -engine ncbi"
17 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_repeatmasker.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command with a timestamp, run it through eval, then log completion.
 3 | # "$1" is quoted so the command string reaches eval unmodified (no word
 4 | # splitting or glob expansion before eval re-parses it).
 5 | function exe_cmd {
 6 | 	echo "[$(date)][CMD] $1"
 7 | 	eval "$1"
 8 | 	echo "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print a timestamped message.
12 | function tsmsg {
13 | 	echo "[$(date)]$1"
14 | }
15 | 
16 | exe_rpmk="/data/Sunhh/src/annotation/repeatmasker/RepeatMasker/RepeatMasker"
17 | pl_buildSum="/data/Sunhh/src/annotation/repeatmasker/RepeatMasker/util/buildSummary.pl"
18 | 
19 | cpuN=40
20 | repLib="D202306.TElib.fa"
21 | seqfile="hap1.fa"
22 | tsv_genom="$seqfile.tsv"
23 | 
24 | tsmsg "[Rec] All start."
25 | 
26 | # Two-column table (ID, column 8 minus column 6 of the -baseCount output,
27 | # header row skipped) that is handed to buildSummary.pl through -genome.
28 | deal_fasta.pl -baseCount "$seqfile" | awk 'NR > 1 {print $1"\t"$8-$6}' > "$tsv_genom"
29 | 
30 | exe_cmd "$exe_rpmk -s -x -lib $repLib $seqfile -nolow -norna -no_is -pa $cpuN -a 1>$seqfile.stdout_RepMsk 2>$seqfile.stderr_RepMsk"
31 | exe_cmd "perl $pl_buildSum -useAbsoluteGenomeSize -genome $tsv_genom $seqfile.out > $seqfile.out.NuclGenom.summary"
32 | 
33 | tsmsg "[Rec] All done."
34 | 
35 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_rm_GF.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Run the command string given as $1 via eval, logging a timestamp before
 3 | # and after.
 4 | exe_cmd() {
 5 | 	local cmd=$1
 6 | 	printf '%s\n' "[$(date)][CMD] $cmd"
 7 | 	eval "$cmd"
 8 | 	printf '%s\n' "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print $1 prefixed with a timestamp.
12 | tsmsg() {
13 | 	printf '%s\n' "[$(date)]$1"
14 | }
15 | 
16 | # Scripts used by the run.
17 | pl_GF="$HOME/tools/github/NGS_data_processing/repAnno_tools/rm_geneFrag.pl"
18 | pl_PE="/data/Sunhh/P1_annot/01.P1_repeat/05.rmGeneFrag/tools/ProtExcluder1.1/ProtExcluder.pl"
19 | pl_dealFa="$HOME//tools/github/NGS_data_processing/deal_fasta.pl"
20 | 
21 | # Protein database; the second assignment is the one that takes effect.
22 | dbProt='uniprot_sprot_plants_rmTransProt.fa'
23 | dbProt='/data/Sunhh/database/db_fasta/uniprot/20140917/uniprot_sprot_plants_rmTransProt.fa'
24 | 
25 | inLibLis="inLibLis"
26 | cpuN=30
27 | 
28 | tsmsg "[Rec] Start"
29 | 
30 | exe_cmd "perl $pl_GF -evalue 1e-2 -rawLibLis $inLibLis -cpuN $cpuN -dbProt $dbProt -pl_ProtExcluder $pl_PE -pl_dealFa $pl_dealFa"
31 | 
32 | tsmsg "[Rec] All done."
33 | 
34 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/run_rpmd.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Run the command string given as $1 via eval, logging a timestamp before
 3 | # and after.
 4 | exe_cmd() {
 5 | 	local cmd=$1
 6 | 	printf '%s\n' "[$(date)][CMD] $cmd"
 7 | 	eval "$cmd"
 8 | 	printf '%s\n' "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print $1 prefixed with a timestamp.
12 | tsmsg() {
13 | 	printf '%s\n' "[$(date)]$1"
14 | }
15 | 
16 | 
17 | # Tool locations.
18 | dir_rpmd='/data/Sunhh/src/Annot/repeatmodeler/RepeatModeler'
19 | exe_rpmd="$dir_rpmd/RepeatModeler"
20 | exe_rpmdBD="$dir_rpmd/BuildDatabase"
21 | exe_rpmk='/data/Sunhh/src/Annot/repeatmasker/RepeatMasker/RepeatMasker'
22 | 
23 | pl_dealFa='/home/Sunhh/tools/github/NGS_data_processing/deal_fasta.pl'
24 | 
25 | # Inputs: reference sequences and the repeat library used for masking.
26 | refFa='P3Genom_Gt5h.scf.fa'
27 | repDb='all_LTR_MITE.lib'
28 | 
29 | # 1) Mask the reference with the library (-x writes X for masked bases).
30 | exe_cmd "$exe_rpmk -lib $repDb $refFa -x -nolow -norna -no_is -pa 40 -a 1>stdout.RepMsk 2>stderr.RepMsk"
31 | # 2) List the stretches of the masked file made only of A/T/G/C/N letters and
32 | #    draw them from the reference into $refFa.um.
33 | exe_cmd "perl $pl_dealFa -listSite '[ATGCNatgcn]+' $refFa.masked > $refFa.um_list"
34 | exe_cmd "perl $pl_dealFa $refFa -drawByList -drawList $refFa.um_list -drawLcol 0,2,3 > $refFa.um"
35 | 
36 | # 3) Build a database named "um" from those sequences and run RepeatModeler on it.
37 | exe_cmd "$exe_rpmdBD -name um -engine ncbi $refFa.um"
38 | exe_cmd "$exe_rpmd -database um -pa 40 "
39 | 
40 | 


--------------------------------------------------------------------------------
/annot_tools/repAnno_tools/seqID_to_scaf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings; 
 4 | 
 5 | # seqID => scfID for every sequence number seen so far.
 6 | my %scaf_of; 
 7 | print STDERR "seqID\tscfID\n"; 
 8 | while (my $line = <>) {
 9 | 	if ( $line =~ m/^>/ ) {
10 | 		# Header layout: ">scfID ... (dbseq-nr N) [start,end]"
11 | 		my ($scfID, $nr, $eleS, $eleE) = $line =~ m/^>(\S+) .*\(dbseq\-nr (\d+)\) \[(\d+),(\d+)\]$/ 
12 | 			or die "$line\n"; 
13 | 		my $seqID = "seq$nr"; 
14 | 		# The header is rewritten to ">seqN_start_end_scfID".
15 | 		$line = ">${seqID}_${eleS}_${eleE}_$scfID\n"; 
16 | 		if ( defined $scaf_of{$seqID} ) {
17 | 			# One seqID must always belong to the same scaffold.
18 | 			$scaf_of{$seqID} eq $scfID or die "$scaf_of{$seqID} eq $scfID\n$line\n"; 
19 | 		} else {
20 | 			# First time this seqID is seen: log the mapping on STDERR.
21 | 			print STDERR "$seqID\t$scfID\n"; 
22 | 			$scaf_of{$seqID} = $scfID; 
23 | 		}
24 | 	}
25 | 	print $line; 
26 | }
27 | 


--------------------------------------------------------------------------------
/annot_tools/replace_blast_asn_db.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # 20190717 : Replace database file in .asn.1 files to use blast_formatter. 
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | unless (@ARGV) {
 7 | 	die "perl $0 new_db_path input_raw.asn > new.asn\n"; 
 8 | }
 9 | 
10 | my $new_db = shift; 
11 | 
12 | while (my $line = <>) {
13 | 	# Only the 'subject database "..."' line is rewritten; the quoted value
14 | 	# is replaced by the new database path. Everything else passes through.
15 | 	if ( $line =~ m!^\s+subject database \"! ) {
16 | 		$line =~ s!^(\s+subject database \").+\"!$1${new_db}"! 
17 | 			or die "Failed at line: $line\n"; 
18 | 	}
19 | 	print $line; 
20 | }
21 | 


--------------------------------------------------------------------------------
/annot_tools/run_maker.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command with a timestamp, run it through eval, then log completion.
 3 | # "$1" is quoted so the command string reaches eval unmodified (no word
 4 | # splitting or glob expansion before eval re-parses it).
 5 | function exe_cmd {
 6 | 	echo "[$(date)][CMD] $1"
 7 | 	eval "$1"
 8 | 	echo "[$(date)][Rec] Done."
 9 | }
10 | 
11 | # Print a timestamped message.
12 | function tsmsg {
13 | 	echo "[$(date)]$1"
14 | }
15 | 
16 | cpuN=50
17 | ctl_bopts="maker_bopts.ctl"
18 | ctl_exe="maker_exe.ctl"
19 | 
20 | roundN=2
21 | ctl_opts="maker_opts.r${roundN}.ctl"
22 | exe_cmd "mpiexec -n $cpuN maker $ctl_opts $ctl_bopts $ctl_exe 1>maker.${roundN}-log 2>maker.${roundN}-err"
23 | 
24 | #roundN=3
25 | #ctl_opts="maker_opts.r${roundN}.ctl"
26 | #exe_cmd "mpiexec -n $cpuN maker $ctl_opts $ctl_bopts $ctl_exe 1>maker.${roundN}-log 2>maker.${roundN}-err"
27 | 
28 | # Get result. 
29 | dir_maker='PG1All_v2_Scf.unmsk.maker.output'
30 | db_idx='PG1All_v2_Scf.unmsk_master_datastore_index.log'
31 | path_gffMerge=gff3_merge
32 | path_fasMerge=fasta_merge
33 | 
34 | # Stop if the maker output directory is missing; otherwise the merge
35 | # commands and the final "cd ../" would run from the wrong directory.
36 | cd "$dir_maker/" || { tsmsg "[Err] Cannot enter $dir_maker/"; exit 1; }
37 | # -p: also create ../result when needed and do not fail on a rerun.
38 | exe_cmd "mkdir -p ../result/r$roundN"
39 | exe_cmd "$path_gffMerge -d $db_idx -g -o ../result/r${roundN}/r${roundN}_maker.gff3"
40 | exe_cmd "$path_gffMerge -d $db_idx -n -o ../result/r${roundN}/r${roundN}_all.gff3"
41 | exe_cmd "$path_fasMerge -d $db_idx -o ../result/r${roundN}/r${roundN}"
42 | cd ../
43 | 
44 | # ipr_update_gff
45 | # https://groups.google.com/forum/#!msg/maker-devel/VaoXWlGHOjs/kbh0YDl1b5gJ
46 | 
47 | # GlimmerHMM
48 | # http://ccb.jhu.edu/software/glimmerhmm/man.shtml#spec_org
49 | 
50 | 


--------------------------------------------------------------------------------
/annot_tools/satisfied_prot.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions. exe_cmd: echo a timestamped command, eval it, then echo "Done." (the exit status is not checked).
 2 | function exe_cmd {
 3 | 	echo "[$(date)][CMD] $1"
 4 | 	eval "$1"
 5 | 	echo "[$(date)][Rec] Done."
 6 | }
 7 | # tsmsg: echo a message prefixed with the current date.
 8 | function tsmsg {
 9 | 	echo "[$(date)]$1"
10 | }
11 | # Helper scripts: called below as the .bp0 -> .bp6 converter and as the GFF3 selector.
12 | pl_bpTrans=$HOME/tools/github/NGS_data_processing/bp0_2_bp6.pl
13 | pl_slctGff=$HOME/tools/github/NGS_data_processing/annot_tools/slct_maker_gff3.pl
14 | # blastp database, tag used in the output file names, and thread count.
15 | dbPath=prot_db/uniprot_sprot_plants.fas
16 | dbTag="Sprot"
17 | cpuN=50
18 | # Tolerance used by the awk filter below; presumably the allowed distance between alignment ends and sequence ends -- confirm the .bp6 column layout.
19 | maxDist2Edge=9
20 | # Round number used to build the input and output file names.
21 | rN=2
22 | 
23 | inProtFa="r${rN}_maker.prot.fa"
24 | inGff="r${rN}_maker.gff3"
25 | outGff="r${rN}_maker_good.gff3"
26 | # Steps: blastp -> convert .bp0 to .bp6 -> keep rows passing the awk test (columns 7,9 <= maxDist2Edge+1; 10 >= col14-maxDist2Edge; 8 >= col13-maxDist2Edge; col3 >= 60) -> select GFF3 records.
27 | exe_cmd "blastp -evalue 1e-10 -query $inProtFa -db $dbPath -num_threads $cpuN -out ${inProtFa}.to${dbTag}.bp0"
28 | exe_cmd "perl $pl_bpTrans -in ${inProtFa}.to${dbTag}.bp0 -out ${inProtFa}.to${dbTag}.bp6"
29 | exe_cmd "awk ' \$7 <= $maxDist2Edge+1 && \$9 <= $maxDist2Edge+1 && \$10 >= \$14-$maxDist2Edge && \$8 >= \$13-$maxDist2Edge && \$3 >= 60 ' ${inProtFa}.to${dbTag}.bp6 > ${inProtFa}.to${dbTag}.bp6.good" 
30 | exe_cmd "perl $pl_slctGff ${inProtFa}.to${dbTag}.bp6.good $inGff > $outGff"
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/annot_tools/simplify_gff3.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reduce column 9 of a GFF3 file to the bare ID / Parent attributes:
 3 | #   gene     -> ID=<id>
 4 | #   mRNA     -> ID=<id>[;Parent=<gene id>]
 5 | #   CDS/exon -> Parent=<parent id>
 6 | # Comment and blank lines are copied unchanged; any other feature type is fatal.
 7 | use strict;
 8 | use warnings;
 9 | 
10 | -t and !@ARGV and die "perl $0 in.gff3 > simple_IDs.gff3\n";
11 | 
12 | while (<>) {
13 |   m!^\s*(#|$)! and do { print; next; };
14 |   chomp;
15 |   my @ta=split(/\t/, $_);
16 |   # A feature line needs all nine GFF3 columns; report the offending line otherwise.
17 |   @ta >= 9 or die "[Err] Less than 9 columns in line: $_\n";
18 |   if ($ta[2] =~ m!^gene$!i) {
19 |     $ta[8] =~ m!(?:^|\s|;)ID=([^\s;]+)! or die "$_\n";
20 |     $ta[8] = "ID=$1";
21 |   } elsif ($ta[2] =~ m!^mRNA$!i) {
22 |     my ($mid, $gid);
23 |     $ta[8] =~ m!(?:^|\s|;)ID=([^\s;]+)! or die "$_\n";
24 |     $mid = $1;
25 |     # Parent is optional for mRNA records.
26 |     $ta[8] =~ m!(?:^|\s|;)Parent=([^\s;]+)!i and $gid = $1;
27 |     if (defined $gid) {
28 |       $ta[8] = "ID=$mid;Parent=$gid";
29 |     } else {
30 |       $ta[8] = "ID=$mid";
31 |     }
32 |   } elsif ($ta[2] =~ m!^(?:CDS|exon)$!i) {
33 |     # The group is required: '^CDS|exon$' means '^CDS' OR 'exon$' and would
34 |     # also accept types that merely start with CDS or end with exon.
35 |     $ta[8] =~ m!(?:^|\s|;)Parent=([^\s;]+)!i or die "$_\n";
36 |     $ta[8] = "Parent=$1";
37 |   } else {
38 |     die "[Err] Unknown feature [$ta[2]]\n";
39 |   }
40 |   print join("\t", @ta)."\n";
41 | }
42 | #21QDX551_Chr02	EVM	gene	77439	81626	.	+	.	ID=21QDX551C02G000010;Name=EVM%20prediction%2021QDX551_Chr02.1
43 | #21QDX551_Chr02	EVM	mRNA	77439	81626	.	+	.	ID=21QDX551C02G000010.1;Parent=21QDX551C02G000010;Name=EVM%20prediction%2021QDX551_Chr02.1
44 | #21QDX551_Chr02	EVM	exon	77439	77768	.	+	.	ID=evm.model.21QDX551_Chr02.1.exon1;Parent=21QDX551C02G000010.1
45 | 
46 | 


--------------------------------------------------------------------------------
/annot_tools/tRNA/stat_trnaFreq.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Count tRNA genes per amino acid (column 6) and anticodon (column 7) of a
 3 | # tab-separated tRNA table and print one row per amino acid / anticodon pair
 4 | # together with the total for that amino acid.
 5 | use strict; 
 6 | use warnings; 
 7 | use LogInforSunhh; 
 8 | 
 9 | -t and !@ARGV and die "perl $0 arab.chr.fa.trnascan.o.slct > arab.chr.fa.trnascan.o.slct.stat\n"; 
10 | 
11 | # [Sunhh@bioinfor01 work]$ head -4 arab.chr.fa.trnascan.o.slct
12 | # trna_1  1       +       306384  306456  Val     TAC     ggtgctgtggtgtagtggttatcacgtttgccttacacgcaaaaggtctccagttcgatcctgggcagcacca
13 | # trna_2  1       +       515494  515566  Phe     GAA     gcggggatagctcagttgggagagcgtcagactgaagatctgaaggtcgcgtgttcgatccacgctcaccgca
14 | # trna_3  1       +       552640  552711  His     GTG     gtggctgtagtttagtggtaagaattccacgttgtggccgtggagacctgggctcgaatcccagcagccaca
15 | # trna_4  1       +       604402  604474  Lys     CTT     gcccgtctagctcagttggtagagcgcaaggctcttaaccttgtggtcgtgggttcgagccccacggtgggcg
16 | 
17 | my %h; # {amino acid}{anticodon} => number of tRNA genes
18 | while (<>) {
19 | 	m!^\s*(#|$)! and next; # Skip comment and blank lines.
20 | 	chomp; 
21 | 	my @ta = split(/\t/, $_); 
22 | 	# Both key columns are required; report and skip truncated lines.
23 | 	@ta >= 7 or do { warn "[Wrn] Skip line with less than 7 columns: $_\n"; next; }; 
24 | 	$h{$ta[5]}{$ta[6]}++; 
25 | }
26 | print STDOUT join("\t", qw/AA Anticodon GeneN AA_GeneN/)."\n"; 
27 | for my $aa ( sort keys %h ) {
28 | 	my $sum_gene = 0; 
29 | 	for my $cc ( sort keys %{$h{$aa}} ) {
30 | 		$sum_gene += $h{$aa}{$cc}; 
31 | 	}
32 | 	# The counts used to be accumulated and then discarded, so the script
33 | 	# produced no output at all. NOTE(review): column layout chosen here -- confirm.
34 | 	for my $cc ( sort keys %{$h{$aa}} ) {
35 | 		print STDOUT join("\t", $aa, $cc, $h{$aa}{$cc}, $sum_gene)."\n"; 
36 | 	}
37 | }
38 | 
39 | 


--------------------------------------------------------------------------------
/annot_tools/trim_orphan_right_brack.pl:
--------------------------------------------------------------------------------
1 | /home/Sunhh/tools/github/NGS_data_processing/annot_tools/ahrd/trim_orphan_right_brack.pl


--------------------------------------------------------------------------------
/assemble_tools/LAI/pepper_genome.fa.chrID:
--------------------------------------------------------------------------------
 1 | Chr01
 2 | Chr02
 3 | Chr03
 4 | Chr04
 5 | Chr05
 6 | Chr06
 7 | Chr07
 8 | Chr08
 9 | Chr09
10 | Chr10
11 | Chr11
12 | Chr12
13 | 


--------------------------------------------------------------------------------
/assemble_tools/LAI/pepper_genome.fa.out.LAI_unlock.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/assemble_tools/LAI/pepper_genome.fa.out.LAI_unlock.gz


--------------------------------------------------------------------------------
/assemble_tools/LAI/pepper_genome.fa.out.LAI_unlock_distr_byChr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/assemble_tools/LAI/pepper_genome.fa.out.LAI_unlock_distr_byChr.pdf


--------------------------------------------------------------------------------
/assemble_tools/LAI/scrn.para_LAI.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/assemble_tools/LAI/scrn.para_LAI.gz


--------------------------------------------------------------------------------
/assemble_tools/add_1bp_ctg_toAGP_jcviPlot.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Append two records to every object of an AGP file:
 3 | #   1. an 'N' gap of -gapLen bp right after the last record of the object;
 4 | #   2. a 1-bp 'W' component named 'NA' right after that gap.
 5 | # Objects are written in order of first appearance. Comment and blank lines are
 6 | # copied to the output as soon as they are read, i.e. ahead of the object records.
 7 | use strict; 
 8 | use warnings; 
 9 | use Getopt::Long; 
10 | my %opts; 
11 | GetOptions(\%opts, 
12 | 	"help!", 
13 | 	"gapLen:i", # 1000
14 | ); 
15 | $opts{'gapLen'} //= 1000; 
16 | 
17 | my $usage = "perl $0 -gapLen 1000 in.agp > o.agp\n"; 
18 | # -help was accepted by GetOptions but never acted on.
19 | $opts{'help'} and die $usage; 
20 | -t and !@ARGV and die $usage; 
21 | 
22 | my %aa; # object ID => { 'ord' => first input line number, 'arr' => [ records ] }
23 | while (<>) {
24 | 	chomp; 
25 | 	# Header/comment lines are not records: storing them as an object would
26 | 	# emit a bogus gap and component after them.
27 | 	m!^\s*(#|$)! and do { print "$_\n"; next; }; 
28 | 	my @ta = split(/\t/, $_); 
29 | 	$aa{$ta[0]}{'ord'} //= $.; 
30 | 	push(@{$aa{$ta[0]}{'arr'}}, [@ta]); 
31 | }
32 | for my $k1 (sort { $aa{$a}{'ord'} <=> $aa{$b}{'ord'} } keys %aa) {
33 | 	my @ta2; # last record of the object; columns 3 and 4 are used as object end and part number
34 | 	for my $t1 (@{$aa{$k1}{'arr'}}) {
35 | 		print join("\t", @$t1)."\n"; 
36 | 		@ta2 = @$t1; 
37 | 	}
38 | 	# Gap record, then the 1-bp component that follows the gap.
39 | 	print join("\t", $ta2[0], $ta2[2]+1, $ta2[2]+$opts{'gapLen'}, $ta2[3]+1, 'N', $opts{'gapLen'}, 'scaffold', 'yes', 'map')."\n";
40 | 	print join("\t", $ta2[0], $ta2[2]+$opts{'gapLen'}+1, $ta2[2]+$opts{'gapLen'}+1, $ta2[3]+2, 'W', 'NA', 1,1, '+')."\n";  
41 | }
42 | 


--------------------------------------------------------------------------------
/assemble_tools/add_tag_to_fsa.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Prefix every FASTA sequence ID with "<tag>." and replace the rest of the
 3 | # header line with the tag itself. Sequence lines are copied unchanged.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | @ARGV or die "perl $0 tag in.fa\n";
 8 | my $tag = shift @ARGV;
 9 | 
10 | while (my $line = <>) {
11 | 	# $1 is the first whitespace-free word after '>'.
12 | 	$line = ">$tag.$1 $tag\n" if $line =~ m/^>(\S+)/;
13 | 	print $line;
14 | }
15 | 


--------------------------------------------------------------------------------
/assemble_tools/bionano/rename_xmap.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Rename an xmap file together with its two cmap files: write <new>.xmap from
 3 | # <raw>.xmap with the '# Reference Maps From:' / '# Query Maps From:' header
 4 | # paths pointing at <new>_r.cmap / <new>_q.cmap, delete <raw>.xmap, then
 5 | # rename <raw>_q.cmap and <raw>_r.cmap accordingly.
 6 | use strict; 
 7 | use warnings; 
 8 | use File::Copy qw(move); 
 9 | @ARGV >= 2 or die "perl $0 raw.xmap new.xmap\n"; 
10 | my $fn = shift; 
11 | my $new_fn = shift; 
12 | $fn =~ s/\.xmap$//; 
13 | $new_fn =~ s/\.xmap$//; 
14 | # With identical names the output open would truncate the input before it is
15 | # read, and the file would be deleted afterwards.
16 | $fn eq $new_fn and die "[Err] Input and output names are identical: ${fn}.xmap\n"; 
17 | 
18 | open my $ifh, '<', "${fn}.xmap" or die "[Err] Failed to open ${fn}.xmap: $!\n"; 
19 | open my $ofh, '>', "${new_fn}.xmap" or die "[Err] Failed to write ${new_fn}.xmap: $!\n"; 
20 | while (<$ifh>) {
21 | 	if (m/^#/) {
22 | 		s/^(# Reference Maps From:\s*)\S+\.cmap/${1}${new_fn}_r.cmap/; 
23 | 		s/^(# Query Maps From:\s*)\S+\.cmap/${1}${new_fn}_q.cmap/; 
24 | 	}
25 | 	print {$ofh} $_; 
26 | }
27 | close $ifh; 
28 | # Buffered output is only guaranteed on disk after a successful close; check
29 | # it before the input file is removed.
30 | close $ofh or die "[Err] Failed to finish ${new_fn}.xmap: $!\n"; 
31 | 
32 | # unlink/move receive the names as data, so spaces or shell metacharacters in
33 | # the paths cannot be misread the way they were by the former rm/mv shell calls.
34 | unlink "${fn}.xmap" or die "[Err] Failed to remove ${fn}.xmap: $!\n"; 
35 | for my $sfx (qw/_q.cmap _r.cmap/) {
36 | 	-e "${fn}${sfx}" or do { warn "[Wrn] Missing file ${fn}${sfx}\n"; next; }; 
37 | 	move("${fn}${sfx}", "${new_fn}${sfx}") or die "[Err] Failed to move ${fn}${sfx} to ${new_fn}${sfx}: $!\n"; 
38 | }
39 | 
40 | 


--------------------------------------------------------------------------------
/assemble_tools/busco/geneCopyN_busco_full_table.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Count, per ID in column 1, the rows of a full_table.tsv that fall into each
 3 | # status class of column 2 (Complete and Duplicated both count as 'complete').
 4 | # Output rows: ID, class, number of rows.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | !@ARGV and -t and die "perl $0 full_table.tsv > full_table.tsv.cnt\n";
 9 | 
10 | # Status text in column 2 => class name used in the output.
11 | my %class_of = (
12 |   'Complete'   => 'complete',
13 |   'Duplicated' => 'complete',
14 |   'Fragmented' => 'fragmented',
15 |   'Missing'    => 'missing',
16 | );
17 | 
18 | my %count;    # class => ID => number of rows
19 | while (my $line = <>) {
20 |   next if $line =~ m!^\s*#! or $line =~ m!^\s*$!;
21 |   chomp $line;
22 |   my @col = split(/\t/, $line);
23 |   my ($busco_id, $status) = @col[0, 1];
24 |   my $class = $class_of{$status};
25 |   defined $class or die "unknown tag [$status]\n";
26 |   $count{$class}{$busco_id}++;
27 | }
28 | for my $class (qw/complete fragmented missing/) {
29 |   my $per_id = $count{$class} // {};
30 |   for my $busco_id (sort keys %$per_id) {
31 |     print join("\t", $busco_id, $class, $per_id->{$busco_id})."\n";
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/assemble_tools/busco/rm_busco_intermediate_files.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [8/9/2022] Compress .log files.
 3 | use strict;
 4 | use warnings;
 5 | use LogInforSunhh;
 6 | use fileSunhh;
 7 | # For each directory given on the command line: remove hmmer_output/, busco_sequences/ and metaeuk_output/ inside every run_*_odb<N> sub-directory, then bgzip selected files under logs/.
 8 | !@ARGV and die "perl $0 busco_output_dir_toRM/\n";
 9 | 
10 | for my $dd (@ARGV) {
11 |   opendir my $dir, "$dd" or die;
12 |   my @files = grep { $_ !~ m!^\.+$! } readdir($dir); # drop entries made only of dots ('.', '..')
13 |   closedir($dir);
14 |   for my $f1 (@files) {
15 |     if ($f1 =~ m!^run_\S+_odb\d+$!) { # e.g. run_embryophyta_odb10
16 |       -e "$dd/$f1/hmmer_output/" and &fileSunhh::_rmtree("$dd/$f1/hmmer_output/"); # _rmtree: presumably a recursive delete -- confirm in fileSunhh
17 |       -e "$dd/$f1/busco_sequences/" and &fileSunhh::_rmtree("$dd/$f1/busco_sequences/");
18 |       -e "$dd/$f1/metaeuk_output/" and &fileSunhh::_rmtree("$dd/$f1/metaeuk_output/");
19 |     }
20 |   }
21 |   for my $a1 (qw/busco hmmsearch_out metaeuk_out hmmsearch_err metaeuk_err/) {
22 |     -e "$dd/logs/$a1.log" and &runCmd("bgzip -@ 10 $dd/logs/$a1.log"); # runCmd: presumably runs the shell command (imported helper) -- confirm
23 |   }
24 | }
25 | 
26 | # rm -rf prot_*/run_embryophyta_odb10/hmmer_output/
27 | # rm -rf prot_*/run_embryophyta_odb10/busco_sequences/
28 | # rm -rf genom_*/run_embryophyta_odb10/hmmer_output/
29 | # rm -rf genom_*/run_embryophyta_odb10/busco_sequences/
30 | # rm -rf genom_*/run_embryophyta_odb10/metaeuk_output/
31 | # gzip genom_*/logs/*.log
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/assemble_tools/classify_tools/cnt_In_bp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Sum, per query sequence (column 1), the span (column 5) of every segment
 3 | # whose column 7 carries an 'In:<value>' entry with a value above zero, and
 4 | # print "<qseqid>\t<sum>" in order of first appearance.
 5 | use strict; 
 6 | use warnings; 
 7 | 
 8 | -t and !@ARGV and die "perl $0 in.bn6.jnInEx > in.bn6.jnInEx.In_bp\n"; 
 9 | 
10 | my %inCnt; # qseqid => summed span of 'In' segments
11 | my %ord;   # qseqid => input line number of its first counted segment
12 | while (<>) {
13 |   chomp;
14 |   my @ta=split(/\t/, $_);
15 |   # Blank or truncated lines have no column 7 to inspect.
16 |   @ta >= 7 or next;
17 |   $ta[0] eq 'qseqid' and next;
18 |   $ta[6] =~ m!In:([\.\d]+)! or next;
19 |   my $inDep = $1;
20 |   $inDep > 0 or next;
21 |   $inCnt{$ta[0]} += $ta[4];
22 |   $ord{$ta[0]} //= $.;
23 | }
24 | for my $k1 (sort { $ord{$a} <=> $ord{$b} } keys %ord) {
25 |   # The former 'join("\t", $)' did not compile; print the ID and its total.
26 |   print join("\t", $k1, $inCnt{$k1})."\n";
27 | }
28 | 
29 | 
30 | 
31 | # Sunhh@swift:/data/Sunhh/cmaxima/rmcont$ head -4 hf2.noRed.tochk.fa.toNt.bn6.jnInEx
32 | #qseqid  qlen    qstart  qend    qspan   KingdomCounts   InExcludeCounts
33 | #ptg000012l      137195  1       7424    7424    rDNA:5.28       Ex:5.28
34 | #ptg000012l      137195  7425    7472    48      Un:0.00 In:0.00
35 | #ptg000012l      137195  7473    18297   10825   rDNA:16.51      Ex:16.51
36 | 
37 | 


--------------------------------------------------------------------------------
/assemble_tools/classify_tools/line_bn6_query.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Collapse a tabular BLAST table to one line per query: the selected columns
 3 | # of its best hits (highest value in column 12 first, at most $max_hits hits)
 4 | # are written side by side after the query ID.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | !@ARGV and -t and die "perl $0 in.bn6 > in.bn6.1query1line\n";
 9 | 
10 | my @bn6_colName = qw/qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen sstrand staxids sscinames sskingdoms stitle/;
11 | my @wanted_idx = (1,2,3,10,11,17,16,18);    # 0-based columns copied per hit
12 | my $max_hits = 5;
13 | 
14 | # Header: the selected column names repeated once per hit slot.
15 | print STDOUT join("\t", "qseqid", (@bn6_colName[@wanted_idx]) x $max_hits)."\n";
16 | 
17 | my (%hits_of, @query_order);
18 | while (my $line = <>) {
19 |   chomp $line;
20 |   my @field = split(/\t/, $line);
21 |   push(@query_order, $field[0]) unless defined $hits_of{$field[0]};
22 |   # Keep the sort key next to the columns that will be printed.
23 |   push(@{$hits_of{$field[0]}}, [ $field[11], [ @field[@wanted_idx] ] ]);
24 | }
25 | for my $query (@query_order) {
26 |   my @ranked = sort { $b->[0] <=> $a->[0] } @{$hits_of{$query}};
27 |   $#ranked = $max_hits - 1 if @ranked > $max_hits;
28 |   print STDOUT join("\t", $query, map { @{$_->[1]} } @ranked)."\n";
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/assemble_tools/cmd_batch_for_mugsy:
--------------------------------------------------------------------------------
1 | perl run_mugsy_MP.pl -cpuN 1 -seqPerBatch 1000 -outfile S1_syn_batch -inmaf S1.fitMugsy.maf -infas S1_all.fitMugsy.fa -printCmd
2 | nohup run_cmd_in_batch.pl tmp1 -cpuN 40 >> scrn.cmd_batch
3 | 


--------------------------------------------------------------------------------
/assemble_tools/cnvt_quast_unaligned_info_to_tbl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Expand an *.unaligned.info table: column 5 holds comma-separated
 3 | # "start-end" segments; print one row per segment together with its length.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 ref_hc0/contigs_reports/contigs_report_hf2-noRed.unaligned.info | less -S\n";
 8 | 
 9 | my @header = qw/Contig Total_length Unaligned_length Unaligned_type unAln_start unAln_end segment_len/;
10 | print join("\t", @header)."\n";
11 | while (my $row = <>) {
12 |   chomp $row;
13 |   my @col = split(/\t/, $row);
14 |   next if $col[1] eq 'Total_length';    # header row of the input
15 |   for my $seg (split(/,/, $col[4])) {
16 |     my ($from, $to) = $seg =~ m!^(\d+)\-(\d+)$! or die "$seg in $row\n";
17 |     print join("\t", @col[0 .. 3], $from, $to, $to - $from + 1)."\n";
18 |   }
19 | }
20 | 
21 | 


--------------------------------------------------------------------------------
/assemble_tools/get_paired_maf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use ReadInAlnSunhh; 
 5 | use Getopt::Long;
 6 | # Copy the MAF blocks that contain at least two sequence ('s') lines; only the block's first 'a' line and its 's' lines are written.
 7 | my %opts; 
 8 | GetOptions(\%opts, 
 9 | 	"help!", 
10 | 	"out:s", 
11 | ); 
12 | # Show the usage when STDIN is a terminal and no file is given, or when -help is set.
13 | -t and !@ARGV and &usage(); 
14 | $opts{help} and &usage(); 
15 | # usage(): print the help text to STDOUT and exit with status 1.
16 | sub usage {
17 | print STDOUT <<HELP; 
18 | perl $0 in.maf
19 | -help 
20 | -out        [\\*STDOUT] Output file name. 
21 | HELP
22 | 	exit 1; 
23 | }
24 | # Output goes to STDOUT unless -out names a file.
25 | my $oFh = \*STDOUT; 
26 | if (defined $opts{out}) {
27 | 	my $tfh; 
28 | 	open $tfh,'>',"$opts{out}" or die "Failed to open $opts{out}\n$!\n"; 
29 | 	$oFh = $tfh; 
30 | }
31 | # Input handles: STDIN when it is not a terminal, followed by every file argument.
32 | my @FH; 
33 | !(-t) and push(@FH, \*STDIN); 
34 | for (@ARGV) {
35 | 	my $fh; 
36 | 	open $fh, '<', "$_" or die; 
37 | 	push(@FH, $fh); 
38 | }
39 | # readMAF() (presumably from ReadInAlnSunhh) is used here as returning a hash ref with array refs under {a} and {o}; an empty hash ends the loop.
40 | print {$oFh} "##maf version=1\n"; 
41 | my @all_blks; # NOTE(review): never used below
42 | for my $fh (@FH) {
43 | 	while ( my %rec1 = %{readMAF($fh)} ) {
44 | 		chomp( $rec1{a}[0] ); 
45 | 		my @lines; 
46 | 		for my $tline (@{$rec1{o}}) {
47 | 			$tline =~ m/^s\s/ or next; # keep 's' lines only
48 | 			chomp($tline); 
49 | 			push(@lines, $tline); 
50 | 		}
51 | 		scalar(@lines) >= 2 or next; # blocks with fewer than two 's' lines are dropped
52 | 		print {$oFh} $rec1{a}[0] . "\n"; 
53 | 		for my $tline (@lines) {
54 | 			print {$oFh} $tline . "\n"; 
55 | 		}
56 | 		print {$oFh} "\n"; # blank line terminates the block
57 | 	}
58 | }
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/assemble_tools/get_rep_loc_fromPileup.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Report regions of a pileup table whose depth (column 4) exceeds a cutoff.
 3 | # Positions over the cutoff on the same sequence are merged into one block
 4 | # while the gap to the previous such position stays within $merge_gap.
 5 | # Output: sequence ID, block start, block end.
 6 | use strict;
 7 | use warnings;
 8 | use LogInforSunhh;
 9 | 
10 | # scaffold10      1       A       1       ^].     ?
11 | # scaffold10      2       A       1       .       ?
12 | # scaffold10      3       T       1       .       ?
13 | # scaffold10      4       A       1       .       ?
14 | 
15 | my $depth_cutoff = 115 * 2;
16 | my $merge_gap = 1000;
17 | my %blocks_of;     # sequence ID => [ [start, end], ... ]
18 | my @scaf_order;    # sequence IDs in order of their first block
19 | while (my $line = <>) {
20 | 	chomp $line;
21 | 	my @col = split(/\t/, $line);
22 | 	my ($scaf, $pos, $depth) = @col[0, 1, 3];
23 | 	next unless $depth > $depth_cutoff;
24 | 	my $list = $blocks_of{$scaf};
25 | 	unless (defined $list) {
26 | 		# First block on this sequence.
27 | 		push(@scaf_order, $scaf);
28 | 		$blocks_of{$scaf} = [ [$pos, $pos] ];
29 | 		next;
30 | 	}
31 | 	my $last = $list->[-1];
32 | 	if ($last->[1] + 1 + $merge_gap >= $pos) {
33 | 		$last->[1] = $pos;
34 | 	} else {
35 | 		push(@$list, [$pos, $pos]);
36 | 	}
37 | }
38 | 
39 | for my $scaf (@scaf_order) {
40 | 	for my $blk (@{$blocks_of{$scaf}}) {
41 | 		print STDOUT join("\t", $scaf, $blk->[0], $blk->[1])."\n";
42 | 	}
43 | }
44 | 


--------------------------------------------------------------------------------
/assemble_tools/hifi_hic/cnvt_gfa2fa.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert hifiasm.p_ctg.gfa file to fasta file.
 3 | # Every 'S' line yields one FASTA record: field 2 is the name, field 3 the sequence.
 4 | use strict;
 5 | use warnings;
 6 | # awk '/^S/{print ">"$2;print $3}' test.p_ctg.gfa > test.p_ctg.fa  # get primary contigs in FASTA
 7 | !@ARGV and -t and die "perl $0 hifiasm_asm.p_ctg.gfa > hifiasm_asm.p_ctg.fa\n# Used to get the primary contigs in FASTA format.\n\n";
 8 | 
 9 | while (my $line = <>) {
10 | 	chomp $line;
11 | 	if (my ($name, $seq) = $line =~ m!^S\s+(\S+)\s+(\S+)!) {
12 | 		print ">$name\n$seq\n";
13 | 	}
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/assemble_tools/hifi_hic/cnvt_num2tigID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Turn the number in column 1 of each input line into a tig ID that is
 3 | # zero-padded to the same width as the example ID given as first argument.
 4 | # For comment and blank lines only column 1 is echoed.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | @ARGV or die "perl $0 example_tigID in_num_file > out_tigID_file\n";
 9 | 
10 | my $sample = shift @ARGV;
11 | 
12 | $sample =~ m!^(tig\d+)$! or die "bad example tigID [$sample]\n";
13 | 
14 | # Digits in the example ID = its length minus the 'tig' prefix.
15 | my $nLen = length($sample) - 3;
16 | my $id_format = "tig%0${nLen}d\n";
17 | 
18 | while (my $line = <>) {
19 |   chomp $line;
20 |   my @col = split(/\t/, $line);
21 |   my $first = $col[0];
22 |   if ($line =~ m!^\s*(#|$)!) {
23 |     print "$first\n";
24 |     next;
25 |   }
26 |   $first =~ m!^\d+$! or die "bad number [$first]\n";
27 |   printf($id_format, $first);
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/assemble_tools/hifi_hic/get_HiCanu_ctg.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Get primary contigs from HiCanu assemblies by removing bubble elements. 
 3 | # Input: the contigs FASTA and the matching layout.tigInfo table; the selected records are printed to STDOUT.
 4 | use strict; 
 5 | use warnings; 
 6 | use fastaSunhh; 
 7 | my $fs_obj = fastaSunhh->new(); 
 8 | use fileSunhh; 
 9 | # NOTE(review): the usage check only fires without any argument; with a single argument $f2 stays undefined.
10 | -t and !@ARGV and die "perl $0 HiCanu_asm.contigs.fasta HiCanu_asm.contigs.layout.tigInfo > HiCanu_asm_ctg.fa\n"; 
11 | 
12 | my $f1 = shift; 
13 | my $f2 = shift; 
14 | # %faSeq is used below as ID => { 'Order', 'head', 'seq' }; @ids lists the IDs sorted by 'Order'.
15 | my %faSeq = %{ $fs_obj->save_seq_to_hash('faFile'=>$f1) }; 
16 | my @ids = sort { $faSeq{$a}{'Order'} <=> $faSeq{$b}{'Order'} } keys %faSeq; 
17 | 
18 | # Digit count of a tig ID, taken from the first sequence ID; used to zero-pad the numbers read from the tigInfo table.
19 | $ids[0] =~ m!^(tig\d+)$! or die "bad example tigID [$ids[0]]\n"; 
20 | my $nLen = length($ids[0]) - 3;
21 | 
22 | my $ofh2 = &openFH($f2); 
23 | # Keep rows whose column 4 is 'contig' and column 6 is 'no' (presumably the class and bubble columns -- confirm against the tigInfo header).
24 | while (<$ofh2>) {
25 |   chomp;
26 |   my @ta=split(/\t/, $_); 
27 |   if (m!^\s*(#|$)!) {
28 |     # print "$ta[0]\n";
29 |     next;
30 |   }
31 |   $ta[3] eq 'contig' or next; 
32 |   $ta[5] eq 'no' or next; 
33 |   $ta[0] =~ m!^\d+$! or die "bad number [$ta[0]]\n";
34 |   my $newID = sprintf("%0${nLen}d", $ta[0]);
35 |   $newID = "tig$newID";
36 |   defined $faSeq{$newID} or die "no seq for ID [$newID]: $_\n"; 
37 |   print STDOUT ">$faSeq{$newID}{'head'}\n$faSeq{$newID}{'seq'}\n"; 
38 | }
39 | close($ofh2); 
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/assemble_tools/kmer/get_kmer_by_seq_summary.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use fileSunhh; 
 5 | use LogInforSunhh; 
 6 | # Summarize, per '>ID' record of the input, the whitespace-separated values listed under it; the statistics come from the external deal_table.pl script.
 7 | -t and !@ARGV and die "perl $0 R01/ct_R01.scafSeq.k27.counts > R01/ct_R01.scafSeq.k27.counts.tbl\n"; 
 8 | # %seqs: ID => space-joined data lines; $k: current ID; @klist: IDs in input order.
 9 | my (%seqs, $k, @klist); 
10 | while (<>) {
11 | 	chomp; 
12 | 	if (m!^\>!) {
13 | 		m!^\>(\S+)! or die "$_\n"; 
14 | 		$k = $1; 
15 | 		defined $seqs{$k} and die "repeated k $k\n"; 
16 | 		push(@klist, $k); 
17 | 		next; 
18 | 	}
19 | 	s!^\s+!!; 
20 | 	s!\s+$!!; 
21 | 	$seqs{$k} .= " $_"; 
22 | }
23 | my $wdir = &fileSunhh::new_tmp_dir('create'=>1); # presumably creates and returns a temporary directory -- confirm in fileSunhh
24 | # One output row per ID; an ID without any data line gets a row of zeros.
25 | my @ostat = qw/interval_mean interval_median interval_stdev MEAN MEDIAN MIN MAX NoNull/; 
26 | print STDOUT join("\t", qw/SeqID/, @ostat)."\n"; 
27 | for my $v1 (@klist) {
28 | 	if (!defined $seqs{$v1}) {
29 | 		print STDOUT join("\t", $v1, (('0') x scalar(@ostat)))."\n"; 
30 | 		next; 
31 | 	}
32 | 	# &tsmsg("[Msg] Calculating for [$v1]\n"); 
33 | 	$seqs{$v1} =~ s!^\s+!!; 
34 | 	$seqs{$v1} =~ s!\s+!\n!g; # one value per line for deal_table.pl
35 | 	&fileSunhh::write2file("$wdir/nn", "$seqs{$v1}\n",'>'); 
36 | 	my %vstat = map { 
37 | 		chomp($_); 
38 | 		split(/\t/, $_); 
39 | 	} `deal_table.pl $wdir/nn -col_stat 0 -col_stat_AsINS | deal_table.pl -transpose `; # each output line is split on tabs and the pieces are flattened into %vstat
40 | 	print STDOUT join("\t", $v1, @vstat{@ostat})."\n"; 
41 | }
42 | # Remove the working directory.
43 | &fileSunhh::_rmtree($wdir); 
44 | 
45 | 


--------------------------------------------------------------------------------
/assemble_tools/slct_pe.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | # Print column 9 of every tab-separated input line that passes the position tests below for at least one selected point (presumably SAM records with column 4 = POS and column 9 = TLEN -- confirm).
 5 | my @points_slct = 30e3; 
 6 | my $max_allowed = 280e3; 
 7 | my $min_allowed = 70e3; 
 8 | # For PE points 
 9 | @points_slct = (); 
10 | for (my $i=5e3; $i<=70e3; $i+=2e3) {
11 | 	push(@points_slct, $i);  
12 | }
13 | # Ref chloroplast
14 | $max_allowed = 80e3; 
15 | $min_allowed = 2e3; 
16 | # The assignments above replace the initial values from lines 5-7.
17 | # For MP points 
18 | #@points_slct = (); 
19 | #for (my $i=30e3; $i<=50e3; $i+=2e3) {
20 | #	push(@points_slct, $i);  
21 | #}
22 | # Ref chloroplast
23 | #$max_allowed = 80e3; 
24 | #$min_allowed = 2e3; 
25 | #@points_slct = (100e3, 130e3, 160e3, 190e3, 220e3); 
26 | 
27 | 
28 | 
29 | # A line is selected when [col4, col4+col9] spans a point from either side and stays inside ($min_allowed, $max_allowed).
30 | while (<>) {
31 | 	chomp; 
32 | 	my @ta = split(/\t/, $_); 
33 | 	my $is_o = 0; 
34 | 	for my $p (@points_slct) {
35 | 		if ( $ta[3] <= $p ) {
36 | 			$ta[3]+$ta[8] > $p and $ta[3]+$ta[8] < $max_allowed and $ta[3] > $min_allowed and do { $is_o = 1; last; }; 
37 | 		} else {
38 | 			$ta[3]+$ta[8] < $p and $ta[3] < $max_allowed and $ta[3]+$ta[4] > $min_allowed and do { $is_o = 1; last; }; # NOTE(review): $ta[4] here, while every other test uses $ta[8] -- confirm this is intended
39 | 		}
40 | 	}
41 | 	# $is_o == 1 and print "$_\n"; 
42 | 	$is_o == 1 and print "$ta[8]\n"; 
43 | }
44 | 
45 | 


--------------------------------------------------------------------------------
/calc_est_in_psl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For each coverage level, count the distinct names in column 10 that have at
 3 | # least one alignment row with (column 13 - column 12) >= column 11 * level.
 4 | # Only rows whose first column is a positive integer are used.
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | -t and !@ARGV and die "perl $0 est2genome.psl\n";
 9 | 
10 | my @levels = (0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 1);
11 | 
12 | my %est_at;    # level => name => number of rows reaching that level
13 | while (my $line = <>) {
14 | 	chomp $line;
15 | 	my @psl = split(/\t/, $line);
16 | 	my $first = $psl[0];
17 | 	next unless (defined $first and $first ne "" and $first =~ m/^(\d+)$/ and $first > 0);
18 | 	my ($qname, $qsize, $qstart, $qend) = @psl[9 .. 12];
19 | 	my $covered = $qend - $qstart;
20 | 	for my $lvl (@levels) {
21 | 		$est_at{$lvl}{$qname}++ if $covered >= $qsize * $lvl;
22 | 	}
23 | }
24 | print STDOUT join("\t", qw/Olap_Ratio EST_Number/)."\n";
25 | for my $lvl (@levels) {
26 | 	my @names = keys %{$est_at{$lvl}};
27 | 	print STDOUT join("\t", $lvl, scalar(@names))."\n";
28 | }
29 | 


--------------------------------------------------------------------------------
/cmd_ctrl/log_func.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # exe_cmd CMD
 3 | #   Echo CMD with a timestamp, eval it, then echo a CMD_done line.
 4 | #   When CMD fails, echo a CMD_err line instead and exit with status 1.
 5 | function exe_cmd {
 6 |   echo "[$(date)][CMD] $1"
 7 |   eval "$1"
 8 |   if [[ $? -ne "0" ]]
 9 |   then
10 |     echo "[$(date)][CMD_err] $1"
11 |     exit 1
12 |   fi
13 |   echo "[$(date)][CMD_done] $1"
14 | }
15 | 
16 | # tsmsg MSG: echo MSG behind a timestamp and the [Msg] tag.
17 | function tsmsg {
18 |   echo "[$(date)][Msg] $1"
19 | }
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/cmd_ctrl/rm_list.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use fileSunhh; 
 5 | # Pass the first tab-separated field of every input line to fileSunhh::_rmtree (the input is a list of paths to remove).
 6 | -t and !@ARGV and die "perl $0 filelist_toRM\n"; 
 7 | 
 8 | while (<>) {
 9 | 	chomp; 
10 | 	my @ta = &splitL("\t", $_); # splitL: presumably a split helper exported by fileSunhh -- confirm
11 | 	&fileSunhh::_rmtree($ta[0]); # presumably deletes the file or directory tree -- confirm
12 | }
13 | 
14 | 


--------------------------------------------------------------------------------
/cmd_ctrl/split_scrn_time.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Split log lines carrying a "[time][tag]" prefix into three tab-separated
 3 | # columns: time, tag, remaining text. Lines without such a prefix are dropped.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 scrn.t1 | less -S\n";
 8 | 
 9 | # Tried in order; the first pattern that matches is removed from the line.
10 | my @prefix_res = (
11 | 	# "[...][...]" right at the start of the line.
12 | 	qr!^\[([^\[\]]+)\]\[([^\[\]]+)\]!,
13 | 	# Other text first, then a date-like stamp, e.g.
14 | 	# Begin index search ...[Fri Jun 12 10:21:32 2020][CMD_done]gzip clean_files/lnc_GWRT_Rep2_*fq
15 | 	qr!^.*\[(\S\S\S +\S\S\S +\d+ +\d+:\d+:\d+ +\d+)\]\[([^\[\]]+)\]!,
16 | );
17 | 
18 | while (my $line = <>) {
19 | 	chomp $line;
20 | 	for my $re (@prefix_res) {
21 | 		$line =~ s/$re// or next;
22 | 		my ($stamp, $tag) = ($1, $2);
23 | 		$line =~ s!^\s+!!;
24 | 		print join("\t", $stamp, $tag, $line)."\n";
25 | 		last;
26 | 	}
27 | }
28 | 


--------------------------------------------------------------------------------
/cmd_ctrl/wrap_sh.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Wrap every command line of the input in exe_cmd "..." and prepend the shell
 3 | # definitions of exe_cmd / tsmsg. Comment and blank lines are copied as is.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | # Non-interpolating here-doc: the shell text below is emitted literally.
 8 | print STDOUT <<'HH';
 9 | ### Basic functions.
10 | function exe_cmd {
11 | 	echo "[$(date)][CMD] $1"
12 | 	eval $1
13 | 	echo "[$(date)][Rec] Done."
14 | }
15 | function tsmsg {
16 | 	echo "[$(date)]$1"
17 | }
18 | 
19 | HH
20 | 
21 | while (my $line = <>) {
22 | 	if ($line =~ m/^\s*#/ or $line =~ m/^\s*$/) {
23 | 		print $line;
24 | 		next;
25 | 	}
26 | 	# Strip trailing whitespace other than tabs and spaces (the line ending).
27 | 	$line =~ s/[^\t \S]+$//;
28 | 	print qq{exe_cmd "$line"\n};
29 | }
30 | 


--------------------------------------------------------------------------------
/deal_fasta.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/deal_fasta.pl


--------------------------------------------------------------------------------
/deal_table.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/deal_table.pl


--------------------------------------------------------------------------------
/enrich/example_data/in_geneID.list:
--------------------------------------------------------------------------------
 1 | og24679
 2 | og24447
 3 | og23589
 4 | og00306
 5 | og18760
 6 | og25399
 7 | og26601
 8 | og23340
 9 | og21065
10 | og18067
11 | 


--------------------------------------------------------------------------------
/enrich/scripts/extend_IPRannot_for_IPRenrich.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Build a "gene<TAB>IPR ID<TAB>IPR name" table.
 3 | #   pub_IPR-entry.list   : tab-separated; column 1 = ID, column 3 = name; a row whose
 4 | #                          first column is ENTRY_AC is skipped.
 5 | #   in_gene2IPR.combined : tab-separated; column 1 = gene, column 2 = ';'-joined IDs.
 6 | #   out_file             : one row per distinct gene/ID pair with a known name. A row
 7 | #                          with two empty fields is written for every input line whose
 8 | #                          column 2 is empty or 'NA', and once for every gene that only
 9 | #                          had IDs missing from the entry list.
10 | use strict;
11 | use warnings;
12 | 
13 | @ARGV or die "perl $0 pub_IPR-entry.list  in_gene2IPR.combined  out_file\n";
14 | 
15 | my ($ifnIPRentry, $ifhGene2IPR, $ofnEnrich) = @ARGV;
16 | 
17 | my %name_of;    # IPR ID => name
18 | open my $entry_fh, '<', "$ifnIPRentry" or die;
19 | while (my $row = <$entry_fh>) {
20 |   chomp $row;
21 |   my @col = split(/\t/, $row);
22 |   next if $col[0] eq 'ENTRY_AC';
23 |   $name_of{$col[0]} = $col[2];
24 | }
25 | close $entry_fh;
26 | 
27 | open my $gene_fh, '<', "$ifhGene2IPR" or die;
28 | open my $out_fh, '>', "$ofnEnrich" or die;
29 | my %written;    # gene => ID => 1 for every row already written
30 | my %unknown;    # gene => ID => 1 for IDs absent from the entry list
31 | while (my $row = <$gene_fh>) {
32 |   # Split before chomp so that an empty last column survives as "".
33 |   my @col = split(/\t/, $row);
34 |   chomp(@col);
35 |   my ($gene, $ipr_list) = @col[0, 1];
36 |   if ($ipr_list eq "" or $ipr_list =~ m!^na$!i) {
37 |     print {$out_fh} join("\t", $gene, "", "")."\n";
38 |     $written{$gene}{""} = 1;
39 |     next;
40 |   }
41 |   for my $ipr (split(/;/, $ipr_list)) {
42 |     unless (defined $name_of{$ipr}) {
43 |       $unknown{$gene}{$ipr} = 1;
44 |       next;
45 |     }
46 |     next if defined $written{$gene}{$ipr};
47 |     print {$out_fh} join("\t", $gene, $ipr, $name_of{$ipr})."\n";
48 |     $written{$gene}{$ipr} = 1;
49 |   }
50 | }
51 | for my $gene (sort keys %unknown) {
52 |   next if defined $written{$gene};
53 |   print {$out_fh} join("\t", $gene, "", "")."\n";
54 | }
55 | close $out_fh;
56 | close $gene_fh;
57 | 
58 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/cnvt_anchors_to_tbl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Merge anchor rows into one row per block (block ID = column 9): the IDs and
 3 | # strand come from the first row of the block, the reference/query ranges are
 4 | # the smallest start and largest end over all its rows. Blocks are written
 5 | # sorted by reference ID, then reference start.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | my %blk;    # block ID => merged record
10 | while (my $line = <>) {
11 |   chomp $line;
12 |   next if $line =~ m!^\s*(#|$)!;
13 |   my @f = split(/\t/, $line);
14 |   next if $f[1] eq 'referenceStart';    # header row
15 |   my $rec = ($blk{$f[8]} //= {});
16 |   # First-seen values.
17 |   $rec->{'rID'} //= $f[0];
18 |   $rec->{'qID'} //= $f[3];
19 |   $rec->{'str'} //= $f[6];
20 |   # Running minima of the starts and maxima of the ends.
21 |   my %lowest  = ('Rstart' => $f[1], 'Qstart' => $f[4]);
22 |   my %highest = ('Rend'   => $f[2], 'Qend'   => $f[5]);
23 |   for my $key (keys %lowest) {
24 |     $rec->{$key} //= $lowest{$key};
25 |     $rec->{$key} = $lowest{$key} if $rec->{$key} > $lowest{$key};
26 |   }
27 |   for my $key (keys %highest) {
28 |     $rec->{$key} //= $highest{$key};
29 |     $rec->{$key} = $highest{$key} if $rec->{$key} < $highest{$key};
30 |   }
31 | }
32 | my @sorted_ids = sort {$blk{$a}{'rID'} cmp $blk{$b}{'rID'} || $blk{$a}{'Rstart'} <=> $blk{$b}{'Rstart'} } keys %blk;
33 | for my $id (@sorted_ids) {
34 |   my $rec = $blk{$id};
35 |   $rec->{'lenQ'} = $rec->{'Qend'} - $rec->{'Qstart'} + 1;
36 |   $rec->{'lenR'} = $rec->{'Rend'} - $rec->{'Rstart'} + 1;
37 |   print STDOUT join("\t", @{$rec}{qw/qID Qstart Qend str rID Rstart Rend lenQ lenR/}, $id)."\n";
38 | }
39 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/find_nonOvlCDS.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | use LogInforSunhh;
 5 | use fileSunhh;
 6 | # List the mRNAs of the second GFF3 that overlap no 'gene' feature of the first GFF3 on the same sequence; writes <out_prefix>.list and <out_prefix>.gff3.
 7 | !@ARGV and die "perl $0 s0.anchors.gff3 toAdd_CDS.gff3 out_prefix\n";
 8 | 
 9 | my $f1 = shift;
10 | my $f2 = shift;
11 | my $opre = shift;
12 | # %loc1: sequence ID => [start, end] of every 'gene' feature in the first GFF3, sorted by start, then end.
13 | open F1,'<',"$f1" or die;
14 | my %loc1;
15 | while (<F1>) {
16 |   m!^\s*(#|$)! and next;
17 |   chomp;
18 |   my @ta=split(/\t/, $_);
19 |   $ta[2] eq 'gene' or next;
20 |   push(@{$loc1{$ta[0]}}, [@ta[3,4]]);
21 | }
22 | close F1;
23 | for (keys %loc1) { @{$loc1{$_}} = sort {$a->[0] <=> $b->[0] || $a->[1] <=> $b->[1]} @{$loc1{$_}}; }
24 | # The output of deal_gff3.pl -getJnLoc is read through a pipe. Columns used: 0 = mRNA ID, 2 = sequence ID, 6 and 7 = presumably start and end -- confirm against deal_gff3.pl.
25 | open F2,'-|',"perl /home/Sunhh/tools/github/NGS_data_processing/temp/deal_gff3.pl -inGff $f2 -getJnLoc " or die;
26 | &fileSunhh::write2file("${opre}.list","", '>');
27 | while (<F2>) {
28 |   chomp;
29 |   my @ta=split(/\t/, $_);
30 |   $ta[0] eq 'mrnaID' and next;
31 |   my $is_ovl = 0;
32 |   for my $a1 (@{$loc1{$ta[2]}}) {
33 |     $a1->[1] < $ta[6] and next; # gene ends before the mRNA range starts
34 |     $a1->[0] > $ta[7] and last; # genes are sorted by start, so none of the remaining ones can overlap
35 |     $is_ovl = 1;
36 |     last;
37 |   }
38 |   $is_ovl == 1 and next;
39 |   &fileSunhh::write2file("${opre}.list", "$ta[0]\n",'>>'); # the ID is added to the list file ('>>' mode)
40 | }
41 | close F2;
42 | &runCmd("perl /home/Sunhh/tools/github/NGS_data_processing/temp/deal_gff3.pl -inGff $f2 -gffret ${opre}.list -idType mRNA > ${opre}.gff3");
43 | 
44 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/get_shrt_or_ident_mafTab.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep table rows whose two coordinate spans are both below a length limit, or whose column 3 reaches an identity limit.
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | !@ARGV and die "perl $0 38000 pident_99 align2w38_anc_fix.maf.blasttab > align2w38_anc_fix.maf.blasttab.shrt\n";
 7 | 
 8 | my ($maxLen, $minIdent) = (shift, shift);
 9 | 
10 | while (my $row = <>) {
11 |   chomp $row;
12 |   my @col = split(/\t/, $row);
13 |   my $span1 = abs($col[7] - $col[6]) + 1;   # span over columns 7-8 (1-based)
14 |   my $span2 = abs($col[9] - $col[8]) + 1;   # span over columns 9-10 (1-based)
15 |   my $is_short = ($span1 < $maxLen and $span2 < $maxLen);
16 |   ($is_short or $col[2] >= $minIdent) or next;
17 |   print "$row\n";
18 | }
19 | 
20 | # [Sunhh@panda with_allNewR2_ori]$ deal_table.pl align2w38_anc_fix.maf.blasttab -col_head 
21 | # 0       22CEXU11_Chr06
22 | # 1       22CEXU43_Chr06
23 | # 2       100.00
24 | # 3       2385
25 | # 4       0
26 | # 5       0
27 | # 6       20755838
28 | # 7       20758222
29 | # 8       19857222
30 | # 9       19859606
31 | 
32 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/if_needIDfix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print "<file>\t<0|1>": 1 when some record's column 6 (CIGAR) starts or ends with an I/D operation, optional H clip aside.
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | !@ARGV and die "perl $0 in.sam > chk_tbl\n";
 7 | 
 8 | my $f=shift;
 9 | open F,'<',"$f" or die;
10 | my $is=0;
11 | while (my $rec = <F>) {
12 |   $rec =~ m!^@! and next;                 # lines starting with '@' are skipped
13 |   chomp $rec;
14 |   my $cigar = (split(/\t/, $rec))[5];
15 |   ($cigar =~ m!^(\d+H)?\d+[ID]! or $cigar =~ m!\d+[ID](\d+H)?$!) and do {$is = 1; last;};
16 | }
17 | close F;
18 | print join("\t", $f, $is)."\n";
19 | 
20 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/nucdiff_modification/cmd_list:
--------------------------------------------------------------------------------
1 | # replace files in dir: ~/.local/lib/python2.7/site-packages/nucdiff/
2 | 
3 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/remove_gap_var.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Drop GFF rows that lie within flank_len of any gap-type row (Name=...gap / ...ATGCN) on the same sequence.
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | !@ARGV and die "perl $0 flank_len_100 C31.ndf_ref_snps.gff > C31.ndf_ref_snps.rmGap.gff\n";
 7 | 
 8 | my $flank_len = shift;
 9 | 
10 | # First pass: separate gap intervals (grouped by column 1) from the rows to be filtered.
11 | my (@lines, %gaps);
12 | while (my $row = <>) {
13 |   $row =~ m!^\s*(#|$)! and next;
14 |   chomp $row;
15 |   my @fld = split(/\t/, $row);
16 |   if ($fld[8] =~ m!Name=[^;\s]*(gap|ATGCN)!) {
17 |     push(@{$gaps{$fld[0]}}, [@fld[3,4]]);
18 |     next;
19 |   }
20 |   push(@lines, \@fld);
21 | }
22 | 
23 | # Order gaps by start, then end, so the inner scan can stop at the first gap beyond the window.
24 | $_ = [ sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @$_ ] for values %gaps;
25 | 
26 | LINE: for my $rec (@lines) {
27 |   my ($lo, $hi) = ($rec->[3] - $flank_len, $rec->[4] + $flank_len);
28 |   for my $gap (@{$gaps{$rec->[0]}}) {
29 |     $gap->[1] < $lo and next;
30 |     $gap->[0] > $hi and last;
31 |     next LINE;    # a gap touches the flank-extended row: suppress it
32 |   }
33 |   print STDOUT join("\t", @$rec)."\n";
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/rmNvar_inVCF.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Remove VCF records whose REF or ALT sequence contains an N base.
 3 | # Header lines (starting with '#') are passed through unchanged.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | while (<>) {
 8 |   chomp;
 9 |   m!^#! and do { print "$_\n"; next; };
10 |   my @ta=split(/\t/, $_);
11 |   $ta[3] =~ m![nN]! and next;
12 |   # Symbolic ALT alleles (<INV>, <INS>, <CNV>, <DUP:TANDEM>, ...) are identifiers, not bases:
13 |   # an 'N' inside the angle brackets must not cause the record to be dropped.
14 |   my $has_n = 0;
15 |   for my $alt (split(/,/, $ta[4])) {
16 |     $alt =~ m!^<.*>$! and next;
17 |     $alt =~ m![nN]! and do { $has_n = 1; last; };
18 |   }
19 |   $has_n and next;
20 |   print "$_\n";
21 | }
22 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/rm_0span_maf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | use fileSunhh;
 5 | # Copy a MAF file, dropping every block in which either of its two 's' lines has 0 as its second number.
 6 | !@ARGV and die "perl $0 align2w38_anc_fix_jn.maf > fixed.maf\n";
 7 | # Input is consumed in groups of four lines ('a' line, two 's' lines, one more line); '#' lines pass through.
 8 | while (<>) {
 9 |   chomp;
10 |   if (m!^\s*#!) {
11 |     print STDOUT "$_\n";
12 |     next;
13 |   }
14 |   my $l2=<>; chomp($l2);
15 |   my $l3=<>; chomp($l3);
16 |   my $l4=<>; chomp($l4); # fourth line of the group is echoed verbatim and never inspected
17 |   m!^a\s! or die "bad2:$_\n";
18 |   $l2 =~ m!^s\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s! or die "bad3:$l2\n";
19 |   $3 == 0 and next; # third capture = second numeric field of the first 's' line
20 |   $l3 =~ m!^s\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s! or die "bad4:$l3\n";
21 |   $3 == 0 and next; # same test on the second 's' line
22 |   print "$_\n$l2\n$l3\n$l4\n";
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/rmdup_fromNormVcf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [3/9/2023] Only applicable after running 'bcftools norm -m-both | bcftools norm -d none --fasta-ref xx.fa | bcftools sort ';
 3 | # Among consecutive records sharing columns 1 and 2, keep the one with the largest REF/ALT length difference.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 normSrt_dup.vcf > dedup.vcf\n";
 8 | 
 9 | # The record currently held back, waiting to see whether a better one follows at the same site.
10 | my %kept = ('line' => '', 'id' => "", 'pos' => -1, 'len' => -1);
11 | while (my $row = <>) {
12 |   if ($row =~ m!^\s*(#|$)!) { print $row; next; }
13 |   chomp $row;
14 |   my ($chr, $pos, $ref, $alt) = (split(/\t/, $row))[0, 1, 3, 4];
15 |   # <INV> is given a length of 1e9 so that it outranks sequence alleles.
16 |   my $len = ($alt eq '<INV>') ? 1e9 : abs(length($ref) - length($alt));
17 |   if ($chr eq $kept{'id'} and $pos == $kept{'pos'}) {
18 |     # Same site as the held record: replace it only when strictly longer.
19 |     $len > $kept{'len'} and @kept{qw/line len/} = ($row, $len);
20 |     next;
21 |   }
22 |   $kept{'line'} ne '' and print STDOUT "$kept{'line'}\n";
23 |   %kept = ('line' => $row, 'id' => $chr, 'pos' => $pos, 'len' => $len);
24 | }
25 | # Flush the last held record.
26 | $kept{'line'} ne '' and print STDOUT "$kept{'line'}\n";
27 | 
28 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/run_mm2paftool.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command, run it through eval, log the outcome, and return the command's exit status.
 3 | function exe_cmd {
 4 |   echo "[$(date)][CMD] $1"
 5 |   eval "$1"
 6 |   local rc=$?
 7 |   if [[ $rc -eq 0 ]]
 8 |   then
 9 |     echo "[$(date)][CMD_done] $1"
10 |   else
11 |     echo "[$(date)][CMD_err] $1"
12 |   fi
13 |   return $rc
14 | }
15 | 
16 | # Print a timestamped message.
17 | function tsmsg {
18 |   echo "[$(date)][Msg] $1"
19 | }
20 | 
21 | outDir="by_minimap2_paftools/output/"
22 | refFaFn=/data/Sunhh/cpepo/analysis/SV_detection/in_genomes/final_C39.chr.fa
23 | qryDir=/data/Sunhh/cpepo/analysis/SV_detection/in_genomes/
24 | [[ -e $outDir ]] || mkdir -p "$outDir"
25 | # Stop when the output directory cannot be entered; otherwise results would land in the caller's directory.
26 | cd "$outDir" || { tsmsg "Failed to enter [$outDir]"; exit 1; }
27 | for cid in C31 C38
28 | do
29 |   # Align the query assembly to the reference, then call variants from the sorted PAF.
30 |   # Variant calling is skipped for a sample whose alignment step failed.
31 |   exe_cmd "minimap2 -cx asm20 -t 20 --cs $refFaFn  $qryDir/final_${cid}.chr.fa  > ${cid}.asm20.paf" || continue
32 |   exe_cmd "sort -k6,6 -k8,8n ${cid}.asm20.paf | paftools.js call -L 10000 -s $cid -f $refFaFn - > ${cid}.asm20.vcf"
33 | done
34 | cd -
35 | 
36 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/run_ndf.sh:
--------------------------------------------------------------------------------
 1 | ### Basic functions.
 2 | # Log a command, run it through eval, log the outcome, and return the command's exit status.
 3 | function exe_cmd {
 4 |   echo "[$(date)][CMD] $1"
 5 |   eval "$1"
 6 |   local rc=$?
 7 |   if [[ $rc -eq 0 ]]
 8 |   then
 9 |     echo "[$(date)][CMD_done] $1"
10 |   else
11 |     echo "[$(date)][CMD_err] $1"
12 |   fi
13 |   return $rc
14 | }
15 | 
16 | # Print a timestamped message.
17 | function tsmsg {
18 |   echo "[$(date)][Msg] $1"
19 | }
20 | 
21 | 
22 | export PATH="/data/Sunhh/src/Align/mummer/install/mummer4/bin/:$PATH"
23 | refFaFn=/data/Sunhh/cpepo/analysis/SV_detection/in_genomes/final_C39.chr.fa
24 | qryDir=/data/Sunhh/cpepo/analysis/SV_detection/in_genomes/
25 | outDir="by_nucdiff/output/"
26 | [[ -e $outDir ]] || mkdir -p "$outDir"
27 | # Stop when the output directory cannot be entered; nucdiff writes into './' and would otherwise fill the caller's directory.
28 | cd "$outDir" || { tsmsg "Failed to enter [$outDir]"; exit 1; }
29 | for cid in C31 C38
30 | do
31 |   # One nucdiff run per query assembly; output prefix is <cid>.ndf in the current directory.
32 |   exe_cmd "nucdiff --proc 10 --nucmer_opt ' -t 20 --batch 1 ' $refFaFn $qryDir/final_${cid}.chr.fa ./ ${cid}.ndf"
33 | done
34 | cd -
35 | 
36 | 

--------------------------------------------------------------------------------
/evolution_tools/SV_detection/select_var.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep records tagged VAR_annot or carrying one of the listed Name= variant types,
 3 | # and drop those whose *_len attribute is below the given minimum length.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | !@ARGV and die "perl $0 min_var_len_20 C38.ndf_ref_struct.rmGap.effG > C38.ndf_ref_struct.rmGap.effG.slct1\n";
 8 | 
 9 | my $min_var_len = shift; # 20
10 | 
11 | while (<>) {
12 |   # Skip blank lines and comment lines, with or without leading whitespace.
13 |   m!^\s*(#|$)! and next;
14 |   chomp;
15 |   m!\tVAR_annot|Name=(insertion|duplication|tandem_duplication|unaligned_end|unaligned_beginning|deletion|collapsed_repeat|collapsed_tandem_repeat);! or next;
16 |   # When a *_len attribute is present it must reach the threshold; records without one pass.
17 |   if (m!(?:overlap|subst|del|ins|blk)_len=(\d+)!){
18 |     $1 >= $min_var_len or next;
19 |   }
20 |   print STDOUT "$_\n";
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/evolution_tools/SV_detection/view_anchors.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Prepend two span columns (columns 3-2+1 and 6-5+1, 1-based) to every anchor row.
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | while (my $row = <>) {
 7 |   chomp $row;
 8 |   next if $row =~ m!^\s*(#|$)!;
 9 |   my @fld = split(/\t/, $row);
10 |   # The row starting with 'refChr' gets column titles instead of computed spans.
11 |   my @lead = ($fld[0] eq 'refChr')
12 |     ? qw/refSpan qrySpan/
13 |     : ($fld[2]-$fld[1]+1, $fld[5]-$fld[4]+1);
14 |   print join("\t", @lead, @fld)."\n";
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/evolution_tools/compare_assemblies/byMUMmer/stat2_chk_gapCover.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | 
 5 | # ==> ngs.nlis <==
 6 | # Key	Length	MatchStart	MatchEnd	MatchLen
 7 | # WM97_scaffold11	169061	3288	3336	49
 8 | # WM97_scaffold11	169061	14316	14502	187
 9 | # WM97_scaffold11	169061	58876	58908	33
10 | # 
11 | # ==> manual_alignments.txt.stat <==
12 | # ID1	WM97pbV0_000000F	16657	113511	96855
13 | # ID1	WM97pbV0_000000F	128093	139235	11143
14 | # ID1	WM97pbV0_000000F	181677	203420	21744
15 | # ID1	WM97pbV0_000000F	207866	208637	772
16 | 
17 | !@ARGV and die "perl $0 ngs.nlis manual_alignments.txt.stat > ngs.nlis.coverTag\n"; 
18 | my ($nlis, $stat) = (shift, shift); 
19 | 
20 | # Intervals (columns 3-4) of the stat table, grouped by column 2; rows whose first column is 'ID1' are skipped.
21 | my %covered; 
22 | open F2,'<',"$stat" or die; 
23 | while (my $row = <F2>) {
24 | 	chomp $row; 
25 | 	my @fld = split(/\t/, $row); 
26 | 	next if $fld[0] eq 'ID1'; 
27 | 	push(@{$covered{$fld[1]}}, [ $fld[2], $fld[3] ]); 
28 | }
29 | close F2; 
30 | 
31 | # Append 1 to an nlis row when one stored interval of its key contains columns 3-4 entirely, otherwise 0.
32 | open F1,'<',"$nlis" or die; 
33 | while (my $row = <F1>) {
34 | 	chomp $row; 
35 | 	my @fld = split(/\t/, $row); 
36 | 	$fld[0] eq 'Key' and do { print join("\t", @fld, "Covered")."\n"; next; }; 
37 | 	my $is_cover = 0; 
38 | 	for my $iv (@{$covered{$fld[0]}}) {
39 | 		($iv->[0] > $fld[2] or $iv->[1] < $fld[3]) and next; 
40 | 		$is_cover = 1; 
41 | 		last; 
42 | 	}
43 | 	print join("\t", @fld, $is_cover)."\n"; 
44 | }
45 | close F1; 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/evolution_tools/compare_assemblies/mcscanTab_to_dupTxt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | # Print input columns 2-7 (1-based) as six columns; input column 8 ("-" or not) decides whether the second coordinate pair is reversed.
 5 | -t and !@ARGV and die "cat 02_ma_mo_byScf/ma_mo_byChr.coll.ks.tab.mo_mo | perl $0 > data/dup.txt\n"; 
 6 | 
 7 | while (<>) { 
 8 | 	chomp; 
 9 | 	my @ta=split(/\t/, $_); 
10 | 	@ta = @ta[1,2,3,4,5,6,7]; # drop input column 1; indices below refer to the shifted array
11 | 	$ta[0] eq 'Chrom1' and next; # header row
12 | 	$ta[1]>$ta[2] and @ta[1,2]=@ta[2,1]; # first pair in ascending order
13 | 	$ta[4]>$ta[5] and @ta[4,5]=@ta[5,4]; # second pair in ascending order
14 | 	$ta[6] eq "-" and @ta[5,4]=@ta[4,5]; # "-" rows: second pair swapped back to descending order
15 | 	$ta[1]--; 
16 | 	$ta[4]--; # NOTE(review): on "-" rows $ta[4] holds the larger coordinate here, so the decrement hits it rather than the smaller one; confirm intended
17 | 	print join("\t", @ta[0..5] )."\n"; 
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/evolution_tools/copy_number_var/README.md:
--------------------------------------------------------------------------------
 1 | # Gene CNV analysis.
 2 | 
 3 | ## Test by FET.
 4 | - Prepare orthologous group count file. (`synFam.cnt`). First line shows accession names.
 5 | - Prepare two-column meta file projecting accessions to populations. (`map.acc_pop`)
 6 | - Test presence frequency change between two populations.
 7 | 
 8 | ```sh
 9 | Rscript compare_gene_expansion.r -a synFam.cnt -b map.acc_pop  -f landrace    -t cultivar  -o landrace_to_cultivar.CNV.tbl
10 | Rscript compare_gene_expansion.r -a synFam.cnt -b map.acc_pop  -f cordophanus -t landrace  -o cordophanus_to_landrace.CNV.tbl
11 | ```
12 | 
13 | ## Combine test results.
14 | ```sh
15 | echo -e "landrace_to_cultivar.CNV.tbl\tland_cult" > meta.comparison_name
16 | echo -e "cordophanus_to_landrace.CNV.tbl\tCLC_land" >> meta.comparison_name
17 | 
18 | perl combine_DEGs.pl meta.comparison_name 1 > combined.CNV.tbl
19 | ```
20 | 
21 | ## Identify gene families with true expansion enriched in one group.
22 | - Those gene families with contraction enriched in the other group should be removed.
23 | - For example, expanded in `CLV` instead of contracted in `CA`.
24 | 
25 | ```sh
26 | perl get_CLV_expansion.pl -final_label CLV_high -expan_label CLV_high CA_to_CLV.tbl > CA_to_CLV-CLV_expanded.tbl
27 | ```
28 | 
29 | 


--------------------------------------------------------------------------------
/evolution_tools/expansion_tools/01.clean_nwk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Clean Newick strings, one per input line: numbers that directly follow ')' are removed and
 3 | # decimal branch lengths (":<digits>.<digits>") become int(length * 100).
 4 | use strict; 
 5 | use warnings; 
 6 | use Text::Balanced qw( extract_bracketed ); 
 7 | use LogInforSunhh; 
 8 | 
 9 | # http://www.perlmonks.org/?node_id=547596
10 | 
11 | while (my $tree = <>) {
12 | 	chomp $tree; 
13 | 	my $stripped = &rmStatNum($tree); 
14 | 	my $scaled   = &fmtBranch($stripped); 
15 | 	print "$scaled\n"; 
16 | }
17 | 
18 | # (((Sly:0.11746590,Vvi:0.07461103)0.7300:0.02307856,Ath:0.14512370)1.0000:0.03408003,(SpiOl:0.05289341,Bvu:0.04303894)1.0000:0.05605601);
19 | 
20 | # Delete each number that follows a ')' (optionally after whitespace). Edits its argument in place and returns it.
21 | sub rmStatNum {
22 | 	# Earlier variant also removed branch lengths: s/:[\d.]+//g
23 | 	1 while $_[0] =~ s/\)\s*[\d.]+/)/g; 
24 | 	return $_[0]; 
25 | }
26 | 
27 | # Replace every ":<decimal>" by ":" followed by int(decimal * 100). Edits its argument in place and returns it.
28 | sub fmtBranch {
29 | 	1 while $_[0] =~ s/:(\d+\.\d+)/":" . int($1*100)/eg; 
30 | 	return $_[0]; 
31 | }
32 | 


--------------------------------------------------------------------------------
/evolution_tools/expansion_tools/01.prepare_ortho_to_tbl.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl 
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | use Getopt::Long; 
 6 | my %opts; 
 7 | GetOptions(\%opts, 
 8 | 	"help!", 
 9 | ); 
10 | # Flatten a group listing into one "group<TAB>taxon<TAB>gene" row per member.
11 | my $help_txt = <<HH; 
12 | 
13 | perl $0 all_orthomcl.out > all_orthomcl.out.tab
14 | 
15 | HH
16 | 
17 | $opts{'help'} and &LogInforSunhh::usage($help_txt); 
18 | -t and !@ARGV and &LogInforSunhh::usage($help_txt); 
19 | # Header row of the output table.
20 | print STDOUT join("\t", qw/OrthoGrpID TaxID GeneID/)."\n"; 
21 | # Each input line: column 1 is the group label, column 2 a whitespace-separated list of gene(taxon) members.
22 | while (<>) {
23 | 	chomp; 
24 | 	my @ta = split(/\t/, $_); 
25 | 	my @tb = split(/\s+/, $ta[1]); # members of the group
26 | 	my $grpID = &grpID( $ta[0] ); # cleaned group label
27 | 	for my $tc (@tb) {
28 | 		$tc =~ m/^\s*$/ and next; # empty piece produced by leading whitespace
29 | 		$tc =~ m/^(\S+)\((\S+)\)$/ or &stopErr( "tc=[$tc]\n" ); # member must look like gene(taxon)
30 | 		my ($gid, $taxID) = ($1, $2); 
31 | 		print STDOUT join("\t", $grpID, $taxID, $gid)."\n"; 
32 | 	}
33 | }
34 | 
35 | # Return the leading token of a label shaped like "NAME(12 genes,5 taxa)"; any other label is returned with whitespace turned into '_'.
36 | sub grpID {
37 | 	my ($label) = @_; 
38 | 	# Labels carrying the "(N genes,M taxa)" part yield only the name in front of it.
39 | 	return $1 if $label =~ m/^(\S+)\s*\(\s*\d+\s+genes?\s*,\s*\d+\s*taxa\s*\)/; 
40 | 	# Fallback: keep the label, replacing every whitespace character by '_'.
41 | 	$label =~ s/\s/_/g; 
42 | 	return $label; 
43 | }
44 | 


--------------------------------------------------------------------------------
/evolution_tools/expansion_tools/jn_gene_byIPR.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | use fileSunhh; 
 6 | # Group the first column of a TSV (presumably InterProScan v5 output) by columns 12+13 and list the members per group.
 7 | !@ARGV and die "perl $0 in.iprV5.tsv\n"; 
 8 | 
 9 | my $fn = shift; 
10 | open F,'<',"$fn" or die; 
11 | my %h; 
12 | while (&wantLineC(\*F)) { # NOTE(review): wantLineC presumably loads the next line into $_; confirm in fileSunhh
13 | 	my @ta = &splitL("\t", $_); 
14 | 	( defined $ta[11] and $ta[11] ne '' ) or next; # rows without a value in column 12 are skipped
15 | 	my $tk = "$ta[11] ($ta[12])"; # group key: column 12 followed by column 13 in parentheses
16 | 	$h{$tk}{$ta[0]} //= $.; # stores a line number per (key, column 1) pair; only the hash keys are printed below
17 | }
18 | close F; 
19 | print STDOUT join("\t", 'IPRID', $fn)."\n"; # header: second column is titled with the input file name
20 | for my $iprK ( sort keys %h ) {
21 | 	my @tb = sort { $a cmp $b } keys %{$h{$iprK}}; 
22 | 	print STDOUT join("\t", $iprK, scalar(@tb) . ":" . join(',', @tb))."\n"; # "<count>:<id1>,<id2>,..."
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/evolution_tools/ortho_tools/mk_sep_blastp_shell.sh:
--------------------------------------------------------------------------------
 1 | # Split a query FASTA into chunks and write one blastp command per chunk into cmd_list_blastp_<cut_pref>.
 2 | # Usage: bash mk_sep_blastp_shell.sh in.fa db_name cut_num cut_pref
 3 | inFa=$1
 4 | dbFa=$2
 5 | cutN=$3
 6 | pref=$4
 7 | 
 8 | if [ -n "$pref" ] 
 9 | then
10 | 	deal_fasta.pl "$inFa" -cut "$cutN" -cut_prefix "$pref"
11 | 	# The database name given as 2nd argument reaches the single-quoted one-liner through the environment.
12 | 	ls "${pref}"_cutted/*.fasta | dbFa="$dbFa" perl -e ' while (<>) { chomp; print "blastp -query $_ -out $_.blast -db $ENV{dbFa} -evalue 1e-5 -max_target_seqs 1000 -outfmt 6 -num_threads 3 \n"; } ' > "cmd_list_blastp_${pref}"
13 | 	echo "CMD:    nohup run_cmd_in_batch.pl -cpuN 60 cmd_list_blastp_${pref} > scrn.sep_${pref}"
14 | 	# Follow-up hint is printed here, where the prefix is known, instead of in the usage branch where it is empty.
15 | 	echo "cat ${pref}_cutted/*.blast > all.blast"
16 | else
17 | 	echo "bash $0    in.fa    db_name    cut_num    cut_pref"
18 | 	exit
19 | fi
20 | 
21 | 


--------------------------------------------------------------------------------
/evolution_tools/ortho_tools/replace_all_blast_file.sh:
--------------------------------------------------------------------------------
 1 | # Replace File_toRM with a write-protected copy of File_toUse, then send SIGKILL to the given PID.
 2 | pid_toKill=$1
 3 | file_toRM=$2
 4 | file_toUse=$3
 5 | if [ -n "$file_toUse" ]
 6 | then
 7 | 	# Refuse to delete anything when the replacement is missing.
 8 | 	if [ ! -e "$file_toUse" ]
 9 | 	then
10 | 		echo "Replacement file [$file_toUse] not found." >&2
11 | 		exit 1
12 | 	fi
13 | 	rm "$file_toRM"
14 | 	# Write permission is removed first; cp -p then carries the read-only mode over to the copy.
15 | 	chmod a-w "$file_toUse"
16 | 	cp -p "$file_toUse" "$file_toRM"
17 | 	kill -9 "$pid_toKill"
18 | else
19 | 	echo ""
20 | 	echo "bash $0   PID_toKill   File_toRM   File_toUse" 
21 | 	echo ""
22 | 	exit; 
23 | fi
24 | 
25 | 


--------------------------------------------------------------------------------
/evolution_tools/structure/get_time_from_structScrn.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Collect the lines containing 'CMD' from a screen log and print them ordered by their third tab field
 3 | # (the trailing _<number>, when the line holds no other tabs).
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | -t and !@ARGV and die "perl $0 scrn.run_struct | less -S\n"; 
 8 | 
 9 | my @rows; 
10 | while (my $row = <>) {
11 | 	next unless $row =~ m!CMD!; 
12 | 	chomp $row; 
13 | 	# Insert tabs in front of the first 'structure' and behind the trailing _<number>.
14 | 	$row =~ s!structure!\tstructure!; 
15 | 	$row =~ s!_(\d+)$!_$1\t$1!; 
16 | 	my @fld = split(/\t/, $row); 
17 | 	defined $fld[2] or die "bad line: $row\n"; 
18 | 	push(@rows, \@fld); 
19 | }
20 | my @sorted = sort { $a->[2] <=> $b->[2] || $a->[2] cmp $b->[2] } @rows; 
21 | print join("\t", @$_)."\n" for @sorted; 
22 | 
23 | 


--------------------------------------------------------------------------------
/evolution_tools/structure/mv_result_files.pl:
--------------------------------------------------------------------------------
 1 | # Copy the result files listed on input into all_results/, prefixing each base name with <run number>_<K value>.
 2 | use strict; 
 3 | use warnings; 
 4 | 
 5 | -t and !@ARGV and die "perl $0 in_result_list\n"; 
 6 | 
 7 | -d "all_results" or mkdir("all_results/") or die "Failed to create all_results/: $!\n"; 
 8 | my %h; 
 9 | while (<>) { 
10 | 	chomp; 
11 | 	m!^\./structure_(\d+)/structure_(K\d+)! or die "$_\n"; 
12 | 	my $suff = "${1}_${2}"; 
13 | 	my $bn=$_; 
14 | 	$bn=~s!^.+/!!; 
15 | 	my $new_f = "${suff}_${bn}"; 
16 | 	# Two inputs mapping to the same target name would overwrite each other: abort instead.
17 | 	defined $h{$new_f} and die "$new_f\n"; $h{$new_f} = 1; 
18 | 	# List-form system bypasses the shell, so unusual characters in the path are copied literally;
19 | 	# a failed copy aborts instead of being silently skipped.
20 | 	system('cp', '-p', $_, "all_results/$new_f") == 0 or die "Failed to copy [$_] to [all_results/$new_f]\n"; 
21 | } 
22 | 
23 | 


--------------------------------------------------------------------------------
/evolution_tools/structure/order_ClumppIndFileOut_byIndID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print the lines of a table in the order given by an ID list; a line's ID is its first non-blank token.
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | !@ARGV and die "perl $0 order_list ClumppIndFile.output > ClumppIndFile.output.srt\n"; 
 7 | 
 8 | # Wanted order: first column of the order list, each ID allowed once.
 9 | my $f_order = shift; 
10 | my (@id_ord, %id); 
11 | open F,'<',"$f_order" or die; 
12 | while (my $row = <F>) {
13 | 	chomp $row; 
14 | 	my ($ind) = split(/\t/, $row); 
15 | 	defined $id{$ind} and die "repeated ID [$ind]\n"; 
16 | 	push(@id_ord, $ind); 
17 | 	$id{$ind} = $.; 
18 | }
19 | close F; 
20 | 
21 | # Index the remaining input by ID; unknown IDs are warned about and skipped, repeats are fatal.
22 | my %lines; 
23 | while (my $row = <>) {
24 | 	chomp $row; 
25 | 	my ($ii) = $row =~ m!^\s*(\S+)! or die "$row\n"; 
26 | 	defined $id{$ii} or do { warn "Skip missing ID [$ii]: $row\n"; next; }; 
27 | 	defined $lines{$ii} and die "repeat ID line [$row]\n"; 
28 | 	$lines{$ii} = $row; 
29 | }
30 | for my $ind (@id_ord) {
31 | 	defined $lines{$ind} or do { warn "Skip bad ID [$ind]\n"; next; }; 
32 | 	print "$lines{$ind}\n"; 
33 | }
34 | 
35 | #   1        1   (0)      1 :  0.0000 1.0000
36 | #   2        2   (2)      1 :  0.0000 1.0000
37 | #   3        3   (0)      1 :  0.0000 1.0000
38 | #   4        4   (0)      1 :  0.0000 1.0000
39 | #   5        5   (0)      1 :  0.0000 1.0000
40 | #   6        6   (0)      1 :  0.0000 1.0000
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/evolution_tools/structure/shrt_col0.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | use Getopt::Long; 
 6 | my %opts; 
 7 | GetOptions(\%opts, 
 8 | 	"help!", 
 9 | 	"shrt_len:i", # 9 
10 | 	"shrt_col:i", # 0 
11 | ); 
12 | # Truncate one column of a table to a fixed length and keep the shortened values unique by appending a letter suffix.
13 | $opts{'shrt_len'} //= 9; 
14 | $opts{'shrt_col'} //= 0; 
15 | 
16 | my $help_txt = <<HH; 
17 | 
18 | perl $0 long_indvID_table
19 | 
20 | -shrt_len           [$opts{'shrt_len'}]
21 | -shrt_col           [$opts{'shrt_col'}]
22 | 
23 | I will shorten the first column to [$opts{'shrt_len'}] characters. 
24 | 
25 | HH
26 | 
27 | -t and !@ARGV and &LogInforSunhh::usage($help_txt); 
28 | $opts{'help'} and &LogInforSunhh::usage($help_txt); 
29 | # %h records every value already written to the chosen column.
30 | my %h; 
31 | while (<>) {
32 | 	chomp; 
33 | 	my @ta = split(/\t/, $_); 
34 | 	$ta[ $opts{'shrt_col'} ] = substr($ta[ $opts{'shrt_col'} ], 0, $opts{'shrt_len'}); 
35 | 	my $tk = $ta[ $opts{'shrt_col'} ]; 
36 | 	my $suff = "a"; 
37 | 	while (defined $h{$tk}) {
38 | 		$suff++; # incremented before first use, so the first suffix tried is "b"
39 | 		$tk = "$ta[$opts{'shrt_col'}]$suff"; # the suffix is appended after truncation, so the result may exceed shrt_len
40 | 	}
41 | 	$h{$tk} = 1; 
42 | 	$ta[ $opts{'shrt_col'} ] = $tk; 
43 | 	print join("\t", @ta)."\n"; 
44 | }
45 | 


--------------------------------------------------------------------------------
/evolution_tools/structure/structure:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/evolution_tools/structure/structure


--------------------------------------------------------------------------------
/evolution_tools/vcf_tab/add_ref_as_indv_in_vcfTab.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | use Getopt::Long; 
 5 | my %opts; 
 6 | GetOptions(\%opts, 
 7 | 	"ref2id:s", 
 8 | ); 
 9 | ; 
10 | # Insert a new column after column 3: a name in the first row, "X/X" (X = that row's column 3) in every other row.
11 | -t and !@ARGV and die "perl $0 [-ref2id REF] in.vcfTab > out.vcfTab\n"; 
12 | 
13 | 
14 | while (<>) {
15 | 	chomp; 
16 | 	s!^(\S+\t\S+)\t(\S+)\t!! or die "bad line: $_\n"; # strips the first three columns from $_; $1 = columns 1-2, $2 = column 3
17 | 	if ($. == 1) {
18 | 		$opts{'ref2id'} //= $2; # without -ref2id the new column reuses the title of column 3
19 | 		print STDOUT "$1\t$2\t$opts{'ref2id'}\t$_\n"; 
20 | 	} else {
21 | 		print STDOUT "$1\t$2\t$2/$2\t$_\n"; # new column pairs the value of column 3 with itself
22 | 	}
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/adaptors.fa:
--------------------------------------------------------------------------------
 1 | >junc_seq_01 illinoi
 2 | CTGTCTCTTATACACATCT
 3 | >junc_seq_01:RC1-19 illinoi
 4 | AGATGTGTATAAGAGACAG
 5 | 
 6 | >junc_seq_02 Silin-P1P33-MP2k Silin-P1-MP5k
 7 | GGTCGATAACTTCGTATAATGTATGCTATACGAAGTTATACA
 8 | >junc_seq_02:RC1-42 Silin-P1P33-MP2k Silin-P1P3-MP5k
 9 | TGTATAACTTCGTATAGCATACATTATACGAAGTTATCGACC
10 | 
11 | >junc_seq_03 P1P3-ec5k
12 | CGTATAACTTCGTATAATGTATGCTATACGAAGTTATACA
13 | >junc_seq_03:RC1-40 P1P3-cre5kec5k
14 | TGTATAACTTCGTATAGCATACATTATACGAAGTTATACG
15 | 
16 | >junc_seq_04 P3-cre5k
17 | ATAACTTCGTATAATGTATGCTATACGAAGTTATACA
18 | >junc_seq_04:RC1-43 P3-cre5k
19 | TGTATAACTTCGTATAGCATACATTATACGAAGTTAT
20 | 
21 | >pe_seq_R1_p1
22 | AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
23 | >pe_seq_R1_p2
24 | ATCTCGTATGCCGTCTTCTGCTTG
25 | >pe_seq_R2
26 | AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
27 | >pe_seq_R1_p1:RC1-34
28 | GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
29 | >pe_seq_R1_p2:RC1-24
30 | CAAGCAGAAGACGGCATACGAGAT
31 | >pe_seq_R2:RC1-58
32 | AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
33 | 
34 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/cleanPE_byTrimmo.sh:
--------------------------------------------------------------------------------
 1 | # Trim paired-end libraries with Trimmomatic PE, one run per library prefix.
 2 | 
 3 | # Echo a command with a timestamp, evaluate it, then log "Done." (logged whatever the exit status).
 4 | exe_cmd() {
 5 | 	echo "[$(date)][CMD] $1"
 6 | 	eval "$1"
 7 | 	echo "[$(date)][Rec] Done."
 8 | }
 9 | 
10 | # Echo a timestamped message.
11 | tsmsg() {
12 | 	echo "[$(date)]$1"
13 | }
14 | 
15 | # Tools and resources.
16 | exe_java="java"
17 | exe_jar="/data/Sunhh/src/Assemble/Trimmomatic/Trimmomatic-0.32/trimmomatic-0.32.jar"
18 | adp_fas="/data/Sunhh/src/Assemble/Trimmomatic/Trimmomatic-0.32/adapters/TruSeq3-PE-2.fa"
19 | 
20 | # Settings.
21 | cpuN=30
22 | minLen=25
23 | 
24 | para_jar="-threads $cpuN"
25 | para_PE="ILLUMINACLIP:$adp_fas:2:30:10:1:TRUE SLIDINGWINDOW:4:20 LEADING:3 TRAILING:3 MINLEN:$minLen"
26 | 
27 | tsmsg "[Rec] All start."
28 | for inPref in HKC_15_20kb HKC_8_10kb HWB_15_20kb HWB_8_10kb
29 | do
30 | 	tsmsg "[Rec] Dealing with $inPref"
31 | 	oPref="${inPref}"
32 | 	inFq1="${inPref}.p1"
33 | 	inFq2="${inPref}.p2"
34 | 	logFile="log.${oPref}"
35 | 	# para_jarAdd="-trimlog $logFile"
36 | 	para_jarAdd=""
37 | 	# Output order: paired R1, single R1, paired R2, single R2.
38 | 	outFqs="${oPref}_pTr_R1.fq ${oPref}_sTr_R1.fq ${oPref}_pTr_R2.fq ${oPref}_sTr_R2.fq"
39 | 	exe_cmd "$exe_java -jar $exe_jar PE $para_jar $para_jarAdd $inFq1 $inFq2 $outFqs $para_PE"
40 | done
41 | 
42 | tsmsg "[Rec] All done."
43 | 
44 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/cleanSE_byTrimmo.sh:
--------------------------------------------------------------------------------
 1 | # Trim reads with Trimmomatic in single-end (SE) mode, one run per read file of each library prefix.
 2 | 
 3 | # Echo a command with a timestamp, evaluate it, then log "Done." (logged whatever the exit status).
 4 | function exe_cmd {
 5 | 	echo "[$(date)][CMD] $1"
 6 | 	eval "$1"
 7 | 	echo "[$(date)][Rec] Done."
 8 | }
 9 | 
10 | # Echo a timestamped message.
11 | function tsmsg {
12 | 	echo "[$(date)]$1"
13 | }
14 | 
15 | exe_java="java"
16 | exe_jar="/data/Sunhh/src/Assemble/Trimmomatic/Trimmomatic-0.32/trimmomatic-0.32.jar"
17 | 
18 | cpuN=30
19 | minLen=25
20 | 
21 | para_jar="-threads $cpuN"
22 | # The adapter file must be assigned BEFORE it is expanded into the trimming steps;
23 | # single-end runs use the SE adapter set.
24 | adp_fas="/data/Sunhh/src/Assemble/Trimmomatic/Trimmomatic-0.32/adapters/TruSeq3-SE.fa"
25 | para_SE="ILLUMINACLIP:$adp_fas:2:30:10   SLIDINGWINDOW:4:20 LEADING:3 TRAILING:3 MINLEN:$minLen"
26 | 
27 | tsmsg "[Rec] All start."
28 | for inPref in HKC_15_20kb HKC_8_10kb HWB_15_20kb HWB_8_10kb
29 | do
30 | 	tsmsg "[Rec] Dealing with $inPref"
31 | 	oPref="${inPref}"
32 | 	logFile="log.${oPref}"
33 | 	# para_jarAdd="-trimlog $logFile"
34 | 	para_jarAdd=""
35 | 	# Trimmomatic SE accepts exactly one input and one output file, so .p1 and .p2 are trimmed separately.
36 | 	for rN in 1 2
37 | 	do
38 | 		inFq="${inPref}.p${rN}"
39 | 		cmd="$exe_java -jar $exe_jar SE $para_jar $para_jarAdd $inFq ${oPref}_sTr_R${rN}.fq $para_SE"
40 | 		exe_cmd "$cmd"
41 | 	done
42 | done
43 | 
44 | tsmsg "[Rec] All done."
45 | 
46 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/illumina_adapters.fa:
--------------------------------------------------------------------------------
 1 | >multiplexing-forward
 2 | GATCGGAAGAGCACACGTCT
 3 | >solexa-forward
 4 | AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
 5 | >truseq-forward-contam
 6 | AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
 7 | >truseq-reverse-contam
 8 | AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
 9 | >nextera-forward-read-contam
10 | CTGTCTCTTATACACATCTCCGAGCCCACGAGAC
11 | >nextera-reverse-read-contam
12 | CTGTCTCTTATACACATCTGACGCTGCCGACGA
13 | >solexa-reverse
14 | AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
15 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/polyAT_adp.fa:
--------------------------------------------------------------------------------
1 | >polyA
2 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
3 | >polyT
4 | TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
5 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/run_ndupB.sh:
--------------------------------------------------------------------------------
 1 | # Run drop_dup_both_end.pl on the R1/R2 fastq pair of each listed prefix.
 2 | pl_dropB="$HOME/tools/github/NGS_data_processing/drop_dup_both_end.pl"
 3 | 
 4 | # Values handed to -subseq and -subseqS.
 5 | subseqL=0
 6 | subseqS=0
 7 | 
 8 | # for inPre in NSP306_3hb NSP306_5hb NSP306_1kb NSP306_5kb NSP306_10kb NSP306_15kb
 9 | for inPre in NSP306_3hbr2
10 | do
11 | 	outPre=$inPre
12 | 	cmd="perl $pl_dropB -opre $outPre ${inPre}_R1.fastq.gz ${inPre}_R2.fastq.gz -subseq $subseqL -subseqS $subseqS -rcDup"
13 | 	echo "[Rec][$(date)] $cmd"
14 | 	eval $cmd
15 | done
16 | echo "[Rec][$(date)] Finished." 
17 | 
18 | # drop_dup_both_end.pl -opre CG_MiSeq02 CG_MiSeq02_R1.fastq.gz CG_MiSeq02_R2.fastq.gz -subseq 100 
19 | 
20 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/run_rmRRNA.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For each prefix in the input list: map <prefix>_highQ.single with bowtie against the rRNA index, collect the
 3 | # names of reads kept by 'samtools view -F 4', and pass that list to extract_fq_by_list.pl in 'drop' mode.
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | 
 8 | -t and !@ARGV and die "perl $0 inLis\n"; 
 9 | 
10 | my $cpuN = 10; 
11 | my $dbRRNA = '/share1/db_bowtie/rRNA_silva111'; 
12 | my $pl_extractFq = '/home/Sunhh/tools/github/NGS_data_processing/extract_fq_by_list.pl'; 
13 | 
14 | while (my $row = <>) {
15 | 	chomp $row; 
16 | 	my $pref = (split(/\t/, $row))[0]; 
17 | 	my $fqFile  = "${pref}_highQ.single"; 
18 | 	my $oFqFile = "${pref}_rmRRNA.fq"; 
19 | 	# Intermediate files sit next to the input fastq.
20 | 	my ($bam, $rdList) = ("$fqFile.bam", "$fqFile.bam.rd"); 
21 | 	&tsmsg("[Rec] Dealing with [$fqFile]\n"); 
22 | 	&exeCmd("bowtie -v 3 -k 1 -S -p $cpuN $dbRRNA $fqFile | samtools view -S -F 4 -hb -o $bam -"); 
23 | 	&exeCmd("samtools view $bam | cut -f 1 > $rdList"); 
24 | 	&exeCmd("perl $pl_extractFq -mode drop -rdKey -refLis $rdList -srcFq $fqFile -outFq $oFqFile"); 
25 | }
26 | &tsmsg("[Rec] All done.\n"); 
27 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/run_trimmoSE.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Trim one fastq with Trimmomatic SE, then gzip the trimmed output file.
 3 | use strict; 
 4 | use warnings; 
 5 | use LogInforSunhh; 
 6 | 
 7 | !@ARGV and die "perl $0 in_RS.fq out_RS.fq\n"; 
 8 | 
 9 | my ($inRSFq, $outRSFq) = (shift, shift); 
10 | 
11 | # Earlier installation, superseded by the paths below:
12 | #   java : /share/nas2/xigua/sunhonghe/src/java/jre1.8.0_131/bin/java
13 | #   jar  : /share/nas2/xigua/sunhonghe/src/reads/trimmo/Trimmomatic-0.36/trimmomatic-0.36.jar
14 | my $exe_java  = '/usr/lib/jvm/java-11-openjdk-amd64/bin/java';
15 | my $jar_trim  = '/data/Sunhh/src/general/trimmomatic/Trimmomatic-0.38/trimmomatic-0.38.jar';
16 | my $dir_trim  = '/data/Sunhh/src/general/trimmomatic/Trimmomatic-0.38/';
17 | my $fn_adp    = "$dir_trim/adapters/TruSeq3-SE.fa";
18 | my $para_trim = " ILLUMINACLIP:${fn_adp}\:2:30:10 SLIDINGWINDOW:4:20 LEADING:3 TRAILING:3 MINLEN:40 ";
19 | 
20 | # Pieces are joined without a separator; each piece carries its own padding spaces.
21 | my $cmd = join('',
22 | 	"$exe_java ",
23 | 	" -jar $jar_trim ",
24 | 	" SE ",
25 | 	" -threads 2 ",
26 | 	" $inRSFq $outRSFq ",
27 | 	" $para_trim ",
28 | );
29 | 
30 | &exeCmd_1cmd($cmd); 
31 | &exeCmd_1cmd("gzip $outRSFq"); 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/AUTHORS.jbzip2:
--------------------------------------------------------------------------------
1 | Matthew J. Francis
2 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/LICENCE.jbzip2:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010 Matthew J. Francis and Contributors of the jbzip2 Project
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Ant-Version: Apache Ant 1.8.4
3 | Created-By: 1.7.0_40-mockbuild_2013_10_02_16_56-b00 (Oracle Corporatio
4 |  n)
5 | Main-Class: org.usadellab.trimmomatic.Trimmomatic
6 | 
7 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2BlockCompressor.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2BlockCompressor.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2BlockDecompressor.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2BlockDecompressor.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2Constants.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2Constants.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$PartitionResult.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$PartitionResult.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$StackEntry.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$StackEntry.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$TRBudget.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort$TRBudget.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2DivSufSort.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2HuffmanStageDecoder.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2HuffmanStageDecoder.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2HuffmanStageEncoder.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2HuffmanStageEncoder.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2InputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2InputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2OutputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BZip2OutputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BitInputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BitInputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BitOutputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/BitOutputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/CRC32.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/CRC32.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/HuffmanAllocator.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/HuffmanAllocator.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/MoveToFront.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/itadaki/bzip2/MoveToFront.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/Pairomatic.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/Pairomatic.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimStats.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimStats.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/Trimmomatic.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/Trimmomatic.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimmomaticPE.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimmomaticPE.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimmomaticSE.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/TrimmomaticSE.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaParser.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaParser.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaRecord.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaRecord.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaSerializer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fasta/FastaSerializer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqParser.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqParser.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqRecord.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqRecord.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqSerializer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/fastq/FastqSerializer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/BlockOfRecords.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/BlockOfRecords.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/BlockOfWork.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/BlockOfWork.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/ParserWorker.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/ParserWorker.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/SerializerWorker.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/SerializerWorker.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimLogRecord.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimLogRecord.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimLogWorker.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimLogWorker.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimStatsWorker.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/threading/TrimStatsWorker.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/AbstractSingleRecordTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/AbstractSingleRecordTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/AvgQualTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/AvgQualTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/BarcodeSplitter.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/BarcodeSplitter.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/CropTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/CropTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/HeadCropTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/HeadCropTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$1.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaClippingSeq.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaClippingSeq.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaLongClippingSeq.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaLongClippingSeq.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaMediumClippingSeq.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaMediumClippingSeq.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaPrefixPair.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaPrefixPair.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaShortClippingSeq.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer$IlluminaShortClippingSeq.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/IlluminaClippingTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/LeadingTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/LeadingTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/MaximumInformationTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/MaximumInformationTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/MinLenTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/MinLenTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/SlidingWindowTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/SlidingWindowTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/ToPhred33Trimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/ToPhred33Trimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/ToPhred64Trimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/ToPhred64Trimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/TrailingTrimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/TrailingTrimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/Trimmer.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/Trimmer.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/TrimmerFactory.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/trim/TrimmerFactory.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream$1.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream$GZIPHelperInputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream$GZIPHelperInputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/ConcatGZIPInputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/PositionTrackingInputStream.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/org/usadellab/trimmomatic/util/PositionTrackingInputStream.class


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/trimmomatic/trimmomatic.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/file_type_based/Proc_Reads/trimmomatic/trimmomatic.jar


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/using_subfunc.R.rm_polyAT.R:
--------------------------------------------------------------------------------
 1 | ####### Clean poly-A/T stretches from single-end reads
 2 | 
 3 | source("./using_subfunc.R")  # clean.pe.fq.file() is not defined here; presumably it comes from this file
 4 | pref_lis <- read.table("pref_list", header=F, stringsAsFactors=F)
 5 | 
 6 | # Adapter set: the poly-A stretch plus its reverse complement (poly-T). It never changes, so build it once.
 7 | polyA <- 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
 8 | polyA_comp    <- chartr('ATGC', 'TACG', polyA)
 9 | polyA_revcomp <- paste( rev( strsplit(polyA_comp, '')[[1]] ), collapse='' )
10 | adp1 <- c( polyA, polyA_revcomp )
11 | 
12 | # seq_len() is the safe loop idiom: zero rows give zero iterations, whereas 1:0 would count down.
13 | for ( i in seq_len(nrow(pref_lis)) ) {
14 | 	# paste0() has no 'sep' argument; the former sep='' was merely concatenated as an empty string.
15 | 	inFq1 <- paste0( pref_lis$V1[i], '_R1.fq' )
16 | 	oFq1  <- paste0( pref_lis$V1[i], '_trimAT.fq' )
17 | 	clean.pe.fq.file( inFqName1= inFq1, outFqName1= oFq1, adaptor1= adp1, RdPerYield= 10e6 , qual.opts=list( min.qual=0 ) )
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/file_type_based/Proc_Reads/using_subfunc.R.rm_polyAT_useRight.R:
--------------------------------------------------------------------------------
 1 | ####### Clean poly-A/T stretches from single-end reads (use.right=TRUE variant)
 2 | 
 3 | source("./using_subfunc.R")  # clean.pe.fq.file() is not defined here; presumably it comes from this file
 4 | pref_lis <- read.table("pref_list", header=F, stringsAsFactors=F)
 5 | 
 6 | # Adapter set: the poly-A stretch plus its reverse complement (poly-T). It never changes, so build it once.
 7 | polyA <- 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
 8 | polyA_comp    <- chartr('ATGC', 'TACG', polyA)
 9 | polyA_revcomp <- paste( rev( strsplit(polyA_comp, '')[[1]] ), collapse='' )
10 | adp1 <- c( polyA, polyA_revcomp )
11 | 
12 | # seq_len() is the safe loop idiom: zero rows give zero iterations, whereas 1:0 would count down.
13 | for ( i in seq_len(nrow(pref_lis)) ) {
14 | 	# paste0() has no 'sep' argument; the former sep='' was merely concatenated as an empty string.
15 | 	inFq1 <- paste0( pref_lis$V1[i], '_R1.fq' )
16 | 	oFq1  <- paste0( pref_lis$V1[i], '_trimAT.fq' )
17 | 	clean.pe.fq.file( inFqName1= inFq1, outFqName1= oFq1, adaptor1= adp1, RdPerYield= 10e6 , qual.opts=list( min.qual=0 ), use.right=TRUE )
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/gm_tools/tomato_loc_V2p4_to_V2p5.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | # Transfer loci (ID + position) given on the old assembly to the new assembly through two AGP files. 
 3 | use strict; 
 4 | use warnings; 
 5 | use fileSunhh; 
 6 | use mathSunhh; 
 7 | my $ms_obj = mathSunhh->new(); 
 8 | use LogInforSunhh; 
 9 | 
10 | # All three inputs are required; with fewer, undefined file names used to reach the loaders. 
11 | @ARGV >= 3 or die "perl $0 in_old.agp in_new.agp loci_list\n"; 
12 | 
13 | my ($fn_oldAGP, $fn_newAGP, $fn_loci) = splice(@ARGV, 0, 3); 
14 | 
15 | my %old_c2s = %{ &fileSunhh::load_agpFile( $fn_oldAGP ) }; 
16 | my %new_c2s = %{ &fileSunhh::load_agpFile( $fn_newAGP ) }; 
17 | 
18 | # Reverse the old AGP hash and order every ID's entries by their first, then second element. 
19 | my %old_s2c = %{ &fileSunhh::reverse_agpHash(\%old_c2s) }; 
20 | for my $sID (keys %old_s2c) {
21 | 	@{$old_s2c{ $sID }} = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @{$old_s2c{ $sID }}; 
22 | }
23 | 
24 | my @aa_loci = &fileSunhh::load_tabFile( $fn_loci , 1 ); 
25 | for my $a1 (@aa_loci) {
26 | 	# Empty rows and rows starting with '#' get the new column titles instead of coordinates. 
27 | 	@$a1 == 0 and do { print "chr\tpos\tstr\n"; next; }; 
28 | 	$a1->[0] =~ m!^\s*#! and do { print join("\t", @$a1, qw/chr pos str/)."\n"; next; }; 
29 | 	my ($old_scfID, $old_scfPos) = ($a1->[0], $a1->[1]); 
30 | 	# The first three returned values are printed under the titles chr, pos and str. 
31 | 	my @new_scfInf = $ms_obj->transfer_position( 'from_ref2qry' => \%old_s2c, 'to_qry2ref' => \%new_c2s, 'fromLoc' => [$old_scfID, $old_scfPos] ); 
32 | 	print join("\t", $old_scfID, $old_scfPos, $new_scfInf[0], $new_scfInf[1], $new_scfInf[2])."\n"; 
33 | }
34 | 


--------------------------------------------------------------------------------
/log_tools/filt_bwa_log.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print a bwa log without the lines that match any of the patterns listed below. 
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | my @pat_to_rm = ('^\[bwa_aln_core\]', '^\[infer_isize\]', '^\[bwa_sai2sam_pe_core\]', '\[bwa_paired_sw\]'); 
 7 | 
 8 | # Compile every pattern once. 
 9 | my @pat_use = map { qr/$_/s } @pat_to_rm; 
10 | 
11 | -t and !@ARGV and die "perl $0 log.bwa\nTo skip patterns : @pat_use\n"; 
12 | 
13 | 
14 | LOG_LINE: 
15 | while (my $log_line = <>) {
16 | 	# The first matching pattern is enough to drop the line. 
17 | 	for my $skip_re (@pat_use) {
18 | 		$log_line =~ m!$skip_re! and next LOG_LINE; 
19 | 	}
20 | 	print STDOUT $log_line; 
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/log_tools/infor_ndupB.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Summarize the '[Rec]' lines of a log into a table: prefix, total read pairs, kept read pairs. 
 3 | use strict; 
 4 | use warnings; 
 5 | use LogInforSunhh; 
 6 | 
 7 | print STDOUT join("\t", qw/Prefix Raw_RdPairs Kept_RdPairs/)."\n"; 
 8 | 
 9 | # Print the record collected so far (only if it has a total) and empty it. 
10 | # A record that never saw its 'kept' line gets an empty last column instead of an 'uninitialized' warning. 
11 | sub flush_record {
12 | 	my ($rec) = @_; 
13 | 	if ( defined $rec->{total_rdPN} and $rec->{total_rdPN} ne '' ) {
14 | 		print STDOUT join("\t", map { $_ // '' } @{$rec}{qw/pref total_rdPN kept_rdPN/})."\n"; 
15 | 	}
16 | 	%$rec = (); 
17 | 	return; 
18 | }
19 | 
20 | my %infor; 
21 | while (<>) {
22 | 	m!\[Rec\]! or next; 
23 | 	if ( m!\[Rec\] There are (\d+) read pairs in total \[([^\[\]\s]+)\]! ) {
24 | 		# A 'total' line opens a new record; save the captures before anything else can reset them. 
25 | 		my ($total, $pref) = ($1, $2); 
26 | 		&flush_record(\%infor); 
27 | 		%infor = ( 'total_rdPN' => $total, 'pref' => $pref ); 
28 | 	} elsif ( m!\[Rec\] There are .+ (\d+) \(([\d.]+)\%\) reads kept in both! ) {
29 | 		# The percentage now has its own capture group; it used to be read from an unset $2. 
30 | 		$infor{kept_rdPN} = $1; 
31 | 		$infor{kept_perC} = $2; 
32 | 	}
33 | }
34 | 
35 | # The last record has no following 'total' line to trigger its output. 
36 | &flush_record(\%infor); 
37 | 


--------------------------------------------------------------------------------
/pcr_tools/cmd_list:
--------------------------------------------------------------------------------
 1 | # Step 1: design primers for the SNP list (site.list). NOTE(review): the example table shipped in this directory is named site.lis -- confirm which name is intended.
 2 | perl pcr_tools/retrieve_template_forSNP.pl site.list -out site -ref_fa /Data/Sunhh/marker_design/20210125_mitoMarker/db/CM3.5.1_wiMtCtDNA.fa -min_flank_size 50
 3 | perl pcr_tools/run_primer_forSNP.pl -in_tempTab site.tempX.tab -out_prefix site # Got site.primer.tab;
 4 | 
 5 | # Step 2: check false priming. The primers (columns 3 and 4 of site.primer.tab, header line skipped) are written as FASTA, searched against the reference, and the priming locations are joined back to the primer table.
 6 | less -S site.primer.tab | perl -e 'while (<>) { chomp; $. == 1 and next; my @ta=split(/\t/, $_); print ">$ta[1]_f\n$ta[2]\n>$ta[1]_r\n$ta[3]\n";  }' > chk1.fa
 7 | makeblastdb -in /Data/Sunhh/marker_design/20210125_mitoMarker/db/CM3.5.1_wiMtCtDNA.fa -dbtype nucl
 8 | bn6 -evalue 5000 -db /Data/Sunhh/marker_design/20210125_mitoMarker/db/CM3.5.1_wiMtCtDNA.fa -task blastn-short -num_threads 50 -query chk1.fa -out chk1.fa.bn6
 9 | perl /home/Sunhh/tools/github/NGS_data_processing/pcr_tools/get_priming_loc_bnV2_2_24.pl chk1.fa.bn6 > chk1.fa.bn6.primer_loc
10 | deal_table.pl site.primer.tab -column 1,0,2,4,3,5,15,9,12 > a1
11 | cat chk1.fa.bn6.primer_loc | deal_table.pl -col_uniq 0 > a2
12 | ColLink.pl a1 -f1 a2 -sign 'ok,multi' -add | ColLink.pl -f1 site.list -keyC1 0 -keyC2 1 -add -Col1 2,3,4,5 > a3
13 | 
14 | 


--------------------------------------------------------------------------------
/pcr_tools/site.lis:
--------------------------------------------------------------------------------
 1 | Site_ID	chr_ID	chr_Position	Base_Ref	Base_indv1	Base_indv2
 2 | Site.HS200616.01	chr1	3605732	T	TAGACTTTACTAAACGATC	T
 3 | Site.HS200616.02	chr3	3103223	T	TCTAAAAAGTATTCATGTA	T
 4 | Site.HS200616.03	chr3	4583857	GTTTAAATGTTTTAGCA	G	GTTTAAATGTTTTAGCA
 5 | Site.HS200616.04	chr3	6655932	T	T	TCCAAAAC
 6 | Site.HS200616.05	chr4	28488957	CACCATCACCATA	CACCATCACCATA	C
 7 | Site.HS200616.06	chr6	17888214	GAAAGGGGCGAA	GAAAGGGGCGAA	G
 8 | Site.HS200616.07	chr8	6362269	A	A	AACTAATTT
 9 | Site.HS200616.08	chr8	20036052	ATAGTGTTGTTATGTCC	ATAGTGTTGTTATGTCC	A
10 | Site.HS200616.09	chr9	2137202	T	TGGCTAC	T
11 | Site.HS200616.10	chr10	7249290	C	C	CTTCTTCT
12 | Site.HS200616.11	chr11	10333003	A	A	ATAAGAAGAACTATC
13 | Site.HS200616.12	chr12	24014358	TGAGTGAGTGAGA	T	TGAGTGAGTGAGA
14 | 


--------------------------------------------------------------------------------
/plotting/example_data/65K_DEL-3class:
--------------------------------------------------------------------------------
1 | Group	Single copy	Two copies
2 | cultivar	354	154
3 | landrace	91	15
4 | wild	239	0
5 | 


--------------------------------------------------------------------------------
/plotting/example_data/FleshBrix_19YQ_1:
--------------------------------------------------------------------------------
 1 | 8.2
 2 | 9.4
 3 | 9.4
 4 | 9.2
 5 | 9.8
 6 | 8.6
 7 | 9.8
 8 | 6.4
 9 | 10.6
10 | 9.3
11 | 11.4
12 | 9.1
13 | 10.3
14 | 7.2
15 | 8.7
16 | 9.3
17 | 8.7
18 | 7.6
19 | 5.1
20 | 7.2
21 | 7.3
22 | 8.2
23 | 5.7
24 | 


--------------------------------------------------------------------------------
/plotting/example_data/FleshBrix_19YQ_2:
--------------------------------------------------------------------------------
1 | 9.6
2 | 11.4
3 | 11.1
4 | 10.3
5 | 9.5
6 | 9.8
7 | 7.4
8 | 10.2
9 | 


--------------------------------------------------------------------------------
/plotting/example_data/FleshBrix_22HN_1:
--------------------------------------------------------------------------------
 1 | 10.3
 2 | 10.7
 3 | 10
 4 | 9
 5 | 10.2
 6 | 9.5
 7 | 11.8
 8 | 12
 9 | 11.3
10 | 10.2
11 | 11.3
12 | 11.5
13 | 10.3
14 | 10.7
15 | 11.3
16 | 11.2
17 | 9
18 | 12
19 | 11.5
20 | 11.3
21 | 10
22 | 11.8
23 | 9
24 | 9
25 | 11
26 | 10.3
27 | 9.8
28 | 10.3
29 | 11.8
30 | 8.5
31 | 10.5
32 | 9.8
33 | 12.2
34 | 7.7
35 | 11.3
36 | 11.2
37 | 11.3
38 | 9
39 | 11.5
40 | 4.3
41 | 10.7
42 | 10.7
43 | 12
44 | 10.8
45 | 10
46 | 6
47 | 9
48 | 13.2
49 | 10.7
50 | 11.3
51 | 11.7
52 | 3.3
53 | 10.3
54 | 4.3
55 | 8.7
56 | 6.8
57 | 8
58 | 5.7
59 | 


--------------------------------------------------------------------------------
/plotting/example_data/FleshBrix_22HN_2:
--------------------------------------------------------------------------------
 1 | 11.8
 2 | 10.7
 3 | 11
 4 | 11.3
 5 | 13
 6 | 10.8
 7 | 12
 8 | 11.3
 9 | 9.7
10 | 11.3
11 | 11.8
12 | 12.8
13 | 11.5
14 | 11.5
15 | 10.3
16 | 9.8
17 | 9.2
18 | 9.3
19 | 11.7
20 | 12.8
21 | 10.5
22 | 12.2
23 | 


--------------------------------------------------------------------------------
/plotting/example_data/in.table-for_grouped_barplot_with_SD:
--------------------------------------------------------------------------------
1 | Individual	Group	Rep. 1	Rep. 2	Rep. 3
2 | RNAi	Red	16.85	18.2	21.47
3 | WT	Red	32.75	38.36	34.5
4 | OE 	Red	44.08	49.69	50.03
5 | RNAi	Yellow	5.68	8.17	6.57
6 | WT	Yellow	15.44	12.39	17.17
7 | OE 	Yellow	21.06	20.85	23.64
8 | 


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/cmd_list:
--------------------------------------------------------------------------------
1 | # Write command line guide later.
2 | 
3 | 


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/cnt_CDS_dup.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | !@ARGV and die "perl $0 in.bn6 > in.bn6.ident_filteredNum\n";
 6 | 
 7 | # For every level from 100 down to 0, count the distinct column-1 IDs that have a hit reaching that level.
 8 | my @levels = reverse(0 .. 100);
 9 | my %qry_at_level; # {level}{column-1 ID} => number of passing hits
10 | while (my $line = <>) {
11 |   chomp($line);
12 |   my @fld = split(/\t/, $line);
13 |   # A hit reaches a level when column 3 * column 4 is at least column 13 * level.
14 |   my $matched = $fld[2] * $fld[3];
15 |   for my $lvl (grep { $matched >= $fld[12] * $_ } @levels) {
16 |     $qry_at_level{$lvl}{$fld[0]} ++;
17 |   }
18 | }
19 | for my $lvl (@levels) {
20 |   my $n_qry = exists $qry_at_level{$lvl} ? scalar(keys %{$qry_at_level{$lvl}}) : 0;
21 |   print "$lvl\t$n_qry\n";
22 | }
23 | 
24 | 
25 | # 
26 | # [Sunhh@panda rmTE]$ head -3 1_maker_novCleanRmTEcomplete.c.fa.toRef.bn6 
27 | # snap-NODE_38989__1_2429_ext-processed-gene-0.1-mRNA-1   Cla97C10G191640.1       93.83   81      3       2       439     517     295     375     5e-27   121     573     375     plus
28 | # snap-NODE_38989__1_2429_ext-processed-gene-0.1-mRNA-1   Cla97C07G134700.1       93.83   81      3       2       439     517     295     375     5e-27   121     573     375     plus
29 | # snap-NODE_38989__1_2429_ext-processed-gene-0.1-mRNA-1   Cla97C07G133870.1       94.52   73      2       2       439     509     295     367     3e-24   111     573     462     plus
30 | #
31 | 


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/cnvt_ext2nov_to_agp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | !@ARGV and die "perl $0 CM.ext2nov.tbl > CM.ext2nov.agp\n";
 6 | # Each input row becomes one AGP 'W' line: the column-5 ID spans 1..length and is built from columns 1-4.
 7 | while (my $row = <>) {
 8 |   chomp($row);
 9 |   my ($src_id, $src_s, $src_e, $strand, $new_id) = (split(/\t/, $row))[0 .. 4];
10 |   print join("\t", $new_id, 1, $src_e-$src_s+1, 1, "W", $src_id, $src_s, $src_e, $strand)."\n";
11 | }
12 | 
13 | #==> CM.ori2ext.tbl <==
14 | #NODE_1015	1	47571	+	NODE_1015__1_47571_ext	WM1147_PI164248
15 | #NODE_10412	1	10272	+	NODE_10412__1_10272_ext	WM1147_PI164248
16 | #NODE_10828	1	9780	+	NODE_10828__1_9780_ext	WM1147_PI164248
17 | #
18 | #==> CM.ext2nov.tbl <==
19 | #NODE_1015__1_47571_ext	16875	17446	+	WM1147_PI164248_NODE_1015_16875-17446	WM1147_PI164248
20 | #NODE_10412__1_10272_ext	5450	6631	+	WM1147_PI164248_NODE_10412_5450-6631	WM1147_PI164248
21 | #NODE_10828__1_9780_ext	352	8977	+	WM1147_PI164248_NODE_10828_352-8977	WM1147_PI164248
22 | #     AGP : WM97pbV1_Chr06  1       29507460        1       W       ClaScf_0005     1       29507460        -       Scaffold5
23 | 
24 | 


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/extract_N50.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Collect statistics from one or more N50 report files into one tab-delimited table (one row per file).
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | !@ARGV and die "perl $0 in.fa.N50 in_2.fa.N50 > out.table\n";
 7 | 
 8 | my @oKey = qw/asm_size asm_num longest asm_N25 asm_N50 asm_N90 asm_N95 asm_N99 shortest gnm_size GN90 GN50/;
 9 | 
10 | # Report-line pattern => output column; the number captured by the pattern is the value.
11 | my @rules = (
12 |   [ qr!^Total sequences number.*:\s*(\d+)!,     'asm_num'  ],
13 |   [ qr!^Total sequences bp \(ATGC\):\s*(\d+)!,  'asm_size' ],
14 |   [ qr!^Maximum length \(ATGC\)\s*:\s*(\d+)!,   'longest'  ],
15 |   [ qr!^Minimum length \(ATGC\)\s*:\s*(\d+)!,   'shortest' ],
16 |   [ qr!^N25.+:\s*(\d+)!,                        'asm_N25'  ],
17 |   [ qr!^N50.+:\s*(\d+)!,                        'asm_N50'  ],
18 |   [ qr!^N90.+:\s*(\d+)!,                        'asm_N90'  ],
19 |   [ qr!^N95.+:\s*(\d+)!,                        'asm_N95'  ],
20 |   [ qr!^N99.+:\s*(\d+)!,                        'asm_N99'  ],
21 |   [ qr!^Est\. Genome size\s*:\s*(\d+)!,         'gnm_size' ],
22 |   [ qr!^NG50.+:\s*(\d+)!,                       'GN50'     ],
23 |   [ qr!^NG90.+:\s*(\d+)!,                       'GN90'     ],
24 | );
25 | 
26 | print join("\t", 'accession', @oKey)."\n";
27 | for my $fn (@ARGV) {
28 |   my %h;
29 |   # Lexical handle, and an error message that names the file that could not be read.
30 |   open(my $fh, '<', $fn) or die "Failed to open file [$fn]: $!\n";
31 |   while (my $line = <$fh>) {
32 |     for my $rule (@rules) {
33 |       my ($re, $key) = @$rule;
34 |       # Only the first matching line of each kind is kept.
35 |       $line =~ $re and $h{$key} //= $1;
36 |     }
37 |   }
38 |   close($fh);
39 |   # An estimate of 0 is reported as NA; a missing estimate must not be compared numerically (it used to warn).
40 |   defined $h{'gnm_size'} and $h{'gnm_size'} == 0 and $h{'gnm_size'} = 'NA';
41 |   for my $k1 (@oKey) {
42 |     $h{$k1} //= 'NA';
43 |   }
44 |   print join("\t", $fn, @h{@oKey})."\n";
45 | }
46 | 


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/list_IPRacc.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # One output line per column-1 ID: the ID, its sorted column-12 keys and the matching column-13 values, each list joined by ';;'.
 3 | use strict; 
 4 | use warnings;
 5 | 
 6 | -t and !@ARGV and die "perl $0 ipr_all6_tsv.TEprot.IPRacc > ipr_all6_tsv.TEprot.IPRacc.line\n";
 7 | 
 8 | my %ipr_of;
 9 | while (my $line = <>) {
10 |   chomp($line);
11 |   my @col = split(/\t/, $line);
12 |   $ipr_of{$col[0]}{'ipr'}{$col[11]} = $col[12];
13 | }
14 | for my $id (sort keys %ipr_of) {
15 |   my $acc2desc = $ipr_of{$id}{'ipr'};
16 |   my @acc = sort keys %$acc2desc;
17 |   print join("\t", $id, join(";;", @acc), join(";;", map { $acc2desc->{$_} } @acc))."\n";
18 | }


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/pipe_for_functional_annotation.pl:
--------------------------------------------------------------------------------
1 | /home/Sunhh/tools/github/NGS_data_processing/annot_tools/ahrd/pipe_for_functional_annotation.pl


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/ret_maker_abinit_gff3.pl:
--------------------------------------------------------------------------------
1 | ../../annot_tools/maker/ret_maker_abinit_gff3.pl


--------------------------------------------------------------------------------
/project/watermelon_pan_phaseI/rm_Qloc_only_groups.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | -t and !@ARGV and die "perl $0  ov1/comb.grp2.novl_loc.syn.grp_tbl > ov1/comb.grp2.novl_loc.syn.grp_tbl.filtered\n";
 6 | 
 7 | # Keep a row only if at least one column from the 3rd onward is not of the form 'xxx:start-end:strand'.
 8 | my $loc_re = qr!^\S+:\d+\-\d+:[+-]$!;
 9 | while (my $grp_line = <>) {
10 |   chomp($grp_line);
11 |   my @col = split(/\t/, $grp_line);
12 |   my @members = @col[2..$#col];
13 |   my $n_non_loc = grep { $_ !~ $loc_re } @members;
14 |   $n_non_loc > 0 or next;
15 |   print "$grp_line\n";
16 | }
17 | 
18 | 
19 | # cat ov1/comb.grp2.novl_loc.syn.grp_tbl | perl -e 'while (<>) { chomp; m!\t\S+:\d+\-\d+:[+-](\t|$)! or next; m!\tC.pan:[^\s:]+(\t|$)! and next; print "$_\n"; }' | wc -l
20 | # GrpSyn_000002   21      CApan:CaUC03G061460.1   CApan:CaUC03G061520.1   CApan:CaUC03G061550.1                   CApan:CaUC03G061560.1   CApan:Ciama_Chr03:24544226-24545011:+   CApan:Ciama_Chr03:30256016-30256508:+ 
21 | # GrpSyn_000003   19      CApan:CaUC10G186420.1   CApan:CaUC10G186430.1   CApan:Ciama_Chr10:12115704-12116238:+   CLpan:Cla97C10G191540.1 CLpan:Cla97C10G191550.1 CLpan:Cla97C10G191553.1 CLpan:Cla97C10G191557.1
22 | 
23 | 


--------------------------------------------------------------------------------
/reseq_tools/C_exe/maskClose_in_1col:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/C_exe/maskClose_in_1col


--------------------------------------------------------------------------------
/reseq_tools/C_exe/rmSameSite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/C_exe/rmSameSite


--------------------------------------------------------------------------------
/reseq_tools/SNP_effect.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/SNP_effect.pl


--------------------------------------------------------------------------------
/reseq_tools/SNP_effect_edit.pl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/SNP_effect_edit.pl


--------------------------------------------------------------------------------
/reseq_tools/bsa/example_data/template-QTLseqr_result.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/bsa/example_data/template-QTLseqr_result.xlsx


--------------------------------------------------------------------------------
/reseq_tools/bsa/scripts/slct_sites_by_windows.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print the sites (2nd file) whose position lies inside any window listed for the same chromosome (1st file). 
 3 | use strict; 
 4 | use warnings; 
 5 | use fileSunhh; 
 6 | 
 7 | !@ARGV and die "perl $0 position_list  site.al\n"; 
 8 | 
 9 | my $win_file  = shift; # windows; 
10 | my $site_file = shift; # sites; 
11 | 
12 | # Load the windows: chromosome => list of [start, end], ordered by start and then by end. 
13 | my %win_of; 
14 | my $win_fh = &openFH($win_file, '<'); 
15 | while (my $win_line = <$win_fh>) {
16 | 	chomp($win_line); 
17 | 	my ($chr, $start, $end) = (split(/\t/, $win_line))[0, 1, 2]; 
18 | 	push(@{$win_of{$chr}}, [$start, $end]); 
19 | }
20 | close($win_fh); 
21 | for my $wins (values %win_of) {
22 | 	@$wins = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @$wins; 
23 | }
24 | 
25 | # Tell whether $pos lies inside one of the sorted windows; the scan stops at the first window starting beyond $pos. 
26 | sub in_any_window {
27 | 	my ($pos, $wins) = @_; 
28 | 	for my $w (@$wins) {
29 | 		$w->[0] > $pos and return 0; 
30 | 		$w->[1] < $pos and next; 
31 | 		return 1; 
32 | 	}
33 | 	return 0; 
34 | }
35 | 
36 | my $site_fh = &openFH($site_file, '<'); 
37 | while (my $site_line = <$site_fh>) {
38 | 	chomp($site_line); 
39 | 	my @col = split(/\t/, $site_line); 
40 | 	# A first line titled chrom/chr + pos/position is passed through as the header. 
41 | 	if ($. == 1 and $col[0] =~ m!^(chrom$|chr$)!i and $col[1] =~ m!^(pos$|position$)!i) {
42 | 		print STDOUT "$site_line\n"; 
43 | 		next; 
44 | 	}
45 | 	defined $win_of{$col[0]} or next; 
46 | 	&in_any_window($col[1], $win_of{$col[0]}) and print STDOUT "$site_line\n"; 
47 | }
48 | close($site_fh); 
49 | 


--------------------------------------------------------------------------------
/reseq_tools/cnt_genotype_in_1col.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Count genotype classes in a one-column genotype file; the first line is taken as the header. 
 3 | use strict; 
 4 | use warnings; 
 5 | use fileSunhh; 
 6 | use LogInforSunhh; 
 7 | 
 8 | -t and !@ARGV and &LogInforSunhh::usage("\nperl $0 sample.1col > sample.1col.typeC\n\n"); 
 9 | 
10 | my $fh = \*STDIN; 
11 | 
12 | @ARGV > 0 and $fh = &openFH($ARGV[0], '<'); 
13 | 
14 | # Classification rules, tried in order; the first matching pattern wins and anything else is 'Other'. 
15 | # 'homoDel' has to be tested before 'heteDel': '**' matches both patterns and used to be counted as heterozygous. 
16 | my @rules = (
17 | 	[ 'HomoBase', qr/^([ATGC])\1*$/      ], 
18 | 	[ 'N',        qr/^N+$/i              ], 
19 | 	[ 'DiHete',   qr/^[ATGC][ATGC]$/     ], 
20 | 	[ 'homoDel',  qr/^\*+$/              ], 
21 | 	[ 'heteDel',  qr/^[ATGC\*][ATGC\*]$/ ], 
22 | 	[ 'homoIns',  qr/^[ATGCN]\+/         ], 
23 | 	[ 'heteIns',  qr/^[ATGCN*][ATGCN]+/  ], 
24 | ); 
25 | 
26 | my $head = <$fh>; 
27 | # An empty input has no header line; use an empty title instead of calling chomp on undef. 
28 | defined $head or $head = ''; 
29 | my %cnt; 
30 | while (my $gt = <$fh>) {
31 | 	$gt =~ m/^\s*$/ and next; 
32 | 	$cnt{'total'} ++; 
33 | 	my $type = 'Other'; 
34 | 	for my $rule (@rules) {
35 | 		$gt =~ $rule->[1] and do { $type = $rule->[0]; last; }; 
36 | 	}
37 | 	$cnt{$type} ++; 
38 | }
39 | 
40 | chomp($head); 
41 | print join("\t", 'Type', $head)."\n"; 
42 | for (qw/total N HomoBase homoDel heteDel homoIns heteIns DiHete Other/) {
43 | 	$cnt{$_} //= 0; 
44 | 	print join("\t", $_, $cnt{$_})."\n"; 
45 | }
46 | 


--------------------------------------------------------------------------------
/reseq_tools/cnt_pileup_depC.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For each file given: awk prints column 4 of the rows whose column 3 is not "N" and whose column 1 is not "CG_Chr00"; 
 3 | # the values go through deal_table.pl -col_repCount and -col_sort into <file>.chr.noN.depC 
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | use LogInforSunhh; 
 8 | 
 9 | my $awk_prog = q{$3 != "N" && $1 != "CG_Chr00" { print $4 }}; 
10 | for my $pileup (@ARGV) {
11 | 	my $cmd = join(' | ', "awk '$awk_prog' $pileup", 'deal_table.pl -col_repCount 0', "deal_table.pl -col_sort 1 > $pileup.chr.noN.depC"); 
12 | 	&exeCmd($cmd); 
13 | }
14 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/cols2LD.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Chain four external steps on one SNP table: three helper scripts from $pl_dir and Haploview. 
 3 | # Every intermediate and final file is named after the input (<in.snp>.var, .ped, .info, .log, .LD_bin1k). 
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | 
 8 | my $pl_dir = '/data/Sunhh/watermelon/source_reseq/new_source/04.LD/hap_LD'; 
 9 | my ($pl_rmSame, $pl_mkHap, $pl_binLD) = map { "$pl_dir/$_" } qw/rm_same_site_hete2N.pl tbl2hap.pl get_the_LD_decay_file.pl/; 
10 | 
11 | !@ARGV and die "perl $0 in.snp\n"; 
12 | 
13 | my $snpF = shift; 
14 | 
15 | # The commands run strictly in this order; each one reads files written by an earlier one. 
16 | my @cmds = (
17 | 	"perl $pl_rmSame $snpF > $snpF.var", 
18 | 	"perl $pl_mkHap $snpF.var $snpF.ped $snpF.info", 
19 | 	"java -Xmx20960M -jar /data/Sunhh/src/Evolution/haploview/Haploview4.1.jar -nogui -minMAF 0.05 -hwcutoff 0.001 -dprime -log $snpF.log -out $snpF -pedfile $snpF.ped -info $snpF.info", 
20 | 	"perl $pl_binLD $snpF.LD 1000 $snpF.LD_bin1k", 
21 | ); 
22 | for my $cmd (@cmds) {
23 | 	&exeCmd_1cmd($cmd); 
24 | }
25 | 
26 | &tsmsg("[Rec] Done.\n"); 
27 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/cols2meg.pl:
--------------------------------------------------------------------------------
 1 | # Load a SNP table with SNP_tbl and write it with SNP_tbl::tbl2meg to <in.snp>.meg 
 2 | use strict; 
 3 | use warnings; 
 4 | use SNP_tbl; 
 5 | 
 6 | !@ARGV and die "perl $0 in.snp\n"; 
 7 | 
 8 | my $snp_file = shift(@ARGV); 
 9 | my $snp_tbl  = SNP_tbl->new( 'filename' => $snp_file ); 
10 | $snp_tbl->readTbl(); 
11 | $snp_tbl->tbl2meg( 'ofile' => $snp_file . '.meg' ); 
12 | 
13 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/fas2meg.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print the sequences of a fasta file under a '#mega' header, in input order, wrapped at 100 characters per line. 
 3 | use strict; 
 4 | use warnings; 
 5 | use fastaSunhh; 
 6 | my $fs_obj = fastaSunhh->new(); 
 7 | use LogInforSunhh; 
 8 | 
 9 | !@ARGV and die "perl $0 in.fas > out.meg\n"; 
10 | 
11 | my $file = shift; 
12 | my %s2h = %{ $fs_obj->save_seq_to_hash( 'faFile'=>$file, 'has_head'=>1 ) }; 
13 | print STDOUT "#mega\n!Title $file;\n\n"; 
14 | 
15 | my @ordered_ids = sort { $s2h{$a}{'Order'} <=> $s2h{$b}{'Order'} } keys %s2h; 
16 | for my $id (@ordered_ids) {
17 | 	my $rec = $s2h{$id}; 
18 | 	# Drop all whitespace, then cut the sequence into pieces of at most 100 characters. 
19 | 	(my $seq = $rec->{'seq'}) =~ s/\s//gs; 
20 | 	my $wrapped = join("\n", $seq =~ m/(.{1,100})/g); 
21 | 	print STDOUT "#$rec->{'key'}\n$wrapped\n"; 
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/qopt2shrtStructure.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reformat every input line as: <line_no> <line_no> (0) 1 : <fields split on whitespace>, joined by four spaces. 
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | # The 'and' after -t was missing, so -t was applied to the value of !@ARGV instead of testing STDIN. 
 7 | # Usage is now shown when there is neither a file argument nor piped input. 
 8 | -t and !@ARGV and die "perl $0 usetax_var_K_2.qopt > usetax_var_K_2.qopt_f\n"; 
 9 | 
10 | 
11 | while (<>) {
12 | 	chomp; 
13 | 	my @ta = split(/\s+/, $_); 
14 | 	print join("    ", $., $., "(0)", 1, ":" , @ta)."\n"; 
15 | }
16 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/tbl2LD.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Chain four external steps on one SNP table: three helper scripts from $pl_dir and Haploview. 
 3 | # Every intermediate and final file is named after the input (<in.snp>.var, .ped, .info, .log, .LD_bin1k). 
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | 
 8 | my $pl_dir = '/data/Sunhh/watermelon/source_reseq/new_source/04.LD/hap_LD'; 
 9 | my ($pl_rmSame, $pl_mkHap, $pl_binLD) = map { "$pl_dir/$_" } qw/rm_same_site_hete2N.pl tbl2hap.pl get_the_LD_decay_file.pl/; 
10 | 
11 | !@ARGV and die "perl $0 in.snp\n"; 
12 | 
13 | my $snpF = shift; 
14 | 
15 | # The commands run strictly in this order; each one reads files written by an earlier one. 
16 | my @cmds = (
17 | 	"perl $pl_rmSame $snpF > $snpF.var", 
18 | 	"perl $pl_mkHap $snpF.var $snpF.ped $snpF.info", 
19 | 	"java -Xmx20960M -jar /data/Sunhh/src/Evolution/haploview/Haploview4.1.jar -nogui -minMAF 0.05 -hwcutoff 0.001 -dprime -log $snpF.log -out $snpF -pedfile $snpF.ped -info $snpF.info", 
20 | 	"perl $pl_binLD $snpF.LD 1000 $snpF.LD_bin1k", 
21 | ); 
22 | for my $cmd (@cmds) {
23 | 	&exeCmd_1cmd($cmd); 
24 | }
25 | 
26 | &tsmsg("[Rec] Done.\n"); 
27 | 


--------------------------------------------------------------------------------
/reseq_tools/cnvt_tools/tbl2meg.pl:
--------------------------------------------------------------------------------
 1 | # Load a SNP table with SNP_tbl and write it with SNP_tbl::tbl2meg to <in.snp>.meg 
 2 | use strict; 
 3 | use warnings; 
 4 | use SNP_tbl; 
 5 | 
 6 | !@ARGV and die "perl $0 in.snp\n"; 
 7 | 
 8 | my $snp_file = shift(@ARGV); 
 9 | my $snp_tbl  = SNP_tbl->new( 'filename' => $snp_file ); 
10 | $snp_tbl->readTbl(); 
11 | $snp_tbl->tbl2meg( 'ofile' => $snp_file . '.meg' ); 
12 | 
13 | 


--------------------------------------------------------------------------------
/reseq_tools/example_data/list.long_deletions:
--------------------------------------------------------------------------------
 1 | 1_608963_v75845_65251	CLV01_Chr01	608964	674214
 2 | 1_671689_v81114_65251	CLV01_Chr01	671690	736940
 3 | 1_8330539_v1378562_21344	CLV01_Chr01	8330540	8351883
 4 | 1_8341886_v1380573_21684	CLV01_Chr01	8341887	8363570
 5 | 1_9330197_v1526693_21740	CLV01_Chr01	9330198	9351937
 6 | 1_9437841_v1543167_24382	CLV01_Chr01	9437842	9462223
 7 | 1_11693114_v1846227_70613	CLV01_Chr01	11693115	11763727
 8 | 1_11718294_v1848597_23199	CLV01_Chr01	11718295	11741493
 9 | 1_11719323_v1848659_35854	CLV01_Chr01	11719324	11755177
10 | 1_12085642_v1884138_23144	CLV01_Chr01	12085643	12108786
11 | 


--------------------------------------------------------------------------------
/reseq_tools/example_data/list.sample_bam:
--------------------------------------------------------------------------------
 1 | ARO18917	bam/ARO18917.dedup.bam
 2 | ARO18920	bam/ARO18920.dedup.bam
 3 | ARO19494	bam/ARO19494.dedup.bam
 4 | ARO20587	bam/ARO20587.dedup.bam
 5 | ARO21031	bam/ARO21031.dedup.bam
 6 | ARO22357	bam/ARO22357.dedup.bam
 7 | ARO22359	bam/ARO22359.dedup.bam
 8 | ARO23071	bam/ARO23071.dedup.bam
 9 | ARO23967	bam/ARO23967.dedup.bam
10 | bulldog2017	bam/bulldog2017.dedup.bam
11 | 


--------------------------------------------------------------------------------
/reseq_tools/example_data/out_geno-mat.tab:
--------------------------------------------------------------------------------
 1 | Sample	1_608963_v75845_65251	1_671689_v81114_65251	1_8330539_v1378562_21344	1_8341886_v1380573_21684	1_9330197_v1526693_21740	1_9437841_v1543167_24382	1_11693114_v1846227_70613	1_11718294_v1848597_23199	1_11719323_v1848659_35854	1_12085642_v1884138_23144
 2 | ARO18917	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	1/1	1/1
 3 | ARO18920	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	1/1	1/1
 4 | ARO19494	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	1/1	1/1
 5 | ARO20587	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	0/0	1/1
 6 | ARO21031	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	0/0	1/1
 7 | ARO22357	1/1	0/0	1/1	0/0	0/0	0/0	1/1	1/1	0/0	1/1
 8 | ARO22359	1/1	0/0	1/1	0/0	0/0	0/0	1/1	1/1	1/1	1/1
 9 | ARO23071	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	1/1	1/1
10 | ARO23967	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	1/1	1/1
11 | bulldog2017	1/1	0/0	0/0	0/0	0/0	0/0	1/1	1/1	0/0	0/0
12 | 


--------------------------------------------------------------------------------
/reseq_tools/extract_pileup.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w 
 2 | # For every *.pileup file in the current directory, run uniqComb.pl against ../basic_SNP_WM97toPI and write <file>.basic 
 3 | use strict; 
 4 | 
 5 | !@ARGV and die "perl $0 fsadf\n"; 
 6 | 
 7 | # glob() lists the files directly, without spawning 'ls' and parsing its output. 
 8 | for my $pileup (glob('*.pileup')) { 
 9 | 	print "Start $pileup;" . scalar(localtime()) . " \n"; 
10 | 	my $exit_status = system "uniqComb.pl $pileup -index ../basic_SNP_WM97toPI -col 0,1 -newCol 0,1 -exist > $pileup.basic"; 
11 | 	# A failed extraction used to pass silently; keep going with the next file, but report it. 
12 | 	$exit_status == 0 or warn "[Wrn] uniqComb.pl failed for $pileup (system returned $exit_status)\n"; 
13 | 	print "End $pileup;" . scalar(localtime()) . "\n"; 
14 | } 
15 | print "All extractions are over.\n";
16 | 


--------------------------------------------------------------------------------
/reseq_tools/filter_tools/cnt_depth/sumDepBySite.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Per input row, print the first two columns followed by the sum of all later columns; '.' entries are skipped. 
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | while (my $line = <>) {
 7 | 	chomp($line); 
 8 | 	my @col = split(/\t/, $line); 
 9 | 	# The '#CHROM' title row only gets the name of the new column. 
10 | 	if ($col[0] eq '#CHROM') {
11 | 		print join("\t", @col[0,1], 'DP_SUM')."\n"; 
12 | 		next; 
13 | 	}
14 | 	my $dp_sum = 0; 
15 | 	$dp_sum += $_ for grep { $_ ne '.' } @col[2 .. $#col]; 
16 | 	print join("\t", @col[0,1], $dp_sum)."\n"; 
17 | }
18 | 


--------------------------------------------------------------------------------
/reseq_tools/fst/extract_top1_fst_site.R:
--------------------------------------------------------------------------------
 1 | # ==> set01_13_to_11.fst.perSiteChrPos <==
 2 | # chr     pos     Ho      Hs      Ht      Dst     Htp     Dstp    Fst     Fstp    Fis     Dest
 3 | # chr10   467     0       0       0       0       0       0       NA      NA      NA      0
 4 | 
 5 | argvs <- commandArgs( trailingOnly=TRUE ) ;
 6 | # Both arguments are required: without them topR is NA and the range test below stops with an obscure error.
 7 | if ( length(argvs) < 2 ) {
 8 | 	stop( "usage: Rscript extract_top1_fst_site.R in.fst.perSiteChrPos topR" )
 9 | }
10 | fn   <- argvs[1]   # in.fst.perSiteChrPos
11 | topR <- as.numeric( argvs[2] ) # 0.01 
12 | if ( is.na(topR) ) {
13 | 	stop( "topR is not a number: ", argvs[2] )
14 | }
15 | if (topR > 1) {
16 | 	print (topR)
17 | 	quit()
18 | }
19 | 
20 | aa <- read.table( file=fn, header=T, stringsAsFactors=F )
21 | # Filter out organelles ("plast", "mito"), NA sites and negative Fst values. 
22 | aa <- aa[ aa$chr != "plast" & aa$chr != "mito" & !is.na(aa$Fst) & aa$Fst >= 0 , ]
23 | 
24 | # Find the threshold of topR: the (1 - topR) quantile of the remaining Fst values. 
25 | aa.thres <- quantile( aa$Fst, probs=1-topR )
26 | cat("threshold for", topR, "is ", aa.thres, "\n")
27 | 
28 | # Get the selected sites. 
29 | aa.slct <- aa[ aa$Fst >= aa.thres, ]
30 | # paste0() has no 'sep' argument; the former sep="" only appended an empty string to the file name.
31 | write.table( aa.slct, file=paste0( fn, ".top", topR ), append=F, row.names=F, col.names=T, quote=F, sep="\t" )
32 | 
33 | 


--------------------------------------------------------------------------------
/reseq_tools/fst/get_stat.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep the rows after the first line whose column 4 is >= 50 and column 6 is >= 0, then pipe them through 
 3 | # deal_table.pl (-col_stat 5, then -transpose) into <in.win.fst>.statMean 
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | !@ARGV and die "perl $0 in.win.fst\n"; 
 8 | 
 9 | my $f = shift; 
10 | 
11 | my $awk_filter = q{ NR > 1 && ( $4 >= 50 && $6 >= 0 ) }; 
12 | my $cmd = "awk '$awk_filter' $f | deal_table.pl -col_stat 5 -col_stat_AsINS | deal_table.pl -transpose > $f.statMean"; 
13 | system($cmd); 
14 | 


--------------------------------------------------------------------------------
/reseq_tools/fst/join_fst_siteChrPos.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Concatenate the files named in the input list, printing only the first line seen for each key. 
 3 | use strict; 
 4 | use warnings; 
 5 | use LogInforSunhh; 
 6 | use fileSunhh; 
 7 | use Getopt::Long; 
 8 | my %opts; 
 9 | GetOptions(\%opts, 
10 | 	"help!", 
11 | 	"perWind!", 
12 | 	"addSuff:s", '' 
13 | ); 
14 | defined $opts{'addSuff'} or $opts{'addSuff'} = ''; 
15 | 
16 | my $help_txt = <<HH; 
17 | 
18 | perl $0 list_of_fst_perSiteChrPos > merged.ChrPos
19 | 
20 | -perWind        [Bool]
21 | -addSuff        ['']
22 | 
23 | HH
24 | 
25 | -t and !@ARGV and &LogInforSunhh::usage($help_txt); 
26 | $opts{'help'} and &LogInforSunhh::usage($help_txt); 
27 | 
28 | # Key = first column with -perWind, otherwise the first two columns joined by a tab. 
29 | my %seen_key; 
30 | while (my $list_line = <>) {
31 | 	chomp($list_line); 
32 | 	# Only the first column of the list is used; -addSuff is appended to it to get the file name. 
33 | 	my $fst_file = (split(/\t/, $list_line))[0]; 
34 | 	my $fst_fh = &openFH("$fst_file$opts{'addSuff'}", '<'); 
35 | 	while (my $rec = <$fst_fh>) {
36 | 		chomp($rec); 
37 | 		my @fld = split(/\t/, $rec); 
38 | 		my $key = ($opts{'perWind'}) ? $fld[0] : "$fld[0]\t$fld[1]"; 
39 | 		$seen_key{$key} ++ and next; 
40 | 		print STDOUT "$rec\n"; 
41 | 	}
42 | 	close($fst_fh); 
43 | }
44 | 


--------------------------------------------------------------------------------
/reseq_tools/gatk/CatVariants.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Concatenate the files named in a list; '#' lines are printed only until the first '#CHROM' line has been printed. 
 3 | use strict; 
 4 | use warnings; 
 5 | use fileSunhh; 
 6 | 
 7 | -t and !@ARGV and die "perl $0 sorted_GVCF_list\n"; 
 8 | 
 9 | my $header_done = 0; 
10 | 
11 | while (my $list_line = <>) {
12 | 	chomp($list_line); 
13 | 	# Blank lines and '#' comments in the list are ignored. 
14 | 	$list_line =~ m!^\s*($|#)! and next; 
15 | 	my @list_col = &splitL("\t", $list_line); 
16 | 	my $vcf_fh = &openFH($list_col[0], '<'); 
17 | 	while (my $vcf_line = <$vcf_fh>) {
18 | 		if ( $vcf_line =~ m!^#! ) {
19 | 			$header_done == 1 and next; 
20 | 			$vcf_line =~ m!^#CHROM\t! and $header_done = 1; 
21 | 		}
22 | 		print STDOUT $vcf_line; 
23 | 	}
24 | 	close($vcf_fh); 
25 | }
26 | 


--------------------------------------------------------------------------------
/reseq_tools/gatk/get_pass_vcf.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copy '#' lines unchanged and keep only the records whose 7th column (FILTER) is PASS,
 3 | # compared case-insensitively.
 4 | use strict; 
 5 | use warnings; 
 6 | use fileSunhh; 
 7 | 
 8 | -t and !@ARGV and die "gzip -cd lmyPM_filtV.vcf.gz | perl $0 > lmyPM_filtV_PASS.vcf\n"; 
 9 | 
10 | while (<>) {
11 | 	if (m!^#!) {
12 | 		print STDOUT $_; 
13 | 		next; 
14 | 	}
15 | 	chomp; 
16 | 	# Only FILTER is inspected, so stop splitting after it instead of cutting up every sample column. 
17 | 	my @ta = split(/\t/, $_, 8); 
18 | 	# Records with fewer than 7 columns (e.g. blank lines) are dropped without an undef warning. 
19 | 	(defined $ta[6] and $ta[6] =~ m!^PASS$!i) or next; 
20 | 	print STDOUT "$_\n"; 
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/reseq_tools/get_set3_varWiIndel.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep only the rows of a tab-separated table in which at least one cell, from column
 3 | # index -startColN (0-based) onward, contains '*' or '+'.
 4 | # The first line is passed through unchanged unless -noHeader is given.
 5 | use strict; 
 6 | use warnings; 
 7 | use LogInforSunhh; 
 8 | use Getopt::Long; 
 9 | my %opts; 
10 | GetOptions(\%opts,
11 | 	"help!", 
12 | 	"startColN:i", # 2 
13 | 	"noHeader!", 
14 | ); 
15 | 
16 | $opts{'startColN'} //= 2; 
17 | 
18 | # Print the help text to STDERR and exit with status 1.
19 | # The example output name now says 'wiIndel': this script keeps the rows WITH indels.
20 | sub usage {
21 | 	print STDERR <<HH; 
22 | 
23 | perl $0 in.snp > out_wiIndel.snp
24 | 
25 | -help
26 | -startColN      [$opts{'startColN'}] 
27 | -noHeader       [Bool]
28 | 
29 | HH
30 | 	exit(1); 
31 | }
32 | 
33 | -t and !@ARGV and &usage(); 
34 | $opts{'help'} and &usage(); 
35 | 
36 | while (<>) {
37 | 	$. % 1e6 == 1 and &tsmsg("[Msg] Reading $. lines.\n"); 
38 | 	s/[^\S\t]+$//; # strip trailing whitespace (including the newline) but keep trailing tabs 
39 | 	if ( $. == 1 and !$opts{'noHeader'} ) {
40 | 		print "$_\n"; 
41 | 		next; 
42 | 	}
43 | 	my @ta = split(/\t/, $_); 
44 | 	my $has_indel = 0; 
45 | 	for my $tb (@ta[ $opts{'startColN'} .. $#ta ]) {
46 | 		$tb =~ m/\*|\+/ and do { $has_indel = 1; last; }; 
47 | 	}
48 | 	$has_indel == 1 and print "$_\n"; 
49 | }
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/combine2PileFiles:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/combine2PileFiles


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/reSeqPrintRefChr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/reSeqPrintRefChr


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/reSeqPrintSample.indel.fast:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/reSeqPrintSample.indel.fast


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/reSeqPrintSample.indel.fast.strAssign:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/reSeqPrintSample.indel.fast.strAssign


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/reSeqPrintSample.indel.fast.strAssign.moreHeter:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/reSeqPrintSample.indel.fast.strAssign.moreHeter


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/rmRedunSam2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/rmRedunSam2


--------------------------------------------------------------------------------
/reseq_tools/mao_exe/rmRedunSam3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/reseq_tools/mao_exe/rmRedunSam3


--------------------------------------------------------------------------------
/reseq_tools/mask_weiredSNP.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl 
 2 | # Replace every genotype cell that does not match one of the "simple" genotype patterns
 3 | # with 'N'. The first input line is printed unchanged.
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | use Getopt::Long; 
 8 | my %opts; 
 9 | GetOptions(\%opts, 
10 | 	"help!", 
11 | 	"startColN:i", # 0-based index of the first genotype column 
12 | ); 
13 | 
14 | # -startColN used to be parsed but ignored; it now selects the first genotype column (default 2). 
15 | my $geno_col = $opts{'startColN'} // 2; 
16 | 
17 | # Print the help text to STDERR and exit with status 1.
18 | sub usage {
19 | 	print STDERR <<HH; 
20 | 
21 | Only simple genotypes /^[ATGC*N]\$|^[ATGC]\\+[ATGC]+\$|^[ATGC*][ATGC*]\$/ kept.
22 | 
23 | perl $0 merged.snp > merged.snp.woWeired
24 | 
25 | -help
26 | -startColN      [$geno_col]
27 | 
28 | Please note that the geno_col is $geno_col
29 | And first line is not checked. 
30 | 
31 | HH
32 | 	exit(1); 
33 | }
34 | 
35 | $opts{'help'} and &usage(); 
36 | # Accept piped input like the sibling scripts: show usage only when there is nothing to read. 
37 | -t and !@ARGV and &usage(); 
38 | 
39 | my $l = <>; 
40 | defined $l or exit(0); # empty input: nothing to print 
41 | print $l; 
42 | # print join("\t",qw/ChromID Pos GenoN HomoRatio HeteRatio/)."\n"; 
43 | while (<>) {
44 | 	$. % 1e6 == 1 and &tsmsg("line $.\n"); 
45 | 	chomp; 
46 | 	my @ta = split(/\t/, $_); 
47 | 	# $tb aliases the element of @ta, so the assignment below rewrites the row in place. 
48 | 	for my $tb ( @ta[$geno_col .. $#ta] ) {
49 | 		$tb =~ m/^[ATGC*N]$|^[ATGC]\+[ATGC]+$|^[ATGC*][ATGC*]$/ or $tb = "N"; 
50 | 		# Sometimes there will be sth. like 'AG+AAA' heterozygous site, but I don't want it . 
51 | 	}
52 | 	print join("\t", @ta)."\n"; 
53 | }
54 | 


--------------------------------------------------------------------------------
/reseq_tools/rename_plink_map.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Give every row whose 2nd column is '.' an ID built as "<column 1>_<column 4>",
 3 | # and abort if any ID (existing or generated) occurs twice. Output goes to STDOUT.
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | # The result is written to STDOUT, so the usage line needs the redirect: without '>' a
 8 | # second file name would be read as additional input.
 9 | -t and !@ARGV and die "perl $0 plink_fmt.map > plink_IDnamed.map\n"; 
10 | 
11 | 
12 | my %h; # IDs seen so far
13 | while (<>) {
14 | 	chomp; 
15 | 	my @ta=split(/\t/, $_); 
16 | 	$ta[1] eq "." and $ta[1] = "$ta[0]_$ta[3]"; 
17 | 	defined $h{$ta[1]} and die "[Err] Repeat site ID [$ta[1]]\n"; 
18 | 	$h{$ta[1]} = 1; 
19 | 	print join("\t", @ta)."\n"; 
20 | }
21 | 


--------------------------------------------------------------------------------
/reseq_tools/rm_adjacent_sites.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl 
 2 | use strict; 
 3 | use warnings; 
 4 | use LogInforSunhh; 
 5 | 
 6 | # Rules: 
 7 | #  R1. Remove adjacent SNP sites within 5bp. 
 8 | #
 9 | # Output is delayed by one row: a site is printed only once the next site on the same
10 | # chromosome turns out to be far enough away (or the chromosome / input ends).
11 | # Rows whose first column is the literal 'chr' are printed immediately.
12 | 
13 | my $within_dist = 5; 
14 | 
15 | my %prev; # the pending (not yet printed) site: chr, pos, line, is_good
16 | while (<>) {
17 | 	s/[^\t\S]+$//; 
18 | 	my @ta = split(/\t/, $_); 
19 | 	my ($chr, $pos, $refB) = @ta[0,1,2]; 
20 | 	if ($chr eq 'chr') {
21 | 		print STDOUT "$_\n"; 
22 | 		next; 
23 | 	}
24 | 
25 | 	# Rule 1: 
26 | 	my %curr; 
27 | 	$curr{'chr'} = $chr; 
28 | 	$curr{'pos'} = $pos; 
29 | 	$curr{line} = $_; 
30 | 	$curr{is_good} = 1; 
31 | 	if (scalar(keys %prev) == 0 or $prev{chr} ne $chr) {
32 | 		# First site overall or first site of a new chromosome: flush the pending site. 
33 | 		defined $prev{'is_good'} and $prev{'is_good'} == 1 and print STDOUT "$prev{'line'}\n"; 
34 | 	} else {
35 | 		my $dist2prev = $pos - $prev{pos}+1; 
36 | 		if ( $dist2prev <= $within_dist ) {
37 | 			# Both are bad. 
38 | 			# The pending site is simply never printed: it is overwritten below. 
39 | 			$curr{is_good} = 0; 
40 | 			# $prev{is_good} = 0; 
41 | 		} else {
42 | 			$prev{is_good} == 1 and print STDOUT "$prev{line}\n"; 
43 | 		}
44 | 	}
45 | 	%prev = %curr; 
46 | }
47 | # Flush the last pending site; %prev is empty when the input had no data rows. 
48 | defined $prev{is_good} and $prev{is_good} == 1 and print STDOUT "$prev{line}\n"; 
49 | 


--------------------------------------------------------------------------------
/reseq_tools/rm_same_site.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep only the rows in which at least two non-'N' genotype cells differ
 3 | # (case-insensitive comparison). Rows whose first column is 'chr' are always kept.
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | 
 8 | my $geno_col = 2; 
 9 | 
10 | -t and !@ARGV and die "perl $0 in_pasted_1col.tbl > snp.tbl\nPlease note that the geno_col is $geno_col\n"; 
11 | 
12 | 
13 | while (<>) {
14 | 	&tsmsg("[Msg] Reading $. lines.\n") if $. % 1e6 == 1;
15 | 	s/[^\S\t]+$//;
16 | 	my @cells = split(/\t/, $_);
17 | 	if ( $cells[0] eq 'chr' ) {
18 | 		print "$_\n";
19 | 		next;
20 | 	}
21 | 	my $first_geno;        # first non-'N' genotype met on this row
22 | 	my $is_variable = 0;   # set once a second, different genotype shows up
23 | 	for my $raw (@cells[$geno_col .. $#cells]) {
24 | 		my $geno = uc($raw);
25 | 		next if $geno eq 'N';
26 | 		$first_geno //= $geno;
27 | 		if ($geno ne $first_geno) {
28 | 			$is_variable = 1;
29 | 			last;
30 | 		}
31 | 	}
32 | 	print "$_\n" if $is_variable;
33 | }
34 | 


--------------------------------------------------------------------------------
/reseq_tools/rm_same_site_hete2N.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Keep only the rows in which at least two usable genotype cells differ. A cell is usable
 3 | # when, upper-cased, it is a single A/T/G/C or contains '*' or '+'; everything else
 4 | # counts as 'N' and is ignored. Rows whose first column is 'chr' are always kept.
 5 | use strict; 
 6 | use warnings; 
 7 | use LogInforSunhh; 
 8 | 
 9 | my $geno_col = 2; 
10 | -t and !@ARGV and die "perl $0 in_wiSame.snp\nPlease note that geno_col=$geno_col\nHere we treat heterozygous site as 'N', with indel accepted.\n"; 
11 | 
12 | 
13 | while (<>) {
14 | 	&tsmsg("[Msg] Reading $. lines.\n") if $. % 1e6 == 1; 
15 | 	s/[^\S\t]+$//; 
16 | 	my @cells = split(/\t/, $_); 
17 | 	if ( $cells[0] eq 'chr' ) {
18 | 		print "$_\n"; 
19 | 		next; 
20 | 	}
21 | 	my $first_geno;        # first usable genotype met on this row 
22 | 	my $is_variable = 0;   # set once a second, different genotype shows up 
23 | 	for my $raw (@cells[$geno_col .. $#cells]) {
24 | 		my $geno = uc($raw); 
25 | 		# Anything that is not a usable genotype is treated as 'N', i.e. skipped. 
26 | 		next unless $geno =~ m/^[ATGC]$|\*|\+/; 
27 | 		$first_geno //= $geno; 
28 | 		if ($geno ne $first_geno) {
29 | 			$is_variable = 1; 
30 | 			last; 
31 | 		}
32 | 	}
33 | 	print "$_\n" if $is_variable; 
34 | }
35 | 


--------------------------------------------------------------------------------
/reseq_tools/scripts/cnvt_melt_to_matrix.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Pivot a three-column "melted" table (column 1, column 2, value) into a matrix:
 3 | # one output row per distinct column-2 value, one output column per distinct column-1
 4 | # value, both in order of first appearance. Missing cells are written as "./.".
 5 | use strict;
 6 | use warnings;
 7 | 
 8 | -t and !@ARGV and die "perl $0 in.melt > out.matrix\n";
 9 | 
10 | # 1_608963_v75845_65251   ARO18917        1/1     14      0
11 | # 1_608963_v75845_65251   ARO18920        1/1     45      0
12 | # 1_608963_v75845_65251   ARO19494        1/1     24      0
13 | # 1_608963_v75845_65251   ARO20587        1/1     16      0
14 | 
15 | my (%first_line_of_c1, %first_line_of_c2, %value_of);
16 | 
17 | while (my $line = <>) {
18 |   # Split first and chomp the fields afterwards, so a trailing empty value stays defined.
19 |   my @fields = split(/\t/, $line);
20 |   chomp(@fields);
21 |   my ($k1, $k2, $val) = @fields[0, 1, 2];
22 |   $first_line_of_c1{$k1} //= $.;
23 |   $first_line_of_c2{$k2} //= $.;
24 |   # The first value seen for a (column 1, column 2) pair wins.
25 |   $value_of{$k1}{$k2} //= $val;
26 | }
27 | my @col_ids = sort { $first_line_of_c1{$a} <=> $first_line_of_c1{$b} } keys %first_line_of_c1;
28 | my @row_ids = sort { $first_line_of_c2{$a} <=> $first_line_of_c2{$b} } keys %first_line_of_c2;
29 | print STDOUT join("\t", "Sample", @col_ids)."\n";
30 | for my $row_id (@row_ids) {
31 |   my @cells = map { $value_of{$_}{$row_id} // "./." } @col_ids;
32 |   print STDOUT join("\t", $row_id, @cells)."\n";
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/reseq_tools/slct_sweep/merge_wind.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Collect the [column 2, column 3] blocks of every column-1 ID, merge them per ID with
 3 | # mathSunhh's mergeLocBlk(), and print the result as a "chr start end" table.
 4 | # IDs are reported in order of first appearance.
 5 | use strict; 
 6 | use warnings; 
 7 | use mathSunhh; 
 8 | my $mat_obj = mathSunhh->new(); 
 9 | 
10 | my (%blocks_of, %first_seen); 
11 | while (my $line = <>) {
12 | 	chomp($line); 
13 | 	my @f = split(/\t/, $line); 
14 | 	# Rows whose 2nd column is the literal 'chrS' are skipped. 
15 | 	next if $f[1] eq 'chrS'; 
16 | 	$first_seen{$f[0]} //= $.; 
17 | 	push(@{$blocks_of{$f[0]}}, [ $f[1], $f[2] ]); 
18 | }
19 | 
20 | print STDOUT join("\t", qw/chr start end/)."\n"; 
21 | my @chr_order = sort { $first_seen{$a} <=> $first_seen{$b} } keys %blocks_of; 
22 | for my $chr (@chr_order) {
23 | 	my $merged = $mat_obj->mergeLocBlk( $blocks_of{$chr} ); 
24 | 	for my $blk (@{$merged}) {
25 | 		print STDOUT join("\t", $chr, $blk->[0], $blk->[1])."\n"; 
26 | 	}
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/reseq_tools/slct_sweep/merge_wind_pos.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Like merge_wind.pl, but only rows whose 2nd column is made of digits and dots are used,
 3 | # a missing 3rd column defaults to the 2nd, and -dist2join is passed on to mergeLocBlk().
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | use mathSunhh; 
 8 | use Getopt::Long; 
 9 | my %opts; 
10 | GetOptions(\%opts, 
11 | 	"dist2join:i", # 1 
12 | 	"help!", 
13 | ); 
14 | $opts{'dist2join'} //= 1; 
15 | 
16 | my $help_txt = <<HH; 
17 | 
18 | perl   $0    sep_loc    -dist2join $opts{'dist2join'}   > merged_loc 
19 | 
20 | -dist2join     [$opts{'dist2join'}]
21 | 
22 | HH
23 | 
24 | if ($opts{'help'} or (-t and !@ARGV)) {
25 | 	&LogInforSunhh::usage($help_txt); 
26 | }
27 | 
28 | my $mat_obj = mathSunhh->new(); 
29 | 
30 | my (%blocks_of, %first_seen); 
31 | while (my $line = <>) {
32 | 	chomp($line); 
33 | 	my @f = split(/\t/, $line); 
34 | 	next unless $f[1] =~ m/^[\d\.]+$/; 
35 | 	$first_seen{$f[0]} //= $.; 
36 | 	# A row with only a position is treated as the one-point block [pos, pos]. 
37 | 	my $blk_end = $f[2] // $f[1]; 
38 | 	push(@{$blocks_of{$f[0]}}, [ $f[1], $blk_end ]); 
39 | }
40 | 
41 | print STDOUT join("\t", qw/chr start end/)."\n"; 
42 | my @chr_order = sort { $first_seen{$a} <=> $first_seen{$b} } keys %blocks_of; 
43 | for my $chr (@chr_order) {
44 | 	my $merged = $mat_obj->mergeLocBlk( $blocks_of{$chr}, 'dist2join'=>$opts{'dist2join'} ); 
45 | 	for my $blk (@{$merged}) {
46 | 		print STDOUT join("\t", $chr, $blk->[0], $blk->[1])."\n"; 
47 | 	}
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/reseq_tools/slct_sweep/rm_overlap_wind.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Thin out a window table: a row is dropped when it has the same first column as the
 3 | # last printed row and its start (column 2) is not beyond that row's end (column 3).
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | # ChromID WindS   WindE   WindL   BpCnt   perKb_0
 8 | # SpoScf_00001    1       10000   10000   10000   0.0924198651487334
 9 | # SpoScf_00001    1001    11000   10000   10000   0.0439350166638849
10 | # SpoScf_00001    2001    12000   10000   10000   0.0348612437039331
11 | 
12 | my @kept; # fields of the most recently printed row
13 | while (my $line = <>) {
14 | 	chomp($line); 
15 | 	my @win = split(/\t/, $line); 
16 | 	my $same_chr = (@kept and $kept[0] eq $win[0]); 
17 | 	next if $same_chr and $kept[2] >= $win[1]; 
18 | 	print "$line\n"; 
19 | 	@kept = @win; 
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/reseq_tools/slct_sweep/rod_from_PIavg.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For every data row print columns 1-3 and 5, the "high" and "low" values, and
 3 | # high / (high + low); the ratio is 'NA' when high + low is not positive.
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | my $avgC1 = 5; # high
 8 | my $avgC2 = 6; # low
 9 | 
10 | -t and !@ARGV and die "paste filt1_w50ks5k.pi.gCC.PIavg filt1_w50ks5k.pi.gCA.PIavg | deal_table.pl -column 0-5,11 | perl $0 > CC_CA.avgComp\n"; 
11 | 
12 | while (my $line = <>) {
13 | 	chomp($line); 
14 | 	my @f = split(/\t/, $line); 
15 | 	if ($f[0] eq 'ChromID') {
16 | 		# The input header is replaced by a fixed output header. 
17 | 		print STDOUT join("\t", qw/CHROM BIN_START BIN_END BpCnt perKb_High perKb_Low MEAN_Est/)."\n"; 
18 | 		next; 
19 | 	}
20 | 	my ($high, $low) = @f[$avgC1, $avgC2]; 
21 | 	my $total = $high + $low; 
22 | 	my $est = 'NA'; 
23 | 	$est = $high / $total if $total > 0; 
24 | 	print STDOUT join("\t", @f[0,1,2,4], $high, $low, $est)."\n"; 
25 | }
26 | 


--------------------------------------------------------------------------------
/reseq_tools/tassel/cnvt_col_to_TasselTaxaList.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Write a JSON object with a "TaxaList" array holding one {"name": ...} entry per input
 3 | # line; the name is the first tab-separated column of that line.
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | print STDOUT <<TTT0; 
 8 | 
 9 | {
10 |     "TaxaList":[
11 | TTT0
12 | my @taxaArr; 
13 | while (<>) {
14 | 	chomp; 
15 | 	my @ta=split(/\t/, $_); 
16 | 	my $name = $ta[0] // ''; 
17 | 	# Escape backslashes and double quotes so that such names cannot break the JSON string. 
18 | 	$name =~ s/(["\\])/\\$1/g; 
19 | 	my $taxaTxt = <<TTT1; 
20 |         {
21 |             "name":"$name"
22 |         }
23 | TTT1
24 | 	chomp($taxaTxt); 
25 | 	push(@taxaArr, $taxaTxt); 
26 | }
27 | # Entries are joined with ",\n" so that no comma follows the last one. 
28 | print STDOUT join(",\n", @taxaArr)."\n"; 
29 | my $tailTxt = <<TTT2; 
30 |     ]
31 | }
32 | TTT2
33 | chomp($tailTxt); 
34 | print STDOUT $tailTxt; 
35 | 
36 | 


--------------------------------------------------------------------------------
/reseq_tools/vcf_simplify_addRef.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reduce a VCF to GT-only sample columns and insert one extra sample column, named by
 3 | # the refID argument, whose genotype is always '0/0'. Column 8 is replaced by '.',
 4 | # '##' lines are copied unchanged, and records without GT in FORMAT are reported and skipped.
 5 | use strict; 
 6 | use warnings; 
 7 | use LogInforSunhh; 
 8 | 
 9 | !@ARGV and die "perl $0 refID sample.vcf\n"; 
10 | 
11 | my $id = shift; 
12 | 
13 | while (my $line = <>) {
14 | 	chomp($line); 
15 | 	if ($line =~ m!^\s*##!) {
16 | 		print "$line\n"; 
17 | 		next; 
18 | 	}
19 | 	my @col = split(/\t/, $line); 
20 | 	if ($line =~ m!^\s*#CHROM!) {
21 | 		# The new sample name goes right after FORMAT, in front of the existing samples. 
22 | 		print join("\t", @col[0..8], $id, @col[9..$#col])."\n"; 
23 | 		next; 
24 | 	}
25 | 	# Index of the first FORMAT key that is exactly 'GT'. 
26 | 	my @fmt_keys = split(/:/, $col[8]); 
27 | 	my ($gt_idx) = grep { $fmt_keys[$_] eq 'GT' } 0 .. $#fmt_keys; 
28 | 	unless (defined $gt_idx) {
29 | 		&tsmsg("[Err][Wrn] bad FORMAT: [$col[8]]: $line\n"); 
30 | 		next; 
31 | 	}
32 | 	my @gt_only; 
33 | 	for my $sample (@col[9..$#col]) {
34 | 		my $gt = (split(/:/, $sample))[$gt_idx]; 
35 | 		push(@gt_only, $gt); 
36 | 	}
37 | 	print join("\t", @col[0..6], '.', 'GT', '0/0', @gt_only)."\n"; 
38 | }
39 | 
40 | 


--------------------------------------------------------------------------------
/reseq_tools/xpclr/chk_nonsyn.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print every data row, prefixing with 'Chk:' each pair of consecutive rows on the same
 3 | # column-2 ID where column 9 increases while column 3 decreases.
 4 | # Rows starting with 'MarkerID' are dropped; columns 2 and 8 must be equal on every row.
 5 | use strict; 
 6 | use warnings; 
 7 | 
 8 | my ($prev_chr, $prev_pos, $prev_mend); 
 9 | my @out_rows; # [ line text, prefix ]
10 | while (my $line = <>) {
11 | 	chomp($line); 
12 | 	my @f = split(/\t/, $line); 
13 | 	next if $f[0] eq 'MarkerID'; 
14 | 	die "Bad:$line\n" unless $f[1] eq $f[7]; 
15 | 	push(@out_rows, [$line, '']); 
16 | 	my ($chr, $pos, $mend) = @f[1, 2, 8]; 
17 | 
18 | 	# Only rows that directly follow another row of the same ID are compared. 
19 | 	if ( defined $prev_chr and $prev_chr eq $chr ) {
20 | 		if ( $prev_mend < $mend and $prev_pos > $pos ) {
21 | 			$_->[1] = 'Chk:' for @out_rows[-2, -1]; 
22 | 		}
23 | 	}
24 | 
25 | 	($prev_chr, $prev_pos, $prev_mend) = ($chr, $pos, $mend); 
26 | }
27 | for my $row (@out_rows) {
28 | 	my ($text, $prefix) = @{$row}; 
29 | 	print "$prefix$text\n"; 
30 | }
31 | 


--------------------------------------------------------------------------------
/reseq_tools/xpclr/get_uniq_cM.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Group the rows of a marker table by "<column 8>_<column 9>" and print one row per
 3 | # group (the middle one, index int((n-1)/2)), in order of first appearance.
 4 | use strict; 
 5 | use warnings; 
 6 | use LogInforSunhh; 
 7 | use fileSunhh; 
 8 | use mathSunhh; 
 9 | 
10 | !@ARGV and die "perl $0   marker_loc2GM_man\n"; 
11 | 
12 | my $fn_gm  = shift; 
13 | 
14 | my $fh_gm  = &openFH($fn_gm, '<'); 
15 | my %gmP_to_line; 
16 | my %ord; 
17 | while (&wantLineC($fh_gm)) {
18 | 	my @ta = &splitL("\t", $_); 
19 | 	# The old test also required '$. == 0', which is never true once a line has been read, 
20 | 	# so the header fell through and was grouped like a data row. 
21 | 	# Printed from @ta because whether wantLineC() chomps $_ is not visible here. 
22 | 	if ( $ta[0] eq 'MarkerID' ) {
23 | 		print STDOUT join("\t", @ta)."\n"; 
24 | 		next; 
25 | 	}
26 | 	my $tk = "$ta[7]_$ta[8]"; 
27 | 	push(@{$gmP_to_line{$tk}}, [@ta]); 
28 | 	$ord{$tk} //= $.; 
29 | }
30 | close($fh_gm); 
31 | 
32 | for my $tk (sort { $ord{$a} <=> $ord{$b} } keys %gmP_to_line) {
33 | 	# Pick the middle row of the group (the lower middle one for an even count). 
34 | 	my $midN = int( $#{$gmP_to_line{$tk}}/2 ); 
35 | 	print STDOUT join("\t", @{$gmP_to_line{$tk}[$midN]})."\n"; 
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/reseq_tools/xpclr/plot_manhattan/chrLen:
--------------------------------------------------------------------------------
 1 | WM97_Chr01	34083085
 2 | WM97_Chr02	34414252
 3 | WM97_Chr03	28939167
 4 | WM97_Chr04	24315960
 5 | WM97_Chr05	33714806
 6 | WM97_Chr06	27018480
 7 | WM97_Chr07	31477646
 8 | WM97_Chr08	26149438
 9 | WM97_Chr09	34986854
10 | WM97_Chr10	28419553
11 | WM97_Chr11	27106780
12 | 


--------------------------------------------------------------------------------
/reseq_tools/xpclr/plot_manhattan/chrLen_cum:
--------------------------------------------------------------------------------
 1 | chrID	chrLen	chrCumS	chrCumE
 2 | WM97_Chr01	34083085	1	34083085
 3 | WM97_Chr02	34414252	34183086	68597337
 4 | WM97_Chr03	28939167	68697338	97636504
 5 | WM97_Chr04	24315960	97736505	122052464
 6 | WM97_Chr05	33714806	122152465	155867270
 7 | WM97_Chr06	27018480	155967271	182985750
 8 | WM97_Chr07	31477646	183085751	214563396
 9 | WM97_Chr08	26149438	214663397	240812834
10 | WM97_Chr09	34986854	240912835	275899688
11 | WM97_Chr10	28419553	275999689	304419241
12 | WM97_Chr11	27106780	304519242	331626021
13 | 


--------------------------------------------------------------------------------
/reseq_tools/xpclr/xpclr_wind_cmd_wiGmP.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For every row of wind_list whose 2nd column looks like 'chr<N>', copy the file named in
 3 | # column 1 into work_dir and print the matching prepare_xpclr_input_wiGmP.pl + XPCLR
 4 | # command line. work_dir must not exist yet; it is created here.
 5 | use strict; 
 6 | use warnings; 
 7 | use LogInforSunhh; 
 8 | use fileSunhh; 
 9 | 
10 | # All four arguments are needed: the last one is popped as work_dir, the first three are used below. 
11 | @ARGV >= 4 or die "perl $0 wind_list grpList_1 grpList_2 work_dir\n"; 
12 | 
13 | my $oDir = pop(@ARGV); 
14 | -e $oDir and die "exist $oDir\n"; 
15 | mkdir($oDir) or die "Failed to create directory [$oDir]: $!\n"; 
16 | 
17 | open(my $fh_list, '<', $ARGV[0]) or die "Failed to open [$ARGV[0]]: $!\n"; 
18 | my %tmp_cnt = ( 'cntN_base'=>0 , 'cntN_step'=>5e5 ); 
19 | while (<$fh_list>) {
20 | 	&fileSunhh::log_section( $. , \%tmp_cnt ) and &tsmsg( "[Msg] $. line.\n" ); 
21 | 	chomp; 
22 | 	my @ta=split(/\t/, $_); 
23 | 	$ta[1] =~ m/^chr(\d+)$/i or next; 
24 | 	my $cn = $1; # chromosome number taken from column 2 
25 | 	my $oWind = $ta[0]; 
26 | 	# The path in column 1 must contain a '/'; only its last component is kept. 
27 | 	$oWind =~ m!^.*/([^/]+)$! or die "bad $oWind\n"; 
28 | 	$oWind = "$oDir/$1"; 
29 | 	&fileSunhh::_copy( $ta[0], $oWind ); 
30 | 	print ("perl prepare_xpclr_input_wiGmP.pl $oWind.xpclr $oWind $ARGV[1] $ARGV[2] ; XPCLR -xpclr $oWind.xpclr.chr${cn}_g1.geno $oWind.xpclr.chr${cn}_g2.geno $oWind.xpclr.chr${cn}.snp $oWind.xpclr.chr${cn}.snp.out -w1 0.0005 100 100 $cn -p0 0.7 ;\n"); 
31 | }
32 | close($fh_list); 
33 | 
34 | 


--------------------------------------------------------------------------------
/rnaseq_tools/add_sizefactor.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copy in.cnt to STDOUT and insert, right after its first line, a 'sizeFactor' row whose
 3 | # values are looked up by sample name in in.total.sizefactor (column 1 = name, column 2 = factor).
 4 | use strict; 
 5 | use warnings; 
 6 | use fileSunhh; 
 7 | 
 8 | # Both files are required; with a single argument the second open() used to fail with a bare "Died". 
 9 | @ARGV >= 2 or die "perl $0 in.total.sizefactor in.cnt\n"; 
10 | 
11 | open F1,'<',"$ARGV[0]" or die "Failed to open [$ARGV[0]]: $!\n"; 
12 | my %h; # short sample name => size factor 
13 | while (&wantLineC(\*F1)) {
14 | 	my @ta=&splitL("\t", $_); 
15 | 	$ta[0] eq 'sampleNames' and next; 
16 | 	# $ta[0] =~ s/^\S+_([FP][13]_[^_]+_rep\d+)$/$1/ or die "$_\n"; 
17 | 	# NOTE(review): unlike the header pattern below, this one has no '$' anchor - confirm that is intended. 
18 | 	$ta[0] =~ s/^\S+_([SM](?:FL|FR|LV|RT|SD|ST)(?:F1|P1|P3)_rep\d+)/$1/ or die "$ta[0]\n"; 
19 | 	$h{$1} = $ta[1]; 
20 | }
21 | close F1; 
22 | 
23 | open F2,'<',"$ARGV[1]" or die "Failed to open [$ARGV[1]]: $!\n"; 
24 | while (<F2>) {
25 | 	print "$_"; 
26 | 	if ($. == 1) {
27 | 		# Build the sizeFactor row in the column order of the header that was just printed. 
28 | 		chomp; 
29 | 		my @ta = split(/\t/, $_); 
30 | 		my @tb = ('sizeFactor'); 
31 | 		for (my $i=1; $i<@ta; $i++) {
32 | 			# $ta[$i] =~ s/^\S+_([FP][13]_[^_]+_rep\d+)$/$1/ or die "$ta[$i]\n"; 
33 | 			$ta[$i] =~ s/^\S+_([SM](?:FL|FR|LV|RT|SD|ST)(?:F1|P1|P3)_rep\d+)$/$1/ or die "$ta[$i]\n"; 
34 | 			defined $h{$ta[$i]} or die "$ta[$i]\n"; 
35 | 			$tb[$i] = $h{$ta[$i]}; 
36 | 		}
37 | 		print join("\t", @tb)."\n"; 
38 | 	}
39 | }
40 | close F2; 
41 | 
42 | 


--------------------------------------------------------------------------------
/rnaseq_tools/coexp/add_abs_toKME.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Append one column to a tab-separated table: 'absKME' on the first line,
 3 | # abs(column 2) on every other line.
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | while (my $row = <>) {
 8 | 	chomp($row); 
 9 | 	my @cells = split(/\t/, $row); 
10 | 	my $extra = ($. == 1) ? "absKME" : abs($cells[1]); 
11 | 	print join("\t", @cells, $extra)."\n"; 
12 | }
13 | 


--------------------------------------------------------------------------------
/rnaseq_tools/coexp/cmd_list:
--------------------------------------------------------------------------------
1 | ~/bin/Rscript dat1_wgcna.r
2 | 


--------------------------------------------------------------------------------
/rnaseq_tools/coexp/heatmap_by_mod_dist.r:
--------------------------------------------------------------------------------
 1 | #!/home/Sunhh/bin/Rscript
 2 | # Draw a clustered heatmap (PDF) of the distance matrix computed as 1 - similarity.
 3 | argvs <- commandArgs( trailingOnly=TRUE );
 4 | # Both arguments are required; a missing output path used to surface only as an error in pdf().
 5 | if ( length(argvs) < 2 ) {
 6 |   message("Rscript this.R   <sim_mat.tab>   <out_pdf_file>");
 7 |   q();
 8 | }
 9 | fn_simMat        <- as.character(argvs[1]) ; # Out put of : perl dist_of_twoKME.pl sign redo/wgcna_dat1_signed/dat1_KME.txt unsi redo/wgcna_dat1_unsign/dat1_KME.txt > tt
10 | fn_outPdf        <- as.character(argvs[2]) ; 
11 | 
12 | library(pheatmap)
13 | library(RColorBrewer)
14 | # 255-step palette from the reversed 9-class "OrRd" scheme.
15 | colors <- colorRampPalette( rev(brewer.pal(n= 9, name= "OrRd")) )(255)
16 | 
17 | simMat <- read.table(fn_simMat, header=T, row.names=1, stringsAsFactors=F)
18 | distMat <- as.dist(1-simMat)
19 | distMat.mat <- as.matrix(distMat)
20 | 
21 | pdf(file=fn_outPdf, width=8, height=8)
22 | # The same dist object drives both row and column clustering.
23 | # 'color' is spelled out instead of relying on partial matching of 'col'.
24 | pheatmap(distMat.mat, 
25 |   clustering_distance_rows = distMat , 
26 |   clustering_distance_cols = distMat , 
27 |   color = colors , 
28 |   show_colnames=TRUE)
29 | dev.off()
30 | 
31 | 


--------------------------------------------------------------------------------
/rnaseq_tools/coexp/input/dat1_rpkmMean.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/rnaseq_tools/coexp/input/dat1_rpkmMean.gz


--------------------------------------------------------------------------------
/rnaseq_tools/compare_SensAnti.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Sum column 2 of two count tables (rows whose first column starts with '__' are
 3 | # ignored) and report both sums plus the ratios sense/antisense and sense/(sense+antisense).
 4 | use strict; 
 5 | use warnings; 
 6 | use fileSunhh; 
 7 | use LogInforSunhh; 
 8 | 
 9 | my $hh = <<"H1"; 
10 | 
11 | perl $0 S636_fixNH.bam.cntSens S636_fixNH.bam.cntAnti S636_fixNH.bam_pref > S636_fixNH.bam.SensByAnti
12 | 
13 | H1
14 | 
15 | @ARGV >= 3 or &LogInforSunhh::usage($hh); 
16 | 
17 | my $fn1 = shift; 
18 | my $fn2 = shift; 
19 | my $pref = shift; 
20 | 
21 | # Start from 0 so that a table without countable rows yields 0 instead of undef 
22 | # (which used to trigger warnings and empty output cells). 
23 | my ($sum1, $sum2) = (0, 0); 
24 | 
25 | for (map { $_->[1] } grep { !($_->[0] =~ m!^__!)} &fileSunhh::load_tabFile($fn1)) {
26 | 	$sum1 += $_; 
27 | }
28 | for (map { $_->[1] } grep { !($_->[0] =~ m!^__!)} &fileSunhh::load_tabFile($fn2)) {
29 | 	$sum2 += $_; 
30 | }
31 | 
32 | print STDOUT join("\t", qw/inPref sumSens sumAnti SensByAnti SensByTotl/)."\n"; 
33 | # -1 marks a ratio that is not reported. 
34 | my $r = ($sum2 > 0) ? sprintf("%0.4f", $sum1/$sum2) : -1 ; 
35 | # NOTE(review): SensByTotl is also -1 when only one of the two sums is 0, although the total is positive - confirm intended. 
36 | my $r2 = ($sum1 > 0 and $sum2 > 0) ? sprintf("%0.4f", $sum1/($sum1+$sum2)) : -1; 
37 | print STDOUT join("\t", $pref, $sum1, $sum2, $r, $r2)."\n"; 
38 | 
39 | 


--------------------------------------------------------------------------------
/rnaseq_tools/fix_SRAfqID.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Rewrite the name line of every FASTQ record (records are read as 4 lines each)
 3 | # and reduce the third line to a bare '+'.
 4 | use strict; 
 5 | use warnings; 
 6 | 
 7 | -t and !@ARGV and die "perl $0 down_SRR_R2.fq > down_SRR_R2.fix.fq\n"; 
 8 | 
 9 | while (my $l1 = <>) {
10 |   my $l2 = <>; 
11 |   my $l3 = <>; 
12 |   my $l4 = <>; 
13 |   # Take the line end off before rewriting: the trailing \s* in both patterns can swallow
14 |   # the newline, which glued the rewritten name onto the sequence line.
15 |   my $had_nl = chomp($l1); 
16 |   if ($l1 =~ s!^\@\S+\.([12])\s+([^:]+:\d+:[^:]+(:\d+){4})(?:\s+.+|\s*)$!\@$2 $1!) {
17 |     # '@<id>.<1|2> <colon-separated name> ...' became '@<colon-separated name> <1|2>'.
18 |   } elsif ($l1 =~ s!^\@(\S+)\.([12])(?:\s*|\s+length=\d+\s*)$!\@$1 $2!) {
19 |     # '@<id>.<1|2>[ length=N]' became '@<id> <1|2>'.
20 |   }
21 |   $had_nl and $l1 .= "\n"; 
22 |   $l3 =~ s!^\+.*$!+!; 
23 |   print "$l1$l2$l3$l4"; 
24 | }
25 | 


--------------------------------------------------------------------------------
/rnaseq_tools/fix_excelV.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Clamp numbers written in e-notation whose magnitude is below 9E-308 to 9E-308 in every
 3 | # cell of a tab-separated table, so that Excel can still use them.
 4 | use strict; 
 5 | use warnings; 
 6 | use fileSunhh; 
 7 | 
 8 | # Min value that I want to use in Excel is '9E-308'; 
 9 | -t and !@ARGV and die "perl $0 compList_inFruit_01.FDR > compList_inFruit_01.FDR_fixSmallV\n"; 
10 | 
11 | while (<>) {
12 | 	chomp; 
13 | 	my @ta = &splitL("\t", $_); 
14 | 	for my $tb (@ta) {
15 | 		# Only cells of the form [sign]digits[.digits]e-digits are examined. 
16 | 		$tb =~ m!^([+-]?)(\d+(?:\.\d+)?)e\-(\d+)$!i or next; 
17 | 		my ($sign, $prevE, $afterE) = ($1, $2, $3); 
18 | 		if ( $afterE > 308 or ( $afterE == 308 and $prevE < 9 ) ) {
19 | 			# Keep a leading '-' so that a tiny negative value does not turn positive. 
20 | 			$tb = ( $sign eq '-' ? '-' : '' ) . "9E-308"; 
21 | 		}
22 | 	}
23 | 	print STDOUT join("\t", @ta)."\n"; 
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/rnaseq_tools/fromOthers/record:
--------------------------------------------------------------------------------
1 | run_TMM_scale_matrix.pl : This script comes from Trinity. It converts a raw (not normalized) counts matrix into a TMM-scaled matrix. The input can also be a TPM matrix that has not been normalized across samples. 
2 | 
3 | 


--------------------------------------------------------------------------------
/rnaseq_tools/get_DESeqNormCnt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Divide every count (columns 2..n) by the size factor of its column and print the
 3 | # result with -digits decimals. Line 1 is copied as-is, line 2 must be the sizeFactor row
 4 | # and is not printed.
 5 | use strict; 
 6 | use warnings; 
 7 | use Getopt::Long; 
 8 | my %opts; 
 9 | GetOptions(\%opts, 
10 | 	"digits:i", # 2
11 | ); 
12 | 
13 | -t and !@ARGV and die "\nperl $0 P1g_perGene.sense.cnt.3_noSum_keep_wiSizeFact\n\n"; 
14 | 
15 | $opts{'digits'} //= 2; 
16 | my $num_fmt = "%0.$opts{'digits'}f"; 
17 | 
18 | my @size_factor; # indexed like the table columns
19 | while (my $line = <>) {
20 | 	chomp($line); 
21 | 	if ($. == 1) {
22 | 		print STDOUT "$line\n"; 
23 | 		next; 
24 | 	}
25 | 	my @cells = split(/\t/, $line); 
26 | 	if ($. == 2) {
27 | 		die "The second line should be sizeFactor\n" unless $cells[0] =~ m/sizeFactor/i; 
28 | 		@size_factor = @cells; 
29 | 		next; 
30 | 	}
31 | 	for my $i (1 .. $#cells) {
32 | 		$cells[$i] = sprintf($num_fmt, $cells[$i]/$size_factor[$i]); 
33 | 	}
34 | 	print STDOUT join("\t", @cells)."\n"; 
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/rnaseq_tools/map_to_genome/cnvt_featureCounts_to_tpm.r:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | # Convert a featureCounts table (Geneid, ..., Length, then one count column per sample)
 3 | # into a TPM table with one row per gene.
 4 | 
 5 | # Read command line arguments
 6 | args <- commandArgs(trailingOnly = TRUE)
 7 | 
 8 | if (length(args) != 2) {
 9 |   stop("Usage: Rscript cnvt_featureCounts_to_tpm.r <featureCounts_file> <output_file>")
10 | }
11 | 
12 | input_file <- args[1]
13 | output_file <- args[2]
14 | 
15 | # Read featureCounts output
16 | fc <- read.delim(input_file, comment.char = "#")
17 | 
18 | # Remove summary rows if present (e.g., "__no_feature")
19 | fc <- fc[!grepl("^__", fc[,1]), ]
20 | 
21 | # Extract gene lengths and raw counts
22 | gene_lengths <- fc$Length  # column usually named 'Length'
23 | # drop = FALSE keeps a data frame even when there is a single sample column;
24 | # otherwise the result collapses to a vector and apply() below fails.
25 | raw_counts <- fc[, 7:ncol(fc), drop = FALSE]  # columns after column 6 are sample counts
26 | 
27 | # Calculate RPK
28 | rpk <- raw_counts / (gene_lengths / 1000)
29 | 
30 | # Calculate TPM: scale every sample column so that it sums to one million
31 | tpm <- apply(rpk, 2, function(x) x / sum(x) * 1e6)
32 | 
33 | # Combine with gene ID
34 | tpm_df <- data.frame(GeneID = fc$Geneid, tpm)
35 | 
36 | # Write to output
37 | write.table(tpm_df, file = output_file, sep = "\t", quote = FALSE, row.names = FALSE)
38 | 


--------------------------------------------------------------------------------
/rnaseq_tools/map_to_transcriptome/cnvt_synOGgrp_to_trans2gene.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Turn each group line (group ID followed by members separated by whitespace or commas)
 3 | # into one "member<TAB>group ID" row per member, under a transcript_id/gene_id header.
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | -t and !@ARGV and die "perl $0 data/slct-synOG.grp > data/slct-synOG.trans2gene.txt\n";
 8 | 
 9 | print "transcript_id\tgene_id\n";
10 | while (my $line = <>) {
11 |   chomp($line);
12 |   my @tokens = split(/\s|,/, $line);
13 |   next if $tokens[0] eq "OGID";
14 |   my $group_id = shift(@tokens);
15 |   # Consecutive separators leave empty tokens behind; those are not members.
16 |   my @members = grep { $_ !~ m!^\s*$! } @tokens;
17 |   print "$_\t$group_id\n" for @members;
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/run_reapr.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ### Basic functions.
 3 | # Print a time-stamped copy of a command, run it with eval, then print a completion line.
 4 | # All arguments are joined with spaces, so the whole command runs whether or not the
 5 | # caller quoted it (with "$1" an unquoted call executed only the first word).
 6 | function exe_cmd {
 7 | 	echo "[$(date)][CMD] $*"
 8 | 	eval "$*"
 9 | 	echo "[$(date)][Rec] Done."
10 | }
11 | 
12 | # Print a message prefixed with the current date.
13 | function tsmsg {
14 | 	echo "[$(date)]$1"
15 | }
16 | 
17 | 
18 | exe_reapr='/home/Sunhh/src/Assemble/REAPR/Reapr_1.0.17/reapr'
19 | cpuN=10
20 | rd_ident=0.99
21 | 
22 | in_fa='NSP306_Pla03s01GC_Gt5h.scf.fa'
23 | use_faPref='NSP306_Pla03s01GC_Gt5h'
24 | odir="${use_faPref}_15kb"
25 | 
26 | #long_fq1=/home/Sunhh/Assembly/Sweetpotato/NSP306/FinalFq/NSP306_15kb_rc_sub100_R1.fq
27 | #long_fq2=/home/Sunhh/Assembly/Sweetpotato/NSP306/FinalFq/NSP306_15kb_rc_sub100_R2.fq
28 | long_fq1=/home/Sunhh/Assembly/Sweetpotato/NSP306/FinalFq/NSP306_15kb_pTr_rc_R1.fq
29 | long_fq2=/home/Sunhh/Assembly/Sweetpotato/NSP306/FinalFq/NSP306_15kb_pTr_rc_R2.fq
30 | fq_label=$odir
31 | 
32 | shrtBamPref=""
33 | longBam="${fq_label}_long.bam"
34 | 
35 | para_smalt="-n $cpuN -y $rd_ident"
36 | para_pipe="-break a=1 -score f=15"
37 | 
38 | tsmsg "[Rec] Begin."
39 | 
40 | # Each command string is passed quoted so that it reaches exe_cmd as one argument.
41 | cmd="$exe_reapr facheck $in_fa $use_faPref"
42 | exe_cmd "$cmd"
43 | 
44 | cmd="$exe_reapr smaltmap $para_smalt ${use_faPref}.fa $long_fq1 $long_fq2 $longBam"
45 | exe_cmd "$cmd"
46 | 
47 | cmd="$exe_reapr pipeline $para_pipe ${use_faPref}.fa $longBam $odir $shrtBamPref"
48 | exe_cmd "$cmd"
49 | 
50 | tsmsg "[Rec] All done."
51 | 


--------------------------------------------------------------------------------
/sample_scripts/check_pm_version.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copied from http://www.perlmonks.org/?node_id=37237
 3 | # For every module pattern on the command line, print one row per matching
 4 | # module: its id, the installed version, the CPAN version, and a "*" when
 5 | # the module is not reported as up to date.
 6 | 
 7 | use CPAN;
 8 | 
 9 | printf("%-20s %10s %10s\n", "Module", "Installed", "CPAN");
10 | 
11 | for my $query (@ARGV) {
12 |   for my $module (CPAN::Shell->expand("Module", $query)) {
13 |     # A missing version (undefined, or the literal string "undef") is shown as "-".
14 |     my $installed = $module->inst_version;
15 |     my $inst_col  = (!defined($installed) || $installed eq "undef") ? "-" : $installed;
16 |     my $latest    = $module->cpan_version;
17 |     my $cpan_col  = (!defined($latest) || $latest eq "undef") ? "-" : $latest;
18 |     my $flag      = $module->uptodate ? "" : "*";
19 |     printf("%-20s %10s %10s %s\n", $module->id, $inst_col, $cpan_col, $flag);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/sample_scripts/cmd_list_trinity_denovo:
--------------------------------------------------------------------------------
 1 | # https://github.com/trinityrnaseq/RNASeq_Trinity_Tuxedo_Workshop/wiki/Trinity-De-novo-Transcriptome-Assembly-Workshop
 2 | /data/Sunhh/src/Assemble/trinity/trinityrnaseq-Trinity-v2.4.0/Trinity \
 3 |  --seqType fq \
 4 |  --max_memory 200G \
 5 |  --left  reads/Flower_R1.fq,reads/Fruit_R1.fq,reads/Leaf_R1.fq,reads/Root_R1.fq,reads/Stem_R1.fq \
 6 |  --right reads/Flower_R2.fq,reads/Fruit_R2.fq,reads/Leaf_R2.fq,reads/Root_R2.fq,reads/Stem_R2.fq \
 7 |  --KMER_SIZE 32 \
 8 |  --CPU   24 \
 9 |  --SS_lib_type RF \
10 |  --min_contig_length 200 \
11 |  --output trinity_novo01 \
12 |  --full_cleanup
13 | /data/Sunhh/src/Assemble/trinity/trinityrnaseq-Trinity-v2.4.0/Trinity \
14 |  --seqType fq \
15 |  --max_memory 200G \
16 |  --left  reads/Flower_R1.fq,reads/Fruit_R1.fq,reads/Leaf_R1.fq,reads/Root_R1.fq,reads/Stem_R1.fq \
17 |  --right reads/Flower_R2.fq,reads/Fruit_R2.fq,reads/Leaf_R2.fq,reads/Root_R2.fq,reads/Stem_R2.fq \
18 |  --KMER_SIZE 32 \
19 |  --CPU   24 \
20 |  --SS_lib_type RF \
21 |  --min_contig_length 200 \
22 |  --normalize_max_read_cov 100 \
23 |  --output trinity_novo02 \
24 |  --full_cleanup
25 | 


--------------------------------------------------------------------------------
/sample_scripts/run_trinity_guided.sh:
--------------------------------------------------------------------------------
1 | /data/Sunhh/src/Trinity/trinityrnaseq-2.0.6/Trinity --single reads/C_6_GCCAAT_rmRRNA.fq,reads/C_7_CAGATC_rmRRNA.fq,reads/C_8_ACTTGA_rmRRNA.fq,reads/C_13_AGTCAA_rmRRNA.fq,reads/C_14_AGTTCC_rmRRNA.fq,reads/C_15_ATGTCA_rmRRNA.fq,reads/C_22_CGTACG_rmRRNA.fq,reads/C_23_GAGTGG_rmRRNA.fq,reads/C_24_GGTAGC_rmRRNA.fq,reads/C_31_CACGAT_rmRRNA.fq,reads/C_32_CACTCA_rmRRNA.fq,reads/C_33_CAGGCG_rmRRNA.fq,reads/C_40_TGACCA_rmRRNA.fq,reads/C_41_ACAGTG_rmRRNA.fq,reads/C_42_GCCAAT_rmRRNA.fq,reads/C_49_AGTCAA_rmRRNA.fq,reads/C_50_AGTTCC_rmRRNA.fq,reads/C_51_ATGTCA_rmRRNA.fq --seqType fq --max_memory 40G --SS_lib_type R --CPU 16 --normalize_reads --output P1_trinity_guided --genome_guided_bam 02.map2P1AllUnmsk_thout.accepted_hits.srt.bam --genome_guided_max_intron 100000 --genome_guided_min_coverage 2 
2 | 


--------------------------------------------------------------------------------
/sample_scripts/svg2png.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w 
 2 | # For every "*_data" directory under the current directory:
 3 | #   1. convert each *.svg inside it to a PNG with the "convert" command;
 4 | #   2. merge the PNGs in one column into <name>_Chroms.png with "montage";
 5 | #   3. move the SVG files into SVG/ and the per-file PNGs into PNG/.
 6 | # External commands are run in list form (no shell), so file names containing
 7 | # spaces or shell metacharacters are passed through unchanged.
 8 | use strict; 
 9 | use Cwd; 
10 | 
11 | # An argument is required only as a safeguard against accidental runs; its value is not used.
12 | !@ARGV and die "perl $0 go\n  Run in the directory holding the *_data folders; the argument value is ignored.\n"; 
13 | 
14 | my $origin_dir = getcwd(); 
15 | 
16 | for my $dn (glob("*_data")) {
17 | 	print STDOUT "DIR=[$dn]\n"; 
18 | 	-d $dn or next; 
19 | 	chdir($dn) or do { warn "[Wrn] Failed to enter [$dn]: $!\n"; next; }; 
20 | 	my $rilN = $dn; 
21 | 	$rilN =~ s/_data$//; 
22 | 
23 | 	my @svg_files = glob("*.svg"); 
24 | 	if (@svg_files == 0) {
25 | 		# Nothing to convert; do not call montage/mv with empty file lists.
26 | 		print STDOUT "[Msg]No SVG file found in $dn; skipped.\n"; 
27 | 		chdir($origin_dir) or die "[Err] Failed to return to [$origin_dir]: $!\n"; 
28 | 		next; 
29 | 	}
30 | 
31 | 	mkdir("PNG", 0755); 
32 | 	mkdir("SVG", 0755); 
33 | 	my @png_files; 
34 | 	for my $svg (@svg_files) {
35 | 		(my $png = $svg) =~ s/\.svg$/.png/; 
36 | 		my $rc = system("convert", $svg, $png); 
37 | 		print "convert $svg $png\n"; 
38 | 		if ($rc != 0) {
39 | 			# Keep going, but leave the failed image out of the merged figure.
40 | 			warn "[Wrn] convert failed for [$svg] in [$dn]\n"; 
41 | 			next; 
42 | 		}
43 | 		push(@png_files, $png); 
44 | 	}
45 | 	if (@png_files > 0) {
46 | 		my $nn = scalar(@png_files); 
47 | 		# One column with $nn rows.
48 | 		&run_and_log("montage", @png_files, "-tile", "1x$nn", "-geometry", "-0-0", "${rilN}_Chroms.png"); 
49 | 	}
50 | 	&run_and_log("mv", @svg_files, "SVG/"); 
51 | 	@png_files > 0 and &run_and_log("mv", @png_files, "PNG/"); 
52 | 	chdir($origin_dir) or die "[Err] Failed to return to [$origin_dir]: $!\n"; 
53 | 	print STDOUT "[Msg]$dn processing done.\n"; 
54 | }
55 | 
56 | # Run a command given as a list (no shell involved), then print it with a "[Cmd]" prefix.
57 | # Returns the raw status from system().
58 | sub run_and_log {
59 | 	my @cmd = @_; 
60 | 	my $rc = system(@cmd); 
61 | 	print STDOUT "[Cmd]" . join(" ", @cmd) . "\n"; 
62 | 	$rc != 0 and warn "[Wrn] Command failed: " . join(" ", @cmd) . "\n"; 
63 | 	return($rc); 
64 | }# run_and_log() 
65 | 


--------------------------------------------------------------------------------
/self_interest/list_all_dir.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print a path and, if it is a directory, everything found below it (one path per line).
 3 | use strict; 
 4 | use warnings; 
 5 | 
 6 | my $pd0 = shift; 
 7 | defined $pd0 or die "perl $0 path_to_dir\n"; 
 8 | my @all_sub = &list_sub($pd0); 
 9 | for (@all_sub) {
10 | 	print "$_\n"; 
11 | }
12 | 
13 | # list_sub($path)
14 | #   Returns a list whose first element is $path itself. If $path is a directory,
15 | #   its direct children follow (in readdir order, without "." and ".."), and then
16 | #   the descendants of each child directory.
17 | #   Symbolic links among the children are listed but never descended into, so a
18 | #   link pointing back to a parent directory cannot cause endless recursion.
19 | #   Dies if a directory cannot be opened.
20 | sub list_sub {
21 | 	my ($pd) = @_; 
22 | 	my @subB = ($pd); 
23 | 	# $pd is not a folder; return $pd itself. 
24 | 	-d $pd or return(@subB); 
25 | 	# $pd is a folder, get the children of $pd. A lexical handle is used so each
26 | 	# recursion level owns its handle, and it is closed before recursing. 
27 | 	opendir(my $dh, $pd) or die "failed to opendir [$pd]\n"; 
28 | 	my @sub1 = map { "$pd/$_" } grep { $_ !~ m!^\.\.?$! } readdir($dh); 
29 | 	closedir($dh); 
30 | 	push(@subB, @sub1); 
31 | 	# There might be subdirs in @sub1, so check it. 
32 | 	for my $sd1 (@sub1) {
33 | 		# Skip links: already listed above, not followed. 
34 | 		-l $sd1 and next; 
35 | 		my @sub2 = &list_sub($sd1); 
36 | 		# Element 0 is $sd1 itself, which is already in @subB. 
37 | 		push(@subB, @sub2[1..$#sub2]); 
38 | 	}
39 | 	return(@subB); 
40 | }# list_sub() 
41 | 


--------------------------------------------------------------------------------
/site_search/cmd_list:
--------------------------------------------------------------------------------
 1 | fimo --o fimoOut_allTrans  --bfile --motif--  yyt_motif.meme  Cmel351.annot.chr.gff3.jnLoc.YYTpromoter_loc.fa
 2 | perl parse_fimoTSV.pl fimoOut_allTrans/fimo.tsv > fimoOut_allTrans/fimo.tsv.info
 3 | perl setup_keySite.pl fimoOut_allTrans/fimo.tsv.info > fimoOut_allTrans/fimo.tsv.info.keysite
 4 | 
 5 | perl pipe_gatk_inFqList.pl \
 6 |  -cpuN          15 \
 7 |  -conf_file     BY9Hyyt_gatk.conf \
 8 |  -in_pref_list  pref_BY9H \
 9 |  -prj_ID        BY9Hyyt \
10 |  -wrk_dir       proc_BY9Hyyt \
11 |  -doStep        1,2,3,4,5,6,7,8,9 \
12 |  -plCatVar -intervalLen 1000000 \
13 |  -CallByScf
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/site_search/parse_fimoTSV.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Add sequence-level coordinates to each hit line of a FIMO TSV table.
 3 | #   The third column must look like mrnaID:seqID:strand:start:end . Columns 4 and 5
 4 | #   (hit start/stop within that region) are shifted onto the seqID coordinates, and
 5 | #   the reported strand is the region strand, flipped when column 6 is '-'.
 6 | #   Five columns are appended: mrnaID SeqID MotifStart MotifEnd MotifStr.
 7 | use strict; 
 8 | use warnings; 
 9 | 
10 | while (defined(my $line = <>)) {
11 | 	# Blank and comment lines are dropped.
12 | 	next if $line =~ m!^\s*$!; 
13 | 	next if $line =~ m!^\s*#!; 
14 | 	chomp($line); 
15 | 	my @col = split(/\t/, $line); 
16 | 	# The first input line is the header; extend it with the new column names.
17 | 	if ($. == 1) {
18 | 		print join("\t", @col, qw/mrnaID SeqID MotifStart MotifEnd MotifStr/)."\n"; 
19 | 		next; 
20 | 	}
21 | 	# Parse ID;
22 | 	$col[2] =~ m!^(\S+):([^\s:]+):([+-]):(\d+):(\d+)$! or die "bad ID [$col[2]]\n"; 
23 | 	my ($mrnaID, $seqID, $regStr, $regS, $regE) = ($1, $2, $3, $4, $5); 
24 | 	($regStr eq '+' or $regStr eq '-') or die "$line\n"; 
25 | 	my ($seqS, $seqE); 
26 | 	if ($regStr eq '+') {
27 | 		# Offsets count from the region start.
28 | 		($seqS, $seqE) = ($regS + $col[3] - 1, $regS + $col[4] - 1); 
29 | 	} else {
30 | 		# Offsets count back from the region end.
31 | 		($seqS, $seqE) = ($regE - $col[4] + 1, $regE - $col[3] + 1); 
32 | 	}
33 | 	my $seqStr = $regStr; 
34 | 	$col[5] eq '-' and $seqStr =~ tr/+-/-+/; 
35 | 	print join("\t", @col, $mrnaID, $seqID, $seqS, $seqE, $seqStr)."\n"; 
36 | }
37 | # Example input:
38 | # motif_id	motif_alt_id	sequence_name	start	stop	strand	score	p-value	q-value	matched_sequence
39 | # yytNOR		MELO3C024429T1:Cmel351_Chr01:+:35115704:35118203	349	359	+	16.7447	8.7e-08	1	ACACGTCACCT
40 | # yytNOR		MELO3C000679T1:Cmel351_Chr00:-:17643862:17646361	429	439	-	16.7447	8.7e-08	1	ACACGTCACCT
41 | 


--------------------------------------------------------------------------------
/site_search/yyt_motif.meme:
--------------------------------------------------------------------------------
 1 | MEME version 4
 2 | 
 3 | ALPHABET= ACGT
 4 | 
 5 | strands: + -
 6 | 
 7 | Background letter frequencies
 8 | A 0.334 C 0.166 G 0.166 T 0.334
 9 | 
10 | 
11 | MOTIF yytNOR
12 | letter-probability matrix: alength= 4 w= 11 
13 |  0.683734 0.069873 0.127581 0.118812
14 |  0.003460 0.924404 0.049505 0.022631
15 |  1.000000 0.000000 0.000000 0.000000
16 |  0.000000 1.000000 0.000000 0.000000
17 |  0.000000 0.000000 1.000000 0.000000
18 |  0.025360 0.229520 0.057426 0.687694
19 |  0.530410 0.368034 0.015842 0.085714
20 |  0.635361 0.000000 0.000000 0.364639
21 |  0.100141 0.524752 0.083168 0.291938
22 |  0.057709 0.378501 0.057426 0.506365
23 |  0.212164 0.172843 0.128147 0.486846
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/software_fix/anchorwave/fix_awMAF.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [12/21/2022] Fix out.maf output from AnchorWave.
 3 | ### http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms
 4 | # Expected input: '#' comment lines, then blocks made of one 'a' line, two 's'
 5 | # lines and a blank line. In every 's' line on the '-' strand the start column is
 6 | # rewritten as srcSize-(start+size); '+' strand lines are printed unchanged.
 7 | use strict;
 8 | use warnings;
 9 | 
10 | -t and !@ARGV and die "perl $0 AnchorWave_out_align1.maf > fixed.maf\n";
11 | 
12 | while (<>) {
13 |   m!^#! and do { print; next; };
14 |   # Extra blank lines outside of a block are passed through.
15 |   m!^\s*$! and do { print; next; };
16 |   m!^a\s! or die "[Err] Unexpected format of MAF:$_\n";
17 |   my $s1=<>;
18 |   my $s2=<>;
19 |   (defined $s1 and defined $s2) or die "[Err] Truncated alignment block after:$_\n";
20 |   my $blank=<>;
21 |   # The closing blank line may be absent when the last block ends the input.
22 |   defined $blank or $blank = "\n";
23 |   $blank =~ m!^\s*$! or die "[Err] Unexpected blank line: $blank\n";
24 |   $s1 = &fix_strPos($s1);
25 |   $s2 = &fix_strPos($s2);
26 |   print "$_$s1$s2$blank";
27 | }
28 | 
29 | # fix_strPos($s_line)
30 | #   Input : one MAF 's' line: s <name> <start> <size> <strand> <srcSize> ...
31 | #   Return: the same line; for strand '-' the start field is replaced with
32 | #           srcSize-(start+size). Dies if the line does not have this layout.
33 | sub fix_strPos {
34 |   my ($ss) = @_;
35 |   $ss =~ m!^s\s+\S+\s+(\d+)\s+(\d+)\s+([+-])\s+(\d+)! or die "[Err] Unexpected s line: $ss\n";
36 |   if ($3 eq '-') {
37 |     my $newS = $4-($1+$2);
38 |     $ss =~ s!^(s\s+\S+\s+)\d+(\s+)!$1$newS$2! or die "[Err] Failed to change: $ss\n";
39 |   } elsif ($3 eq '+') {
40 |     ;
41 |   } else {
42 |     die "[Err] Impossible strand $3: $ss\n";
43 |   }
44 |   return($ss);
45 | }# fix_strPos()
46 | 


--------------------------------------------------------------------------------
/temp/README.md:
--------------------------------------------------------------------------------
 1 | # List of script functions.
 2 | 
 3 | ## Convert figure formats.
 4 | - Convert `pdf` format to `tiff` and `png`.
 5 | 
 6 | ```sh
 7 | perl scripts/cnvt_pdf_to_tiff.pl /path/to/input.pdf  -out_format 'tiff,png' [ -out_dpi 300 ]
 8 | ```
 9 | 
10 | 


--------------------------------------------------------------------------------
/temp/cmd_list_busco:
--------------------------------------------------------------------------------
1 | # python3 BUSCO_v1.1b1/BUSCO_v1.1b1.py --cpu 20 -o spinach_unigene_0415_byBUSCO -in spinach_unigene_0415.fa -l /data/Sunhh/src/Annot/BUSCO/eukaryota -m trans
2 | python3 BUSCO_v1.1b1/BUSCO_v1.1b1.py --cpu 20 -o spinach_unigene_0415_byBUSCO_wiAug -sp SPGr2FiltUse_AED0 -in spinach_unigene_0415.fa -l /data/Sunhh/src/Annot/BUSCO/eukaryota -m trans
3 | 
4 | 


--------------------------------------------------------------------------------
/temp/cmd_list_cegma:
--------------------------------------------------------------------------------
1 | cegma -g spinach_unigene_0415.fa -o spinach_unigene_0415 --max_intron 0 -T 20 --verbose 
2 | 
3 | 


--------------------------------------------------------------------------------
/temp/cnvt_pairwise_to_tab.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Flatten a BLAST report (read with Bio::SearchIO, format 'blast') into a
 3 | # tab-delimited table with one line per HSP:
 4 | #   query name, hit name, hit description, HSP score, HSP e-value.
 5 | use strict; 
 6 | use warnings; 
 7 | use Bio::SearchIO; 
 8 | 
 9 | !@ARGV and die "perl $0 in.blast > out.tab\n"; 
10 | 
11 | my $inBlastFile = shift; 
12 | 
13 | my $in = Bio::SearchIO->new(
14 |   -format => 'blast', 
15 |   -file   => $inBlastFile
16 | ); 
17 | 
18 | # Walk results (queries) -> hits -> HSPs.
19 | while ( my $result = $in->next_result ) {
20 |   while ( my $hit = $result->next_hit ) {
21 |     while ( my $hsp = $hit->next_hsp ) {
22 |       my @fields = (
23 |         $result->query_name(), 
24 |         $hit->name(), 
25 |         $hit->description(), 
26 |         $hsp->score(), 
27 |         $hsp->evalue()
28 |       ); 
29 |       print STDOUT join("\t", @fields)."\n"; 
30 |       # A filter on $hsp->length('total') or $hsp->percent_identity could be added here.
31 |     }
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/temp/forRonan/addID_to_loci.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | use strict; 
 3 | use warnings; 
 4 | # Append a new ID column to a .loc table. 
 5 | #   Input  line : ID <TAB> Start <TAB> End
 6 | #   Output line : ID <TAB> Start <TAB> End <TAB> Pref_ID_Start_End
 7 | # Dies if the same new ID is generated twice. 
 8 | 
 9 | !@ARGV and die "perl $0 pref in_raw.loc\n"; 
10 | 
11 | my $pref = shift; 
12 | 
13 | my %seen_id; 
14 | while (defined(my $line = <>)) {
15 | 	chomp($line); 
16 | 	my @fld = split(/\t/, $line); 
17 | 	my $new_id = join("_", $pref, @fld[0,1,2]); 
18 | 	if (defined $seen_id{$new_id}) {
19 | 		die "repeat new id=$new_id\n"; 
20 | 	}
21 | 	$seen_id{$new_id} = 1; 
22 | 	print STDOUT join("\t", @fld[0,1,2], $new_id)."\n"; 
23 | }
24 | 


--------------------------------------------------------------------------------
/temp/forRonan/depC_cutoff_by_dep_stat.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Read a statistics table containing the lines
 3 | #   interval_mean<TAB>number   and   interval_stdev<TAB>number
 4 | # and print INS_mean, INS_stdev and INS_cutoff (= mean + 3 * stdev).
 5 | # If a key occurs more than once, the last occurrence is used.
 6 | use strict; 
 7 | use warnings; 
 8 | 
 9 | -t and !@ARGV and die "perl $0 in_dep_stat > out_cutoff\n"; 
10 | 
11 | my ($interval_mean, $interval_stdev); 
12 | while (<>) {
13 | 	chomp; 
14 | 	# 'e'/'E' are accepted so that values written in exponent notation are read too. 
15 | 	m!^interval_mean\t([\+\-\d.eE]+)$! and $interval_mean = $1; 
16 | 	m!^interval_stdev\t([\+\-\d.eE]+)$! and $interval_stdev = $1; 
17 | }
18 | # Stop with a clear message instead of printing empty values and a meaningless cutoff. 
19 | defined $interval_mean  or die "[Err] No interval_mean line found in input.\n"; 
20 | defined $interval_stdev or die "[Err] No interval_stdev line found in input.\n"; 
21 | print "INS_mean=$interval_mean\n"; 
22 | print "INS_stdev=$interval_stdev\n"; 
23 | print "INS_cutoff=" . ($interval_mean+3*$interval_stdev) . "\n"; 
24 | 


--------------------------------------------------------------------------------
/temp/forRonan/filter_sam.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | # 2015-07-14 For ronan's internship project. 
 3 | #  Filter parent_Rd to parent_Asm alignments
 4 | #   1. Only unique alignments kept (tag XT:A:U). 
 5 | #   2. Only 100% match alignments kept (tag NM:i:0). 
 6 | #   3. Only mapping quality >= 1 kept. 
 7 | #  A record is printed only when all three conditions hold; header lines (@...) are always printed. 
 8 | #  After filtering, only alignments exactly the same as the reference are kept, which can be used to call same region between Rd_reads and Asm_reference. 
 9 | use strict; 
10 | use warnings; 
11 | use LogInforSunhh; 
12 | use SeqAlnSunhh; 
13 | 
14 | -t and !@ARGV and die "perl $0 in_rd2Asm.sam\n"; 
15 | 
16 | while (<>) {
17 | 	m!^\@! and do { print; next; }; 
18 | 	# The two tags are tested one by one: a single alternation would accept a 
19 | 	# record that carries only one of them. 
20 | 	m!\tXT:A:U(?:\t|$)! or next; 
21 | 	m!\tNM:i:0(?:\t|$)! or next; 
22 | 	my @ta = split(/\t/, $_); 
23 | 	# The 5th column is compared as the mapping quality; records too short to have it are dropped. 
24 | 	defined $ta[4] or next; 
25 | 	$ta[4] >= 1 or next; 
26 | 	print ; 
27 | }
28 | 


--------------------------------------------------------------------------------
/temp/reformat_tabHit.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # For each tab-delimited line: cut the leading non-blank word (and the single
 3 | # space after it) from column 3 and store that word in column 2.
 4 | # Dies with the input line if column 3 does not start with "word<space>".
 5 | use strict; 
 6 | use warnings; 
 7 | 
 8 | while (defined(my $line = <>)) {
 9 | 	chomp($line); 
10 | 	my @col = split(/\t/, $line); 
11 | 	$col[2] =~ m!^(\S+) (.*)\z!s or die "$line\n"; 
12 | 	@col[1,2] = ($1, $2); 
13 | 	print STDOUT join("\t", @col)."\n"; 
14 | }
15 | 


--------------------------------------------------------------------------------
/temp/replace_unicode.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # [5/10/2022] Replace unicode characters to space. At first I need to provide a list of good characters by "perl -e 'while (<>) { split(//, $_);}'"
 3 | # The first tab-delimited field of every line in good_char.list is an allowed
 4 | # character; the tab character is always allowed. In the input, every character
 5 | # that is not allowed is replaced with one space.
 6 | use strict;
 7 | use warnings;
 8 | 
 9 | !@ARGV and die "perl $0 good_char.list input.file > input.replaced\n";
10 | 
11 | my $goodListFile = shift;
12 | my %is_good;
13 | open(my $listFh, '<', "$goodListFile") or die;
14 | while (my $entry = <$listFh>) {
15 |   chomp($entry);
16 |   # Guard both ends with "AA" so that split() keeps leading/trailing blank
17 |   # fields; the guards are removed again right after splitting.
18 |   my @fld = split(/\t/, "AA${entry}AA");
19 |   $fld[0]  =~ s!^AA!!;
20 |   $fld[-1] =~ s!AA$!!;
21 |   $is_good{$fld[0]} = $fld[0];
22 | }
23 | close($listFh);
24 | $is_good{"\t"} = "\t";
25 | 
26 | while (my $text = <>) {
27 |   chomp($text);
28 |   my $clean = join("", map { defined $is_good{$_} ? $_ : " " } split(//, $text));
29 |   print $clean."\n";
30 | }
31 | 


--------------------------------------------------------------------------------
/temp/rm_gff_byLis.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Remove features from a GFF file by ID list.
 3 | #   rm_list : first tab-delimited column holds the IDs to remove; blank lines and '#' lines are ignored.
 4 | #   in_gff  : every feature line whose ID= or Parent= attribute value (column 9) is in the list is dropped;
 5 | #             blank lines, '#' lines and all other feature lines are printed unchanged.
 6 | #   NOTE(review): a multi-valued Parent=a,b is compared as the whole string "a,b" - confirm whether it should be split.
 7 | use strict; 
 8 | use warnings; 
 9 | 
10 | # Both file names are required.
11 | @ARGV >= 2 or die "perl $0 rm_list in_gff\n"; 
12 | 
13 | my $lisF =shift; 
14 | my $gffF = shift; 
15 | open LF,'<',"$lisF" or die "Failed to open list file [$lisF]: $!\n"; 
16 | my %rmid; 
17 | while (<LF>) {
18 | 	chomp; 
19 | 	m/^\s*(#|$)/ and next; 
20 | 	my @ta = split(/\t/, $_); 
21 | 	$rmid{$ta[0]} = 1; 
22 | }
23 | close LF; 
24 | open GF,'<',"$gffF" or die "Failed to open GFF file [$gffF]: $!\n"; 
25 | while (<GF>) {
26 | 	chomp; 
27 | 	if ( m/^\s*(#|$)/ ) {
28 | 		print "$_\n"; 
29 | 		next; 
30 | 	} 
31 | 	my @ta = split(/\t/, $_); 
32 | 	# Lines without a 9th column cannot match any ID; treat the attribute text as empty.
33 | 	my $attr = defined $ta[8] ? $ta[8] : ''; 
34 | 	if ($attr =~ m/(?:^|;|\s)ID=([^\s;]+)/i) {
35 | 		defined $rmid{$1} and next; 
36 | 	}
37 | 	if ($attr =~ m/(?:^|;|\s)Parent=([^\s;]+)/i) {
38 | 		defined $rmid{$1} and next; 
39 | 	}
40 | 	print "$_\n"; 
41 | }
42 | close GF; 
43 | 


--------------------------------------------------------------------------------
/temp/scripts/cnvt_pdf_to_tiff.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert one .pdf/.svg figure to one or more output formats with the "convert" command.
 3 | # The output files are named <input without extension>.<format>.
 4 | use strict;
 5 | use warnings;
 6 | use LogInforSunhh;
 7 | use Getopt::Long;
 8 | my %opts;
 9 | GetOptions(\%opts,
10 |   "out_format:s", # tiff.
11 |   "out_dpi:i",  # 300;
12 |   "help!"
13 | );
14 | 
15 | $opts{'out_format'} //= 'tiff';
16 | $opts{'out_dpi'}  //= 300;
17 | 
18 | my $help_txt = <<HH;
19 | ######################################################################
20 | perl $0  input.pdf
21 | 
22 | -out_format       [$opts{'out_format'}] Can also be 'tiff,png'
23 | -out_dpi          [$opts{'out_dpi'}]
24 | ######################################################################
25 | HH
26 | 
27 | !@ARGV and &LogInforSunhh::usage($help_txt);
28 | defined $opts{'help'} and &LogInforSunhh::usage($help_txt);
29 | 
30 | my $f=shift;
31 | $f =~ m!^(\S+)\.(pdf|svg)$!i or die "|$f|\n";
32 | # Save the capture now: any later successful match (such as the whitespace
33 | # removal below) resets $1.
34 | my $baseName = $1;
35 | 
36 | for my $ofmt (split(/,/, $opts{'out_format'})) {
37 |   $ofmt =~ s!\s!!g;
38 |   # Skip empty entries, e.g. from 'tiff,,png'.
39 |   $ofmt eq '' and next;
40 |   &runCmd("convert -density $opts{'out_dpi'} $f -quality 100 $baseName.$ofmt");
41 | }
42 | # &runCmd("pdf2svg $f $baseName.svg");
43 | # pdf2svg domestication_time-CLC_CLV.csv_splittime.histo.pdf domestication_time-CLC_CLV.csv_splittime.histo.svg
44 | 


--------------------------------------------------------------------------------
/temp/slct_gff_byLis.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Select features from a GFF file by ID list.
 3 | #   slct_list : first tab-delimited column holds the wanted IDs; blank lines and '#' lines are ignored.
 4 | #   in_gff    : a feature line is printed only when its ID= or Parent= attribute value (column 9) is in the list;
 5 | #               blank lines and '#' lines are always printed.
 6 | #   NOTE(review): a multi-valued Parent=a,b is compared as the whole string "a,b" - confirm whether it should be split.
 7 | use strict; 
 8 | use warnings; 
 9 | 
10 | # Both file names are required. The list holds IDs to keep, not to remove.
11 | @ARGV >= 2 or die "perl $0 slct_list in_gff\n"; 
12 | 
13 | my $lisF =shift; 
14 | my $gffF = shift; 
15 | open LF,'<',"$lisF" or die "Failed to open list file [$lisF]: $!\n"; 
16 | my %rmid; 
17 | while (<LF>) {
18 | 	chomp; 
19 | 	m/^\s*(#|$)/ and next; 
20 | 	my @ta = split(/\t/, $_); 
21 | 	$rmid{$ta[0]} = 1; 
22 | }
23 | close LF; 
24 | open GF,'<',"$gffF" or die "Failed to open GFF file [$gffF]: $!\n"; 
25 | while (<GF>) {
26 | 	chomp; 
27 | 	if ( m/^\s*(#|$)/ ) {
28 | 		print "$_\n"; 
29 | 		next; 
30 | 	} 
31 | 	my @ta = split(/\t/, $_); 
32 | 	# Lines without a 9th column cannot match any ID; treat the attribute text as empty.
33 | 	my $attr = defined $ta[8] ? $ta[8] : ''; 
34 | 	my $is_o = 0; 
35 | 	if ($attr =~ m/(?:^|;|\s)ID=([^\s;]+)/i) {
36 | 		defined $rmid{$1} and $is_o = 1; 
37 | 	}
38 | 	if ($attr =~ m/(?:^|;|\s)Parent=([^\s;]+)/i) {
39 | 		defined $rmid{$1} and $is_o = 1; 
40 | 	}
41 | 	$is_o == 1 and print "$_\n"; 
42 | }
43 | close GF; 
44 | 


--------------------------------------------------------------------------------
/temp/temp_fix_gff3/Grif_1614.fix.gff3.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/temp/temp_fix_gff3/Grif_1614.fix.gff3.gz


--------------------------------------------------------------------------------
/temp/temp_fix_gff3/Grif_1614.gff3.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/temp/temp_fix_gff3/Grif_1614.gff3.gz


--------------------------------------------------------------------------------
/temp/temp_process_ONT/cdna_classifier_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/temp/temp_process_ONT/cdna_classifier_report.pdf


--------------------------------------------------------------------------------
/temp/temp_process_ONT/cmd_list:
--------------------------------------------------------------------------------
 1 | # https://github.com/ksahlin/isONcorrect#Using-conda
 2 | conda create -n isoncorrect python=3 pip
 3 | conda activate isoncorrect
 4 | pip install isONcorrect
 5 | conda install -c bioconda spoa
 6 | # isONcorrect --help
 7 | pip install isONclust
 8 | conda install -c bioconda "hmmer>=3.0"
 9 | conda install -c bioconda "pychopper>=2.0"
10 | 
11 | # 
12 | nohup bash correction_pipeline.sh /data/Sunhh/temp/ont/reads/ERR3588903_1.fastq  /data/Sunhh/temp/ont/out_SIRV/  60 > scrn.SIRV_test
13 | 
14 | nohup bash correction_pipeline.sh /data/wushan/Cnr_RNA_ONT/01.err_correction/01.raw_reads/cnr_breaker.fastq  /data/Sunhh/temp/ont/o_cnr_breaker/  60 > scrn.o_cnr_breaker
15 | 
16 | 
17 | 
18 | ##
19 | run_isoncorrect --t 40 --fastq_folder ./01.raw/ --outfolder ./02.corrected/ --split_wrt_batches
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/temp/temp_process_ONT/raw_rdN.tbl:
--------------------------------------------------------------------------------
1 | InFile	Total_size	Total_Rd_num	Mean_Rd_size	Range_Rd_size	PhredCut	Time
2 | reads/ERR3588903_1.fastq.gz	1418512089	1680000	844.352433928571	83-5919	Phred33	Mon Dec 19 11:47:05 2022
3 | 


--------------------------------------------------------------------------------
/temp/temp_process_ONT/test_SIRV/cdna_classifier_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/temp/temp_process_ONT/test_SIRV/cdna_classifier_report.pdf


--------------------------------------------------------------------------------
/temp/temp_process_ONT/test_SIRV/cmd_list:
--------------------------------------------------------------------------------
 1 | # https://github.com/ksahlin/isONcorrect#Using-conda
 2 | conda create -n isoncorrect python=3 pip
 3 | conda activate isoncorrect
 4 | pip install isONcorrect
 5 | conda install -c bioconda spoa
 6 | # isONcorrect --help
 7 | pip install isONclust
 8 | conda install -c bioconda "hmmer>=3.0"
 9 | conda install -c bioconda "pychopper>=2.0"
10 | 
11 | 
12 | # wget https://raw.githubusercontent.com/ksahlin/isONcorrect/master/test_data/isoncorrect/0.fastq 
13 | # wget https://raw.githubusercontent.com/ksahlin/isONcorrect/master/scripts/correction_pipeline.sh 
14 | 
15 | # 
16 | nohup bash correction_pipeline.sh /data/Sunhh/temp/ont/reads/ERR3588903_1.fastq  /data/Sunhh/temp/ont/out_SIRV/  60 > scrn.SIRV_test
17 | 
18 | nohup bash correction_pipeline.sh /data/wushan/Cnr_RNA_ONT/01.err_correction/01.raw_reads/cnr_breaker.fastq  /data/Sunhh/temp/ont/o_cnr_breaker/  60 > scrn.o_cnr_breaker
19 | 
20 | 
21 | 
22 | ##
23 | run_isoncorrect --t 40 --fastq_folder ./01.raw/ --outfolder ./02.corrected/ --split_wrt_batches
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/temp/temp_process_ONT/test_SIRV/scrn.SIRV_test.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Sunhh/NGS_data_processing/7c202e9c150708c32c2d2a852bb7925adceb70f7/temp/temp_process_ONT/test_SIRV/scrn.SIRV_test.gz


--------------------------------------------------------------------------------
/temp/temp_process_ONT/test_SIRV/scrn.corr:
--------------------------------------------------------------------------------
1 | Usage: correction_pipeline.sh  <raw_reads.fq>  <outfolder>  <num_cores> 
2 | 


--------------------------------------------------------------------------------
/temp/temp_process_ONT/test_small/cmd_list:
--------------------------------------------------------------------------------
1 | isONcorrect --fastq 0.fastq --outfolder out
2 | 


--------------------------------------------------------------------------------