├── fasta_paths.config
├── getCodingSequence.R
├── getCodingSequenceNonstop.R
├── README.md
├── getMutNeoantigenBindersMHCClassII.sh
├── getMutNeoantigenBinders.sh
├── runNetMHCpan.py
├── mutationPostProcess.py
├── mafToFastaV2.py
└── LICENSE


/fasta_paths.config:
--------------------------------------------------------------------------------
1 | [Reference Paths]
2 | GRCh37cds: /xchip/cga_home/margolis/mutationsToNeoantigen/goldStandard/Homo_sapiens.GRCh37.cds.all.fa
3 | GRCh37cdna: /xchip/cga_home/margolis/mutationsToNeoantigen/goldStandard/Homo_sapiens.GRCh37.cdna.all.fa
4 | GRCh38cds: /xchip/cga_home/margolis/mutationsToNeoantigen/goldStandard/Homo_sapiens.GRCh38.cds.all.fa
5 | GRCh38cdna: /xchip/cga_home/margolis/mutationsToNeoantigen/goldStandard/Homo_sapiens.GRCh38.cdna.all.fa
6 | 


--------------------------------------------------------------------------------
/getCodingSequence.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | library('biomaRt')
 4 | 
 5 | # Get transcript ID passed from python script
 6 | functinput <- commandArgs(trailingOnly = TRUE)
 7 | 
 8 | # Function to grab coding sequence
 9 | getCodingSeq <- function(transcriptID) {
10 | 	# Connect to the GRCh37 Ensembl BioMart archive, then select the human gene dataset on it
11 | 	archive <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", host="grch37.ensembl.org")
12 | 	grch37_mart <- useDataset("hsapiens_gene_ensembl", mart=archive)
13 | 	# Look up the coding sequence for the given Ensembl transcript ID(s)
14 | 	seq_table <- getSequence(id=transcriptID, type="ensembl_transcript_id", seqType="coding", mart=grch37_mart)
15 | 	# Write the sequence to stdout so the calling process can capture it
16 | 	cat(seq_table[["coding"]])
17 | }
18 | 
19 | # Call function
20 | getCodingSeq(functinput)
21 | 


--------------------------------------------------------------------------------
/getCodingSequenceNonstop.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | 
 3 | library('biomaRt')
 4 | 
 5 | # Get transcript ID passed from python script
 6 | functinput <- commandArgs(trailingOnly = TRUE)
 7 | 
 8 | # Function to grab coding sequence
 9 | getCodingSeq <- function(transcriptID) {
10 | 	# Connect to the GRCh37 Ensembl BioMart archive and select the human gene dataset
11 | 	grch37_mart <- useDataset("hsapiens_gene_ensembl", mart=useMart(biomart = "ENSEMBL_MART_ENSEMBL", host="grch37.ensembl.org"))
12 | 	# Fetch the coding sequence and the 3' UTR with two separate getSequence queries
13 | 	cds <- getSequence(id=transcriptID, type="ensembl_transcript_id", seqType="coding", mart=grch37_mart)$coding
14 | 	utr3 <- getSequence(id=transcriptID, type="ensembl_transcript_id", seqType="3utr", mart=grch37_mart)$`3utr`
15 | 	# NOTE(review): any placeholder text BioMart returns for a missing 3' UTR is appended verbatim - confirm the caller copes
16 | 	# Write the coding sequence immediately followed by the 3' UTR (no separator) to stdout
17 | 	cat(paste0(cds, utr3))
18 | }
19 | 
20 | # Call function
21 | getCodingSeq(functinput)
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # neoantigen_calling_pipeline
 2 | 
 3 | This pipeline calls somatic cancer neoantigens generated from genetic mutations in patient tumor DNA.
 4 | 
 5 | To run: 
 6 |   - Download NetMHCPan-3.0 (http://www.cbs.dtu.dk/cgi-bin/nph-sw_request?netMHCpan) and configure its install paths in the runNetMHCpan.py file (line 70; the NetMHCIIpan command is on line 140).
 7 |   - Download GRCh37 Ensembl FASTA files, Homo_sapiens.GRCh37.cds.all.fa and Homo_sapiens.GRCh37.cdna.all.fa (http://grch37.ensembl.org/info/data/ftp/index.html) and update paths in fasta_paths.config file.
 8 |   - Change paths in shell script getMutNeoantigenBinders.sh (notes in file comments). 
 9 |       - For each sample, the pipeline is intended to run on a MuTect SNV maf file, a Strelka InDel maf file, and a list of patient Class I HLA alleles (e.g., HLA-A02:01).
10 |   - Run getMutNeoantigenBinders.sh from the command line as an SGE array job (e.g., qsub getMutNeoantigenBinders.sh). This script is a wrapper and will call all other relevant scripts.
11 | 
12 | Additional notes: 
13 |   - Currently, the pipeline works for MHC Class I only. MHC Class II functionality (getMutNeoantigenBindersMHCClassII.sh) is in development.
14 |   - Detailed execution instructions and functionality descriptions can be found in each script header, as well as for each individual function.
15 |   
16 | 


--------------------------------------------------------------------------------
/getMutNeoantigenBindersMHCClassII.sh:
--------------------------------------------------------------------------------
 1 | # Claire Margolis
 2 | # 3 October 2016
 3 | # getMutNeoantigenBindersMHCClassII.sh
 4 | # 
 5 | # Summary: Shell script that preprocesses patient .maf files and runs NetMHCIIPan on them
 6 | #
 7 | # *NOTE*: If you want to run this script, go through and verify that the paths to relevant files
 8 | # are in the correct format for your cohort. You will need to change the sample list files
 9 | # (pat_dirs.txt, snv_mafs.txt, indel_mafs.txt, hla_types.txt), among other things, to make the
10 | # script specific to your cohort. You can also change preferences for running netMHCIpan vs. netMHCIIpan.
11 | 
12 | # ----------------------------------------------------------------------------------------------- #
13 | 
14 | # ----------------------------------------------------------------------------------------------- #
15 | # Specify shell / UGER preferences
16 | 
17 | #!/bin/bash
18 | 
19 | #$ -cwd
20 | #$ -q long
21 | #$ -m e
22 | #$ -l h_vmem=10g 
23 | #$ -t 1-2
24 | # ----------------------------------------------------------------------------------------------- #
25 | 
26 | # ----------------------------------------------------------------------------------------------- #
27 | # Use statements
28 | 
29 | source /broad/software/scripts/useuse
30 | reuse Python-2.7
31 | use MySQL-5.6
32 | 
33 | # ----------------------------------------------------------------------------------------------- #
34 | 
35 | # ----------------------------------------------------------------------------------------------- #
36 | # Set directory paths: line number $SGE_TASK_ID of each list file is the entry for the sample
37 | # handled by this array task, so all four list files must be in the same sample order
38 | patient_dir=pat_dirs.txt
39 | PAT_DIR=$(head -n "$SGE_TASK_ID" "$patient_dir" | tail -n 1)
40 | snv_mafs=snv_mafs.txt
41 | SNV_MAF=$(head -n "$SGE_TASK_ID" "$snv_mafs" | tail -n 1)
42 | indel_mafs=indel_mafs.txt
43 | INDEL_MAF=$(head -n "$SGE_TASK_ID" "$indel_mafs" | tail -n 1)
44 | hla_types=hla_types.txt
45 | HLA_TYPE=$(head -n "$SGE_TASK_ID" "$hla_types" | tail -n 1)
46 | 
47 | # ----------------------------------------------------------------------------------------------- #
48 | 
49 | # ----------------------------------------------------------------------------------------------- #
50 | # Run mafToFastaV2.py for each patient for both SNVs and indels
51 | # ( Converts mutations in maf file to mutant peptides, generates wild-type peptides as well, and
52 | # writes both to outfile )
53 | 
54 | echo 'Running mafToFasta.py script for both SNVs and indels.'
55 | python mafToFastaV2.py $SNV_MAF 0 18,19,20 $PAT_DIR ../$PAT_DIR
56 | python mafToFastaV2.py $INDEL_MAF 1 18,19,20 $PAT_DIR ../$PAT_DIR
57 | 
58 | # ----------------------------------------------------------------------------------------------- #
59 | 
60 | # ----------------------------------------------------------------------------------------------- #
61 | # Run runNetMHCpan.py for each patient
62 | # ( Runs netMHCpan program to get predicted binding affinities for each peptide based on patient 
63 | # HLA type )
64 | # *NOTE*: 1 for NetMHCPan, 2 for NetMHCIIPan. Must run script twice if you want both. 
65 | 
66 | echo 'Running runNetMHCpan.py script.'
67 | python runNetMHCpan.py ../$PAT_DIR/len18pep_FASTA_snv.txt,../$PAT_DIR/len18pep_FASTA_indel.txt,../$PAT_DIR/len19pep_FASTA_snv.txt,../$PAT_DIR/len19pep_FASTA_indel.txt,../$PAT_DIR/len20pep_FASTA_snv.txt,../$PAT_DIR/len20pep_FASTA_indel.txt $HLA_TYPE 18,18,19,19,20,20 2 ../$PAT_DIR
68 | 
69 | # ----------------------------------------------------------------------------------------------- #
70 | 
71 | 
72 | # ----------------------------------------------------------------------------------------------- #
73 | # Run mutationPostProcess.py for each patient
74 | # ( Processes netMHCpan output to make a more user-friendly file incorporating both mutant and 
75 | # wild-type data for each peptide )
76 | # *NOTE*: 1 for NetMHCPan, 2 for NetMHCIIPan
77 | 
78 | echo 'Running mutationPostProcess.py script.'
79 | python mutationPostProcess.py ../$PAT_DIR/NETMHCIIpan_out_18SNV.xls,../$PAT_DIR/NETMHCIIpan_out_18InDel.xls,../$PAT_DIR/NETMHCIIpan_out_19SNV.xls,../$PAT_DIR/NETMHCIIpan_out_19InDel.xls,../$PAT_DIR/NETMHCIIpan_out_20SNV.xls,../$PAT_DIR/NETMHCIIpan_out_20InDel.xls ../$PAT_DIR/len18pep_headermap_snv.txt,../$PAT_DIR/len18pep_headermap_indel.txt,../$PAT_DIR/len19pep_headermap_snv.txt,../$PAT_DIR/len19pep_headermap_indel.txt,../$PAT_DIR/len20pep_headermap_snv.txt,../$PAT_DIR/len20pep_headermap_indel.txt 18,18,19,19,20,20 $PAT_DIR 2 ../$PAT_DIR/
80 | 
81 | # ----------------------------------------------------------------------------------------------- #
82 | 
83 | 


--------------------------------------------------------------------------------
/getMutNeoantigenBinders.sh:
--------------------------------------------------------------------------------
 1 | # ----------------------------------------------------------------------------------------------- #
 2 | # Claire Margolis
 3 | # 21 March 2017
 4 | # getMutNeoantigenBinders.sh
 5 | # 
 6 | # Summary: Shell script that calls all python scripts to run neoantigen pipeline on batch of 
 7 | #	   samples. Intended to be submitted to UGER from the command line with task array 
 8 | #	   of samples. 
 9 | #	   Sample usage: qsub getMutNeoantigenBinders.sh
10 | #
11 | # *NOTE*: If you want to run this script, go through and verify that the paths to relevant files
12 | # are in the correct format for your cohort. You will need to change the sample list files (patient_dirs.txt,
13 | # hla_paths.txt, snv_maf_paths.txt, indel_maf_paths.txt), among other things, to make the script specific to your cohort.
14 | 
15 | # ----------------------------------------------------------------------------------------------- #
16 | 
17 | # ----------------------------------------------------------------------------------------------- #
18 | # Specify shell / UGER preferences
19 | 
20 | #!/bin/bash
21 | 
22 | #$ -cwd
23 | #$ -q long
24 | #$ -m e
25 | #$ -l h_vmem=10g 
26 | #$ -t 1-2
27 | # ----------------------------------------------------------------------------------------------- #
28 | 
29 | # ----------------------------------------------------------------------------------------------- #
30 | # Use statements
31 | 
32 | source /broad/software/scripts/useuse
33 | reuse Python-2.7
34 | reuse MySQL-5.6
35 | reuse R-3.3
36 | 
37 | # ----------------------------------------------------------------------------------------------- #
38 | 
39 | # ----------------------------------------------------------------------------------------------- #
40 | # Set directory paths
41 | 
42 | patient_dirs=./patient_dirs.txt  # Sample names (these double as directory names), one per line
43 | PAT_DIR=$(head -n "$SGE_TASK_ID" "$patient_dirs" | tail -n 1)
44 | hla_paths=./hla_paths.txt  # Path to the HLA allele file of each sample, one per line, in same order as patient_dirs.txt
45 | HLA_PATH=$(head -n "$SGE_TASK_ID" "$hla_paths" | tail -n 1)
46 | snv_maf_paths=./snv_maf_paths.txt  # Path to the MuTect file of each sample, one per line, in same order as patient_dirs.txt
47 | SNV_MAF_PATH=$(head -n "$SGE_TASK_ID" "$snv_maf_paths" | tail -n 1)
48 | indel_maf_paths=./indel_maf_paths.txt  # Path to the Strelka file of each sample, one per line, in same order as patient_dirs.txt
49 | INDEL_MAF_PATH=$(head -n "$SGE_TASK_ID" "$indel_maf_paths" | tail -n 1)
50 | 
51 | # ----------------------------------------------------------------------------------------------- #
52 | 
53 | # ----------------------------------------------------------------------------------------------- #
54 | # Run mafToFastaV2.py for each patient for both SNVs and indels
55 | # ( Converts mutations in maf file to mutant peptides, generates wild-type peptides as well, and
56 | # writes both to outfile )
57 | 
58 | echo 'Running mafToFasta.py script for both SNVs and indels.'
59 | python mafToFastaV2.py $SNV_MAF_PATH 0 9,10 $PAT_DIR ./$PAT_DIR  # Only change last argument (./$PAT_DIR) to contain whatever output path desired
60 | python mafToFastaV2.py $INDEL_MAF_PATH 1 9,10 $PAT_DIR ./$PAT_DIR  # Only change last argument to contain whatever output path you want
61 | 
62 | # ----------------------------------------------------------------------------------------------- #
63 | 
64 | # ----------------------------------------------------------------------------------------------- #
65 | # Run runNetMHCpan.py for each patient
66 | # ( Runs netMHCpan program to get predicted binding affinities for each peptide based on patient 
67 | # HLA type )
68 | 
69 | echo 'Running runNetMHCpan.py script.'
70 | python runNetMHCpan.py ./$PAT_DIR/len9pep_FASTA_snv.txt,./$PAT_DIR/len9pep_FASTA_indel.txt,./$PAT_DIR/len10pep_FASTA_snv.txt,./$PAT_DIR/len10pep_FASTA_indel.txt $HLA_PATH 9,9,10,10 1 ./$PAT_DIR  # Change "./$PAT_DIR" parts if you wrote to a different output path above
71 | 
72 | # ----------------------------------------------------------------------------------------------- #
73 | 
74 | 
75 | # ----------------------------------------------------------------------------------------------- #
76 | # Run mutationPostProcess.py for each patient
77 | # ( Processes netMHCpan output to make a more user-friendly file incorporating both mutant and 
78 | # wild-type data for each peptide )
79 | 
80 | echo 'Running mutationPostProcess.py script.'
81 | python mutationPostProcess.py ./$PAT_DIR/NETMHCpan_out_9SNV.xls,./$PAT_DIR/NETMHCpan_out_9InDel.xls,./$PAT_DIR/NETMHCpan_out_10SNV.xls,./$PAT_DIR/NETMHCpan_out_10InDel.xls ./$PAT_DIR/len9pep_headermap_snv.txt,./$PAT_DIR/len9pep_headermap_indel.txt,./$PAT_DIR/len10pep_headermap_snv.txt,./$PAT_DIR/len10pep_headermap_indel.txt 9,9,10,10 $PAT_DIR 1 ./$PAT_DIR/  # Change "./$PAT_DIR" parts if you wrote to a different output path above
82 | 
83 | # ----------------------------------------------------------------------------------------------- #
84 | 
85 | 


--------------------------------------------------------------------------------
/runNetMHCpan.py:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------------------------------- #
  2 | # Claire Margolis
  3 | # runNetMHCpan.py
  4 | #
  5 | # Summary: Takes in one or more FASTA files containing all of the peptides upon which netMHCpan
  6 | # or netMHCIIpan is to be run. Runs whichever version of netMHCpan is requested and returns
  7 | # the results in an appropriately-named output file.
  8 | #
  9 | # Input format: python runNetMHCpan.py len9peptides.txt,len10peptides.txt HLAalleles.txt 1 outpath
 10 | # 	Options for specifying which netMHCpan version: 
 11 | #	1 = netMHCIpan
 12 | #	2 = netMHCIIpan
 13 | # *RELEVANT*: HLA allele input file can be in one of two formats: 
 14 | #	1. Polysolver winners_hla.txt output file
 15 | # 		example line from file: HLA-A   hla_a_02_01_01_01       hla_a_32_01_01
 16 | # 	2. Already processed, one allele per line in netMHC compatible format
 17 | #		example line from file: HLA-A02:01
 18 | #
 19 | # Output: netMHCpan output .xls file(s)
 20 | #
 21 | # ----------------------------------------------------------------------------------------------- #
 22 | 
 23 | 
 24 | # ----------------------------------------------------------------------------------------------- #
 25 | # Import necessary packages
 26 | 
 27 | #!/usr/bin/python
 28 | import sys
 29 | import numpy as np
 30 | import subprocess
 31 | import os
 32 | 
 33 | # ----------------------------------------------------------------------------------------------- #
 34 | 
 35 | 
 36 | # ----------------------------------------------------------------------------------------------- #
 37 | # Function: runNetMHCIpan
 38 | # Inputs: FASTA file of peptide sequences, patient HLA alleles (these are automatically given 
 39 | # by Polysolver and come in a .txt file that needs to be pre-processed into the correct format for 
 40 | # netMHCpan), peptide length outpath 
 41 | # Returns: None (netMHCpan will automatically write output to a .xls file)
 42 | # Summary: Pre-processes patient HLA alleles, runs netMHCIpan. 
 43 | def runNetMHCIpan(pepfile, hlafile, length, outpath):
 44 | 	# Determine whether we're dealing with a snv or indel file (for naming the outfile)
 45 | 	varianttype = ''
 46 | 	if pepfile.split('_FASTA_')[1].split('.')[0] == 'snv':
 47 | 		varianttype = 'SNV'
 48 | 	if pepfile.split('_FASTA_')[1].split('.')[0] == 'indel':
 49 | 		varianttype = 'InDel'
 50 | 	# Read in HLA alleles file and process
 51 | 	with open(hlafile) as f:
 52 | 		hlalines  = f.read().splitlines()
 53 | 	hlaalleles = []
 54 | 	# Determine which input format the hla allele file is in
 55 | 	if len(hlalines[0].split('\t')) <= 1:  # In already pre-processed format
 56 | 		hlaalleles = hlalines
 57 | 	else:  # Polysolver output file
 58 | 		for line in hlalines:
 59 | 			split = line.split('\t')
 60 | 			# Reformat each allele (2 for each type of HLA A, B, and C)
 61 | 			for i in range(1, 3):
 62 | 				currallele = 'HLA-'
 63 | 				allele = split[i]
 64 | 				components = allele.split('_')
 65 | 				currallele += components[1].upper() + components[2] + ':' + components[3]
 66 | 				hlaalleles.append(currallele)
 67 | 	hlaalleles = list(set(hlaalleles))  # Remove duplicate alleles if there are any
 68 | 	hlastring = ','.join(hlaalleles)
 69 | 	# Run netMHCI pan
 70 | 	command =  'export NHOME=/netMHCpan-4.1; export NETMHCpan=/netMHCpan-4.1/Linux_x86_64; /netMHCpan-4.1/Linux_x86_64/bin/netMHCpan -a '+hlastring+' -f '+pepfile+' -inptype 0 -l '+str(length)+' -s -xls -xlsfile '+outpath+'/NETMHCpan_out_'+str(length)+varianttype+'.xls -allname /netMHCpan-4.1/Linux_x86_64/data/allelenames -hlapseudo /netMHCpan-4.1/Linux_x86_64/data/MHC_pseudo.dat -t 500 -version /xchip/cga_home/margolis/Packages/netMHCPan/netMHCpan-3.0/data/version -tdir /netMHCpan-4.1/scratch/XXXXXX -rdir /netMHCpan-4.1/Linux_x86_64/ > '+outpath+'/netMHCpanoutlen_'+str(length)+varianttype+'.txt'
 71 | 	subprocess.call(command, shell=True)
 72 | 	
 73 | 	# Catch case where peptide file was empty (create dummy file) 
 74 | 	dummyfile = outpath+'/NETMHCpan_out_'+str(length)+varianttype+'.xls'
 75 | 	open(dummyfile, 'a').close()
 76 | 
 77 | 	return
 78 | 
 79 | # ----------------------------------------------------------------------------------------------- #
 80 | 
 81 | 
 82 | # ----------------------------------------------------------------------------------------------- #
 83 | # Function: runNetMHCIIpan
 84 | # Inputs: FASTA file of peptide sequences, patient HLA alleles (these are automatically given 
 85 | # by Polysolver and come in a .txt file that needs to be pre-processed into the correct format for 
 86 | # netMHCIIpan), peptide length outpath 
 87 | # Returns: None (netMHCIIpan will automatically write output to a .xls file)
 88 | # Summary: Pre-processes patient HLA alleles, runs netMHCIIpan
 89 | def runNetMHCIIpan(pepfile, hlafile, length, outpath):
 90 | 	# Determine whether we're dealing with a snv or indel file (for naming the outfile)
 91 |         varianttype = ''
 92 |         if pepfile.split('_FASTA_')[1].split('.')[0] == 'snv':
 93 |                 varianttype = 'SNV'
 94 |         if pepfile.split('_FASTA_')[1].split('.')[0] == 'indel':
 95 |                 varianttype = 'InDel'
 96 |         # Read in HLA alleles file and process
 97 |         with open(hlafile) as f:
 98 |                 hlalines = f.read().splitlines()
 99 |         hlaalleles = []
100 |         # Determine which input format the hla allele file is in
101 |         if len(hlalines[0].split('\t')) <= 1:  # In already pre-processed format
102 |                 hlaalleles = hlalines
103 |         else:  # PHLAT output file
104 | 		# DQA1
105 | 		DQA1a = hlalines[4].split('\t')[1].split('*')[1][0:5]
106 | 		DQA1a = DQA1a.split(':')[0]+DQA1a.split(':')[1]
107 | 		DQA1b = hlalines[4].split('\t')[2].split('*')[1][0:5]
108 | 		DQA1b = DQA1b.split(':')[0]+DQA1b.split(':')[1]
109 | 		# DQB1
110 | 		DQB1a = hlalines[5].split('\t')[1].split('*')[1][0:5]
111 | 		DQB1a = DQB1a.split(':')[0]+DQB1a.split(':')[1]
112 | 		DQB1b = hlalines[5].split('\t')[2].split('*')[1][0:5]
113 | 		DQB1b = DQB1b.split(':')[0]+DQB1b.split(':')[1]
114 | 		# Concatenate four DQ isoforms to be in correct format
115 | 		DQA1B1a = 'HLA-DQA1'+DQA1a+'-DQB1'+DQB1a
116 | 		DQA1aB1b = 'HLA-DQA1'+DQA1a+'-DQB1'+DQB1b
117 | 		DQA1bB1a = 'HLA-DQA1'+DQA1b+'-DQB1'+DQB1a
118 | 		DQA1B1b = 'HLA-DQA1'+DQA1b+'-DQB1'+DQB1b
119 | 		# DRB1
120 | 		DRB1a = hlalines[6].split('\t')[1].split('*')[1][0:5]
121 | 		DRB1a = DRB1a.split(':')[0]+DRB1a.split(':')[1]
122 | 		DRB1b = hlalines[6].split('\t')[2].split('*')[1][0:5]
123 | 		DRB1b = DRB1b.split(':')[0]+DRB1b.split(':')[1]
124 | 		# Format DRB1 alleles
125 | 		DRB1a = 'DRB1_'+DRB1a
126 | 		DRB1b = 'DRB1_'+DRB1b
127 | 		# Add alleles to list
128 | 		hlaalleles.append(DQA1B1a)
129 | 		hlaalleles.append(DQA1aB1b)
130 | 		hlaalleles.append(DQA1bB1a)
131 | 		hlaalleles.append(DQA1B1b)
132 | 		hlaalleles.append(DRB1a)
133 | 		hlaalleles.append(DRB1b)
134 |         hlaalleles = list(set(hlaalleles))  # Remove duplicate alleles if there are any
135 |         hlastring = ','.join(hlaalleles)
136 | 
137 | 
138 | 	# Run netMHCIIpan if file is not empty
139 | 	if os.path.getsize(pepfile) > 1:
140 | 		command = 'export NHOME=/netMHCIIpan-4.0; export NETMHCpan=/netMHCIIpan-4.0/Linux_x86_64; /netMHCIIpan-4.0/netMHCIIpan -a '+hlastring+' -f '+pepfile+' -inptype 0 -length '+str(length)+' -fast -filter 1 -affF 500 -rankF 2.0 -s -xls -xlsfile '+outpath+'/NETMHCIIpan_out_'+str(length)+varianttype+'.xls rdir /netMHCIIpan-4.0/Linux_x86_64/ > '+outpath+'/netMHCIIpanoutlen_'+str(length)+varianttype+'.txt'
141 | 		subprocess.call(command, shell=True)
142 | 
143 | 	# Catch case where peptide file was empty (create dummy file) 
144 |         dummyfile = outpath+'/NETMHCIIpan_out_'+str(length)+varianttype+'.xls'
145 |         open(dummyfile, 'a').close()	
146 | 
147 | 	return
148 | 
149 | # ----------------------------------------------------------------------------------------------- #
150 | 
151 | 
152 | # ----------------------------------------------------------------------------------------------- #
153 | # Main function
154 | def main():
155 | 	 # Check to make sure we have the right number of inputs
156 | 	if len(sys.argv) != 6:
157 | 		print 'Error: incorrect number of inputs.'
158 | 		print 'Please input FASTA file(s), a HLAalleles.txt file, the peptide length(s), a netMHCpan version, and an outpath.'
159 | 		sys.exit()
160 | 	# Parse inputs
161 | 	fastas = sys.argv[1]
162 | 	alleles = sys.argv[2]
163 | 	peplengths = sys.argv[3]
164 | 	versionchoice = sys.argv[4]
165 | 	outpath = sys.argv[5]
166 | 	# Split FASTA files and peptide lengths
167 | 	fastalist = fastas.split(',')
168 | 	lengthslist = peplengths.split(',')
169 | 	if len(fastalist) != len(lengthslist):
170 | 		print 'Error: Please make sure your peptide lengths correspond to the fasta files and are in the same order.'
171 | 		sys.exit()
172 | 	# Run whichever netMHC version is desired
173 | 	if versionchoice == '1':
174 | 		for i in range(0, len(fastalist)):
175 | 			runNetMHCIpan(fastalist[i], alleles, lengthslist[i], outpath)
176 | 	else:
177 | 		for i in range(0, len(fastalist)):
178 | 			runNetMHCIIpan(fastalist[i], alleles, lengthslist[i], outpath)
179 | 	
180 | 	return
181 | 
182 | if __name__ == '__main__':
183 |     main()
184 | 
185 | # ----------------------------------------------------------------------------------------------- #
186 | 
187 | 
188 | 


--------------------------------------------------------------------------------
/mutationPostProcess.py:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------------------------------- #
  2 | # Claire Margolis
  3 | # mutationPostProcess.py
  4 | #
  5 | # Summary: Takes in NETMHC_out.xls file(s) and does postprocessing to create a more user-friendly 
  6 | # output format.
  7 | # Input format: python mutationPostProcess.py NETMHCpan_out9snv.xls,NETMHCpan_out9indel.xls,NETMHCpan_out10snv.xls,NETMHCpan_out10indel.txt 
  8 | #		len9pep_headermap_snv,len9pep_headermap_indel,len10pep_headermap_snv,len10pep_headermap_indel patientID outpath
  9 | # Output format: processedcombinedNETMHCpan_out.txt
 10 | #
 11 | # ----------------------------------------------------------------------------------------------- #
 12 | 
 13 | 
 14 | # ----------------------------------------------------------------------------------------------- #
 15 | # Import necessary packages
 16 | 
 17 | #!/usr/bin/python
 18 | import sys
 19 | import numpy as np
 20 | import subprocess
 21 | import os
 22 | # ----------------------------------------------------------------------------------------------- #
 23 | 
 24 | 
 25 | # ----------------------------------------------------------------------------------------------- #
 26 | # Function: processSingleFileOutput
 27 | # Inputs: netMHCpan output .xls file (tab-delimited), header map file, peptide length, patient ID, version
 28 | # Returns: An ndarray with one row per retained binder peptide and 17 columns of binder features and
 29 | # metadata (an empty array with 17 columns when the netMHC file is empty).
 30 | # Summary: Postprocesses the netMHCpan output to eliminate useless rows, change data format from
 31 | # wide to long, add allele name columns. Also incorporates information about the sequence from the
 32 | # .maf file (which is stored in the header map file input parameter).
 33 | def processSingleFileOutput(netMHCfile, mapfile, length, patID, version):
 34 | 	# Catch case where netMHCfile is empty: return an empty array with the 17 output columns
 35 | 	if os.path.getsize(netMHCfile) == 0:
 36 | 		return np.empty(shape=[0, 17])			
 37 | 	# Parse netMHC filename to get whether file is SNVs or InDels (0 = SNV, 1 = InDel; a name without "InDel" counts as SNV)
 38 | 	snvorindel = 0
 39 | 	if "InDel" in netMHCfile:
 40 | 		snvorindel = 1
 41 | 	length = int(length)
 42 | 	# Read in first line of file to get number and names of alleles (tab-separated)
 43 |         with open(netMHCfile, 'r') as f:
 44 |         	alleles = f.readline().strip().split('\t')
 45 |         alleles = filter(None, alleles)  # Remove empty strings just in case
 46 | 	# Read in rest of file as strings (skip the HLA allele line at the top and the column header line); 'S40' keeps at most 40 characters per field
 47 | 	data = np.loadtxt(netMHCfile, dtype='S40', delimiter='\t', skiprows=2, ndmin=2)
 48 |         nrow = data.shape[0]
 49 |         ncol = data.shape[1]
 50 | 	# Move columns so that data is in long form: slice a 4-column group per allele and stack the groups vertically
 51 |         listofarrays = []  # Will store all allele-specific arrays
 52 |         initcols = data[:,0:3]  # Initial three columns that are common to all HLA alleles
 53 | 	if version == 1:
 54 |         	for i in range(0, len(alleles)):
 55 |                 	currstartcol = (3*(i+1))+i  # Groups start at columns 3, 7, 11, ...
 56 |                 	currendcol = currstartcol+4
 57 |                 	currarray = data[:,currstartcol:currendcol]
 58 |                 	listofarrays.append(currarray)
 59 |         		datav2 = np.vstack(tuple(listofarrays))  # Re-stacked on every iteration; the last iteration's value is used
 60 | 	else:
 61 | 		for i in range(0, len(alleles)):
 62 | 			currstartcol = (3*(i+1))  # Groups start at columns 3, 6, 9, ...
 63 | 			currendcol = currstartcol+4  # NOTE(review): still 4 wide, so consecutive groups share one column - confirm intended
 64 | 			currarray = data[:,currstartcol:currendcol]
 65 | 			listofarrays.append(currarray)
 66 | 			datav2 = np.vstack(tuple(listofarrays))  # Re-stacked on every iteration; the last iteration's value is used
 67 |         # Add initial columns and allele column into data frame
 68 |         # Allele column: each allele name repeated nrow times, in the same order as the stacked groups
 69 |         allelevec = []
 70 |         for i in range(0, len(alleles)):
 71 |                 currnewcol = [alleles[i]]*nrow
 72 |                 allelevec.extend(currnewcol)
 73 |         datav2 = np.insert(datav2, 1, allelevec, axis=1)  # Add allele column to datalong
 74 |         # Initial columns: one copy of the three shared columns per allele group
 75 |         initcollist = []
 76 |         for i in range(0, len(listofarrays)):
 77 |                 initcollist.append(initcols)
 78 |         initcolstoappend = np.vstack(tuple(initcollist))
 79 |         datav3 = np.concatenate((initcolstoappend, datav2), axis=1)
 80 | 	datav3 = np.delete(datav3, 3, axis=1)  # Drop each group's first column; columns are now 0-2 shared, 3 allele, 4-6 rest of group
 81 | 	# Create mutant / WT dictionary for SNVs (key: mutant column2|column0|allele, value: WT column2|column1|column4|column5|column6)
 82 | 	if snvorindel == 0:
 83 | 		newnrow = datav3.shape[0]
 84 | 		newncol = datav3.shape[1]
 85 | 		mutWTdict = {}
 86 | 		WTindices = []
 87 | 		for i in range(0, newnrow):
 88 | 			if datav3[i,2].strip().split('_')[2] == 'mut':  # For each mutant row, do:
 89 | 				currkey = datav3[i,2]+'|'+datav3[i,0]+'|'+datav3[i,3]
 90 | 				# Find the corresponding wild-type row: the first later row with the same column 0 and the same first 5 characters of column 2
 91 | 				for j in range(i+1, newnrow):
 92 | 					if datav3[j,0] == datav3[i,0] and datav3[j,2][0:5] == datav3[i,2][0:5]:  # NOTE(review): IDs sharing a 5-character prefix can be paired - confirm ID format
 93 | 						currval = datav3[j,2]+'|'+datav3[j,1]+'|'+datav3[j,4]+'|'+datav3[j,5]+'|'+datav3[j,6]
 94 | 						mutWTdict[currkey] = currval
 95 | 						WTindices.append(j)
 96 | 						break
 97 | 		# Delete WT rows (their values now live in mutWTdict)
 98 | 		datav4 = np.delete(datav3, WTindices, axis=0)
 99 | 	else:
100 | 		datav4 = datav3
101 | 	# Eliminate any rows that have a rank above 2% (column 6 is read as the rank)
102 | 	toremove = []
103 | 	newnrow2 = datav4.shape[0]
104 | 	for i in range(0, newnrow2):
105 | 		if float(datav4[i,6]) > 2:
106 | 			toremove.append(i)
107 | 	datav5 = np.delete(datav4, toremove, 0)	
108 | 	# Read in map file and create map dictionary (key: first tab field without its leading character, value: second tab field)
109 | 	headerdict = {}
110 | 	with open(mapfile, 'r') as f:
111 | 	 	lines = f.read().splitlines()
112 | 	for line in lines:
113 | 		key = line.split('\t')[0][1:]
114 | 		val = line.split('\t')[1]
115 | 		headerdict[key] = val
116 | 	# Initialize new ndarray columns (will eventually use np.column_stack to stack them all together into a numpy array)
117 | 	patID,sample,transcript,chrom_loc,gene,gene_num,cdna_change,prot_change,pep_pos,pep_length,hla,pep_mut,aff_mut,rank_mut,pep_wt,aff_wt,rank_wt = ([] for i in range(17))  # NOTE(review): rebinds the patID parameter, so the argument value is never used
118 | 	# For every row in current data array, use WT dict and header dict to find metainformation and save in a new ndarray
119 | 	newnrow3 = datav5.shape[0]
120 | 	for i in range(0, newnrow3):
121 | 		currrow = datav5[i,:]
122 | 		seqnum = currrow[2].split('_')
123 | 		headerdictkey = seqnum[0]+'_'+seqnum[1]  # First two '_'-separated parts of column 2 form the header map key
124 | 		headervals = headerdict[headerdictkey].split('|')  # '|'-separated metadata fields from the header map
125 | 		patID.append(headervals[0])
126 | 		sample.append(headervals[1])
127 | 		transcript.append(headervals[3])
128 | 		chrom_loc.append(headervals[2])
129 | 		gene.append(headervals[4])
130 | 		gene_num.append(headervals[5])
131 | 		cdna_change.append(headervals[6])
132 | 		prot_change.append(headervals[7])
133 | 		pep_pos.append(currrow[0])
134 | 		pep_length.append(length)
135 | 		hla.append(currrow[3])
136 | 		pep_mut.append(currrow[1])
137 | 		aff_mut.append(currrow[5])
138 | 		rank_mut.append(currrow[6])
139 | 		if snvorindel == 0:
140 | 			WTdictkey = currrow[2]+'|'+currrow[0]+'|'+currrow[3]
141 | 			WTvals = mutWTdict[WTdictkey].split('|')  # Raises KeyError if no wild-type row was paired with this mutant row
142 | 			pep_wt.append(WTvals[1])
143 | 			aff_wt.append(WTvals[3])
144 | 			rank_wt.append(WTvals[4])
145 | 		else:
146 | 			pep_wt.append('n/a')
147 | 			aff_wt.append('n/a')
148 | 			rank_wt.append('n/a')
149 | 	# Join all lists into new numpy array (one column per list, 17 columns in total)
150 | 	datafull = np.column_stack((patID,sample,transcript,chrom_loc,gene,gene_num,cdna_change,prot_change,pep_pos,pep_length,hla,pep_mut,aff_mut,rank_mut,pep_wt,aff_wt,rank_wt))
151 | 
152 | 	return datafull
153 | # ----------------------------------------------------------------------------------------------- #
154 | 
155 | 
156 | # ----------------------------------------------------------------------------------------------- #
157 | # Function: writeToOutfile
158 | # Inputs: final numpy array, patient ID, outpath
159 | # Returns: None (writes to file)
160 | # Summary: Takes the full numpy array and writes it, plus appropriate header, to a tab-delimited
161 | # file in the specified outpath.
def writeToOutfile(array, patID, version, outpath):
	"""Write the annotated binder table, preceded by a header line, to a tab-delimited file.

	array   -- rows to write; every element is formatted with '%s'
	patID   -- patient ID, used as the prefix of the output file name
	version -- 1 selects the netMHCpan suffix; any other value selects the netMHCIIpan suffix
	outpath -- directory the output file is written into

	Returns None.
	"""
	# Column names; written verbatim as the first line (comments='' disables the '# ' prefix)
	headerstring = 'patient\tsample\ttranscript\tchrom_loc\tgene\tgene_num\tcdna_change\tprot_change\tmut_pos\tpep_length\tHLA\tpep_mut\taff_mut\trank_mut\tpep_wt\taff_wt\trank_wt'
	# The file name records which predictor produced the results
	suffix = '_processedcombinedNETMHCpan_out.txt' if version == 1 else '_processedcombinedNETMHCIIpan_out.txt'
	outfile = ''.join([outpath, '/', patID, suffix])
	np.savetxt(outfile, array, fmt='%s', delimiter='\t', header=headerstring, comments='')
173 | 
174 | # ----------------------------------------------------------------------------------------------- #
175 | 
176 | 
177 | # ----------------------------------------------------------------------------------------------- #
178 | # Main function
def main():
	"""Command-line entry point: annotate netMHC output file(s) and write one combined table.

	Expects exactly six arguments after the script name:
		1. comma-separated netMHC output files
		2. comma-separated header map files (same order as the netMHC files)
		3. comma-separated peptide lengths (same order as the netMHC files)
		4. patient ID
		5. netMHCpan version (integer; passed through to processing and output naming)
		6. output directory

	Exits with status 1 (after printing a message) when the arguments are unusable.
	"""
	# Check to make sure we have the right number of inputs
	if len(sys.argv) != 7:
		print('Error: incorrect number of inputs.')
		print('Please input netMHC outfile(s), corresponding header map(s), corresponding length(s), the patient ID, version of netMHCpan that was run, and an outpath.')
		# Non-zero status so calling shell scripts can detect the failure
		sys.exit(1)
	# Parse inputs
	netmhcoutfiles, headermapfiles, lengths, patientID = sys.argv[1:5]
	version = int(sys.argv[5])
	outfilepath = sys.argv[6]
	# Split the comma-separated lists, dropping leading/trailing whitespace around each entry
	netmhclist = [entry.strip() for entry in netmhcoutfiles.split(',')]
	headerlist = [entry.strip() for entry in headermapfiles.split(',')]
	lengthslist = [entry.strip() for entry in lengths.split(',')]
	if len(netmhclist) != len(headerlist):
		print('Error: Please make sure your header map files correspond to the netMHC outfiles and are in the same order.')
		sys.exit(1)
	# Every netMHC file needs a length; catch this up front instead of failing with an
	# IndexError after some files have already been processed
	if len(lengthslist) < len(netmhclist):
		print('Error: Please make sure your peptide lengths correspond to the netMHC outfiles and are in the same order.')
		sys.exit(1)
	# Process each netMHC file together with its header map and peptide length
	procarrays = []
	for idx in range(0, len(netmhclist)):
		procarrays.append(processSingleFileOutput(netmhclist[idx], headerlist[idx], lengthslist[idx], patientID, version))
	# Stack the per-file tables row-wise when there is more than one, then write the result
	if len(procarrays) > 1:
		fullarray = np.concatenate(tuple(procarrays), axis=0)
	else:
		fullarray = procarrays[0]
	writeToOutfile(fullarray, patientID, version, outfilepath)

	return

if __name__ == '__main__':
	main()
215 | # ----------------------------------------------------------------------------------------------- #
216 | 


--------------------------------------------------------------------------------
/mafToFastaV2.py:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------------------------------- #
  2 | # Claire Margolis
  3 | # mafToFasta.py
  4 | # 
  5 | # Summary: Takes in a .maf file of either SNVs or InDels and translates mutations to peptide 
  6 | # sequences of (desired length*2 - 1). These peptides will be fed into netMHCpan and will result
  7 | # in getting the binding affinities of peptide of desired length with at least one AA overlapping
  8 | # with the mutated nucleotide(s). Also outputs a map file of headers which will be used to process 
  9 | # and annotate the netMHC output.
 10 | #
 11 | # Input format: python mafToFastaV2.py maffile maffiletype peptidelengths patientID outpath cdsfasta cdnafasta
 12 | # 	maffiletype = 0 for SNVs, 1 for indels
 13 | # 	peptidelengths = comma-separated list of lengths (e.g., 9,10 for netMHCpan)
 14 | #
 15 | # Outputs:
 16 | # 	For each length of peptide desired:
 17 | #		len#pep_headermap_(snv/indel).txt
 18 | #		len#pep_FASTA_(snv/indel).txt
 19 | #
 20 | # ------------------------------------------------------------------------------------------- #
 21 | 
 22 | 
 23 | # ----------------------------------------------------------------------------------------------- #
 24 | # Import necessary packages
 25 | 
 26 | #!/usr/bin/python
 27 | import sys
 28 | import numpy as np
 29 | import subprocess
 30 | from Bio.Seq import Seq
 31 | from ConfigParser import ConfigParser
 32 | # ----------------------------------------------------------------------------------------------- #
 33 | 
 34 | 
 35 | # ----------------------------------------------------------------------------------------------- #
 36 | # Function: DNASeqToProtein
 37 | # Inputs: List of DNA sequences, corresponding list of headers, peptide length
 38 | # Returns: List of protein sequences, corresponding list of headers
 39 | # Summary: Translates sequences in-frame to get proteins
 40 | def DNASeqToProtein(nucs, headers, length):
 41 |         # Initialize codon table and list of peptides
 42 |         codontable = {'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M', 'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
 43 |                 'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K', 'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
 44 |                 'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L', 'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
 45 |                 'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q', 'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
 46 |                 'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V', 'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
 47 |                 'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E', 'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
 48 |                 'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S', 'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
 49 |                 'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*', 'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W'}
 50 |         peptides = []
 51 |         pepheaders = []
 52 |         # Translate nucleotide sequences
 53 |         for n in range(0, len(nucs)):
 54 |                 seq = nucs[n]
 55 |                 fullprotein = ''
 56 |                 for s in xrange(0, len(seq), 3):
 57 |                         codon = seq[s:s+3]
 58 |                         # Break if the sequence ends with trailing AAs (should never happen) or codon is not in table (i.e., has an "N" for unknwn base)
 59 |                         if len(codon) != 3 or codon not in codontable:
 60 |                                 break
 61 |                         # Find corresponding AA to codon
 62 |                         AA = codontable[codon]
 63 |                         fullprotein += AA
 64 |                 # Stop at stop codon, if there is one
 65 |                 if '*' in fullprotein:
 66 |                         substrings = ' '.join(fullprotein.split('*')).split()
 67 |                         if len(substrings[0]) >= int(length):
 68 |                                 peptides.append(substrings[0])
 69 |                                 pepheaders.append(headers[n])
 70 |                 else:  # Case when there is no stop codon in full-length protein
 71 |                         peptides.append(fullprotein)
 72 |                         pepheaders.append(headers[n])
 73 |         return peptides, pepheaders
 74 | # ----------------------------------------------------------------------------------------------- #
 75 | 
 76 | 
 77 | # ----------------------------------------------------------------------------------------------- #
 78 | # Function: MutationsToDNASeq
 79 | # Inputs: maffile, peptide length, patient ID, outpath, snv/indel indicator, cds FASTA path, cdna FASTA path
 80 | # Returns: List of mutant/wild type DNA sequences of desired length, list of corresponding headers
 81 | # Summary: Takes in maf file, peptide length, patient ID, outpath, and reference FASTA paths. Finds ORF orientation at
 82 | # mutation location from .maf file Codon_change field, based on ORF orientation calculates nucleotide
 83 | # window to yield correct number of peptides flanking the mutation, extracts the transcript sequence from the
 84 | # reference FASTA (via sed) to get the nucleotide sequence, and writes header map to output file.
 85 | def MutationsToDNASeq(maf, length, patID, outpath, indicator, cds_path, cdna_path):
 86 | 	# Read in maf file (desired columns only)
 87 | 	#['Hugo_Symbol' 'Entrez_Gene_Id' 'Chromosome' 'Start_position' 'End_position' 'Variant_Classification' 'Tumor_Seq_Allele2'
 88 | 	# 'Tumor_Sample_Barcode' 'Annotation_Transcript' 'Transcript_Strand' 'cDNA_Change' 'Codon_Change' 'Protein_Change' 'Variant_Type'
 89 | 	# 'Transcript_Position' 'Reference_Allele']'''
 90 |         mafarray = np.loadtxt(maf, dtype=str, delimiter='\t', skiprows=0, usecols=(0,1,4,5,6,8,12,15, 305, 222, 302, 89, 287, 9, 278, 10), ndmin=2)
 91 |         # Create dictionary containing lengths to go backward and forward based on ORF orientation 
 92 |         distancedict = {0:[3,0], 1:[2,1], 2:[1,2]}
 93 |         # Open header map file for writing
 94 | 	headermapfile = ''
 95 | 	# Map file will be labeled differently for SNVs and InDels
 96 | 	if indicator == 0:
 97 |         	headermapfile = open(outpath+'/len'+str(length)+'pep_headermap_snv.txt', 'w')
 98 | 	else:
 99 | 		headermapfile = open(outpath+'/len'+str(length)+'pep_headermap_indel.txt', 'a')
100 | 	# Translate length from AAs to nucleotides
101 |         length = int(length)*3
102 | 	# Initialize sequence list, header list
103 | 	seqlist = []
104 | 	headerlist = []
105 | 	counter = 1
106 | 	# Loop through maf array and generate DNA sequences for each sequence
107 | 	isnonstop = 0 # Set indicator for nonstop mutations
108 | 	nonstopcounter = -1
109 | 	nonstopalphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()~`"/,'
110 | 	for row in mafarray:
111 | 		
112 | 		# Check to make sure mutation is one that we care about and skip to next sequence if not
113 | 		classification = row[5]
114 |                 if not (classification == 'Missense_Mutation' or classification == 'Frame_Shift_Ins' or classification == 'Frame_Shift_Del' or classification == 'Nonstop_Mutation' or classification == 'In_Frame_Ins' or classification == 'In_Frame_Del'):
115 |                         continue
116 | 		# Go through special case for nonstop mutation
117 | 		if (classification == 'Nonstop_Mutation'):
118 | 			isnonstop = 1
119 | 			nonstopcounter += 1
120 | 			nonstopheadermapfile = open(outpath+'/len'+str(length/3)+'pep_headermap_indel.txt','a')
121 | 		# Calculate coding strand start and end positions
122 | 		orig_start = 0
123 | 		orig_end = 0
124 | 		if indicator == 0 and isnonstop == 0:  # SNVs (but NOT nonstop mutations)
125 | 			if row[13] == 'SNP':
126 | 				orig_start = int(((row[10].split('.')[1]).split('>')[0])[0:-1])-1 # Subtract because MAFs are 1-indexed but python is 0-indexed
127 | 				orig_end = orig_start # SNVs only affect one position
128 | 			else: # case of DNPs or TNPs or ONPs
129 | 				orig_start = int((row[10].split('.')[1]).split('_')[0])-1 
130 | 				orig_end = orig_start + len(row[6].strip()) - 1 # Will be 2 for DNPs, 3 for TNPs, ... for ONPs
131 | 		elif indicator == 0 and isnonstop == 1: # Nonstop Mutations specifically
132 | 			if row[13] == 'SNP':
133 | 				if '+' in row[9]: # Deal with cases on positive strand separately
134 | 					orig_start = int(row[14].strip())-1 # Subtract one because positive strand transcript positions seem to be 1-indexed
135 | 					orig_end = orig_start
136 | 				else:
137 | 					orig_start = int(row[14].strip()) # Negative strand transcript positions seem to be 0-indexed...?
138 | 					orig_end = orig_start
139 | 			else: # case of DNP/TNP/ONPs that are also nonstop mutations
140 | 				if '+' in row[9]:
141 | 					orig_start = int(row[14].split('_')[0])-1
142 | 					orig_end = orig_start + len(row[6].strip()) - 1
143 | 		else:  # InDels
144 | 			if row[13] == 'DEL': # deletion
145 | 				orig_start = int(((row[10].split('.')[1]).split('del')[0]).split('_')[0])-1
146 | 				if '_' in row[10]:
147 | 					orig_end = int((row[10].split('del')[0]).split('_')[1])-1
148 | 				else:
149 | 					orig_end = orig_start
150 | 			else: # insertion
151 | 				orig_start = int(((row[10].split('.')[1]).split('ins')[0]).split('_')[0])
152 | 				if '_' in row[10]:
153 | 					orig_end = int((row[10].split('ins')[0]).split('_')[1])-1
154 | 				else:
155 | 					orig_end = orig_start
156 | 
157 | 		# Calculate mutation length
158 | 		mut_length = len(row[15].strip())
159 | 
160 |                 # Calculate ORF orientation at mutation start site
161 | 		if indicator == 0:  # If SNV, use codon_change .maf field
162 |                 	codonchange = (row[11].split(')')[1]).split('>')[0]
163 |                 	orfpos = 0  # Initalize variable
164 |                 	for i in range(0, len(codonchange)):
165 |                         	if codonchange[i].isupper():
166 |                                 	orfpos = i  # Set ORF variable
167 | 					break
168 | 		else:  # If InDel, use codon_change .maf field but do further processing
169 | 			codonstartnum = (((row[11].split('('))[1]).split('-'))[0]
170 | 			cdnastartnum = 0
171 | 			if row[13] == 'DEL':
172 | 				cdnastartnum = ((((row[10].split('c.'))[1]).split('del'))[0]).split('_')[0]
173 | 			else:
174 | 				cdnastartnum = ((((row[10].split('c.'))[1]).split('ins'))[0]).split('_')[0]
175 | 				cdnastartnum = int(cdnastartnum)+1
176 | 			orfpos = (int(cdnastartnum) - int(codonstartnum)) % 3
177 | 
178 | 		# Set new start and end positions for chromosome region with appropriate nucleotide region around mutation site based on ORF orientation
179 | 		start = 0
180 | 		end = 0
181 | 		if indicator == 0:  # For SNVs, do this:
182 | 			snvlength = orig_end-orig_start+1
183 | 			start = orig_start - (length - distancedict[orfpos][0])
184 | 			end = orig_end + (length - distancedict[orfpos][1])
185 | 			if row[13] != 'SNP': #Account for DNPs
186 | 				start = start + 3
187 | 				end = end + 3
188 | 		else:  # For InDels, do this:
189 | 			start = orig_start - (length - distancedict[orfpos][0])
190 | 			end = orig_start + mut_length + (length - distancedict[orfpos][1])
191 | 		
192 | 		# Get output from R script that will contain the coding sequence for transcript of interest
193 | 		annot_transcript = row[8].split('.')[0]
194 | 		#CONFIG_FILENAME = 'fasta_paths.config'
195 | 		#config = ConfigParser()
196 | 		#config.read(CONFIG_FILENAME)
197 | 		if isnonstop == 0:
198 | 			#ref_37_path = config.get('Reference Paths','GRCh37cds')
199 | 			ref_path = cds_path
200 | 		else:
201 | 			#ref_37_path = config.get('Reference Paths', 'GRCh37cdna')
202 | 			ref_path = cdna_path
203 | 		command = "sed -n -e '/"+annot_transcript+"/,/>/ p' "+ref_path+" | sed -e '1d;$d'"
204 | 		codingseq = subprocess.check_output(command, shell=True)
205 | 		
206 | 		# Check to see whether transcript sequence has an entry in the reference genome (if not, continue)
207 | 		if len(codingseq) == 0:
208 | 			print 'Error: Reference does not contain coding sequence for transcript '+annot_transcript+'. Skipping this mutation.'
209 | 			continue
210 | 		
211 | 		# Get length of coding sequence plus position of mutation, and get desired sequence start and end indices
212 | 		codingseq = codingseq.replace('\n','')
213 | 		seqlength = len(codingseq)
214 | 		seqstart = 0
215 | 		seqend = 0
216 | 		if start >= 0:
217 | 			seqstart = start
218 | 		else:
219 | 			seqstart = 0
220 | 		if classification == 'Frame_Shift_Ins' or classification == 'Frame_Shift_Del':  # Special case of frameshift mutations
221 | 			seqend = seqlength-1
222 | 		else:
223 | 			if end <= seqlength-1:
224 | 				seqend = end
225 | 			else:
226 | 				seqend = seqlength-1
227 | 		
228 | 		# Retrieve sequence desired
229 | 		sequence = codingseq[seqstart:seqend+1]
230 | 
231 | 		# Substitute in mutation at appropriate position
232 | 		disttomut = orig_start - seqstart
233 | 		if indicator == 0: # SNV
234 | 			mutregion = row[10].split('>')[1]
235 | 			mutatedseq = sequence[0:disttomut]+mutregion+sequence[disttomut+snvlength:]
236 | 		else:  # InDel
237 | 			if row[13] == 'DEL':
238 | 				mutatedseq = sequence[0:disttomut]+sequence[disttomut+mut_length:]
239 | 			else:
240 | 				mutregion = row[10].split('ins')[1]
241 | 				mutatedseq = sequence[0:disttomut]+mutregion+sequence[disttomut:]
242 | 
243 | 		# Deal with nonstop mutations in their own way (separately)
244 | 		if isnonstop == 1:
245 | 			nonstopseqlist = [mutatedseq]
246 | 			nonstopheaderlist = ['>seq_'+nonstopalphabet[nonstopcounter]+'_mut']
247 | 			nonstoppeptide, nonstoppepheader = DNASeqToProtein(nonstopseqlist, nonstopheaderlist, length/3)		
248 | 			if len(nonstoppeptide) < 1 or len(nonstoppepheader) < 1: # If we hit a stop codon and can't make a large enough peptide, continue
249 | 				isnonstop = 0
250 | 				continue
251 | 			nonstopfilehandle = outpath+'/len'+str(length/3)+'pep_FASTA_indel.txt'
252 | 			f = open(nonstopfilehandle, 'a')
253 | 			f.write(nonstoppepheader[0]+'\n'+nonstoppeptide[0]+'\n')
254 | 			nonstopheadermapfile.write('>seq_'+nonstopalphabet[nonstopcounter]+'\t'+patID+'|'+row[7]+'|chr'+row[2]+':'+row[3]+'-'+row[4]+'|'+row[8]+'|'+row[0]+'|'+row[1]+'|'+row[10]+'|'+row[12]+'\n')
255 | 			isnonstop = 0
256 | 			continue
257 | 
258 | 		# Add sequences to lists (mutant and WT for SNV, just mutant for indel)
259 | 		seqlist.append(mutatedseq)
260 | 		headerlist.append('>seq_'+str(counter)+'_mut')
261 | 		if indicator == 0:
262 | 			seqlist.append(sequence)
263 | 			headerlist.append('>seq_'+str(counter)+'_wt')
264 | 		# Write maf annotation information to map file (will be used in netMHC postprocessing)
265 | 		headermapfile.write('>seq_'+str(counter)+'\t'+patID+'|'+row[7]+'|chr'+row[2]+':'+row[3]+'-'+row[4]+'|'+row[8]+'|'+row[0]+'|'+row[1]+'|'+row[10]+'|'+row[12]+'\n')
266 | 		counter += 1
267 | 	headermapfile.close()		
268 | 
269 | 	return seqlist, headerlist
270 | # ----------------------------------------------------------------------------------------------- #
271 | 
272 | 
273 | # ----------------------------------------------------------------------------------------------- #
274 | # Function: writeToOutfile
275 | # Inputs: list of peptides, corresponding headers, current length, outpath, SNV vs. InDel indicator
276 | # Returns: None (writes to file)
277 | # Summary: Writes header+peptide combos to a file, one item per line, that will be an input to netMHC.
278 | # Example output file name: path/len9pep_snv_FASTA.txt (this file would contain SNV peptides of length 9). 
def writeToOutfile(peps, headers, length, outpath, indicator):
	"""Append header/peptide pairs to the FASTA file that will be fed to netMHC.

	peps      -- list of peptide strings
	headers   -- list of headers, parallel to peps ('>seq_<n>_mut' / '>seq_<n>_wt')
	length    -- peptide length, used only in the output file name
	outpath   -- directory containing the output file
	indicator -- 0 for SNVs (len<length>pep_FASTA_snv.txt), any other value for
	             InDels (len<length>pep_FASTA_indel.txt)

	Empty peptides are never written. For SNVs a peptide is written only when
	its mutant/wild-type counterpart also has a non-empty peptide, so the file
	always contains complete mut/wt pairs. The file is opened in append mode
	and is created even when nothing is written. Returns None.
	"""
	if indicator == 0:
		filehandle = outpath+'/len'+str(length)+'pep_FASTA_snv.txt'
		# Headers that actually carry a peptide; a set keeps the pairing check O(1) per entry
		available = set(h for h, p in zip(headers, peps) if len(p) > 0)
	else:
		filehandle = outpath+'/len'+str(length)+'pep_FASTA_indel.txt'
		available = None
	with open(filehandle, 'a') as f:
		for i in range(0, len(peps)):
			if len(peps[i]) == 0:
				continue
			if indicator == 0:
				# Look for the other half of the pair: wt for a mutant, mut for a wild type
				partnersuffix = '_wt' if 'mut' in headers[i] else '_mut'
				partner = '>seq_'+headers[i].split('_')[1]+partnersuffix
				if partner not in available:
					continue
			f.write(headers[i]+'\n'+peps[i]+'\n')

	return
307 | # ----------------------------------------------------------------------------------------------- #
308 | 
309 | 
310 | # ----------------------------------------------------------------------------------------------- #
311 | # Main function
def main():
	"""Command-line entry point: turn a .maf file into netMHC-ready peptide FASTA files.

	Expects exactly seven arguments after the script name:
		1. .maf file
		2. .maf file type (0 for SNVs, 1 for InDels)
		3. comma-separated peptide lengths
		4. patient ID
		5. output directory
		6. path to the cds reference FASTA
		7. path to the cdna reference FASTA

	Exits with status 1 (after printing a message) when the argument count is wrong.
	"""
	# Check to make sure we have the right number of inputs
	if len(sys.argv) != 8:
		print('Error: incorrect number of inputs.')
		print('Please input a .maf file, .maf file type, the peptide lengths you want, the patient ID, an outfile path, and the paths to the cds and cdna reference files.')
		# Non-zero status so calling shell scripts can detect the failure
		sys.exit(1)
	# Store inputs
	maffile = sys.argv[1]
	snvorindel = int(sys.argv[2])
	# Strip whitespace (and skip empty entries) so "9, 10" does not leak a space into file names
	lengthlist = [entry.strip() for entry in sys.argv[3].split(',') if entry.strip()]
	patientID = sys.argv[4]
	outpath = sys.argv[5]
	cds_fasta_path = sys.argv[6]
	cdna_fasta_path = sys.argv[7]
	# For each peptide length in list, do this:
	for length in lengthlist:
		# Convert mutations into nucleotide sequences
		nucleotideseqs, nucleotideheaders = MutationsToDNASeq(maffile, length, patientID, outpath, snvorindel, cds_fasta_path, cdna_fasta_path)
		# Convert nucleotide sequences into peptide sequences
		peptideseqs, peptideheaders = DNASeqToProtein(nucleotideseqs, nucleotideheaders, length)
		# Write to outfile
		writeToOutfile(peptideseqs, peptideheaders, length, outpath, snvorindel)

	return

if __name__ == '__main__':
	main()
339 | # ----------------------------------------------------------------------------------------------- #
340 |                                                                                                        
341 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------