14 |
15 |
16 |
PATIENT INFO
17 |
18 |
19 |
20 |
21 |
22 | {% for key, val in patient_info.items() %}
23 | {{ key }} | {{ val }} |
24 | {% endfor %}
25 |
26 |
27 |
28 |
COMMAND LINE ARGUMENTS
29 |
30 |
31 |
32 |
33 |
34 | {% for key, val in args %}
35 | {{ key }} | {{ val }} |
36 | {% endfor %}
37 |
38 | {% if input_json_file %}
39 |
Report generated from saved location: {{ input_json_file }}
40 | {% endif %}
41 |
42 |
43 | {% if variants %}
44 | {% for v in variants %}
45 | -
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | Variant |
55 | {{ v.short_description }} |
56 |
57 |
58 | {% for key, val in v.variant_data.items() %}
59 | {{ key }} | {{ val }} |
60 | {% endfor %}
61 |
62 |
63 |
64 |
Predicted Effect
65 |
66 | {% for key, val in v.effect_data.items() %}
67 | {{ key }} | {{ val }} |
68 | {% endfor %}
69 |
70 |
71 |
72 | {% if v.databases %}
73 |
Databases
74 |
75 | {% for key, val in v.databases.items() %}
76 | {{ key }} | {{ val }} |
77 | {% endfor %}
78 |
79 |
80 | {% endif %}
81 |
82 |
83 | {% for p in v.peptides %}
84 | -
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 | {% for key, val in p.peptide_data.items() %}
96 | {{ key }} | {{ val }} |
97 | {% endfor %}
98 |
99 | |
100 | {% if include_manufacturability %}
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 | {% for key, val in p.manufacturability_data.items() %}
109 | {{ key }} | {{ val }} |
110 | {% endfor %}
111 |
112 | |
113 | {% endif %}
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 | {% for key in p.epitopes[0] %}
129 | {{ key }} |
130 | {% endfor %}
131 |
132 |
133 | {% for e in p.epitopes %}
134 |
135 | {% for _, val in e.items() %}
136 | {{ val }} |
137 | {% endfor %}
138 |
139 | {% endfor %}
140 |
141 | |
142 |
143 | {% if include_wt_epitopes %}
144 |
145 | {% if p.wt_epitopes %}
146 |
Predicted strong binders that do not overlap the mutation
147 |
148 |
149 |
150 | Sequence |
151 | IC50 |
152 | Allele |
153 |
154 |
155 | {% for e in p.wt_epitopes %}
156 |
157 | {{ e["Sequence"] }} |
158 | {{ e["IC50"] }} |
159 | {{ e["Allele"] }} |
160 |
161 | {% endfor %}
162 |
163 | {% else %}
164 |
No predicted strong binders that do not overlap the mutation.
165 | {% endif %}
166 |
167 | {% endif %}
168 |
169 | {% endfor %}
170 |
171 |
172 |
173 | {% endfor %}
174 |
175 |
176 | {% if reviewers %}
177 |
178 |
179 | Reviewed By |
180 |
181 | {% for r in reviewers %}
182 | {{ r }} |
183 | {% endfor %}
184 |
185 |
186 | {% endif %}
187 |
188 |
189 |
190 | Final Review |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 | Name | {{ final_review }} |
200 | Signature | |
201 | Date | |
202 |
203 | |
204 |
205 |
206 | {% else %}
207 |
No variants with sufficient vaccine peptides were found.
208 | {% endif %}
209 |
210 |
211 |
212 |
--------------------------------------------------------------------------------
/vaxrank/templates/template.txt:
--------------------------------------------------------------------------------
1 | {% for key, val in patient_info.items() %}
2 | {{ key }}: {{ val }}
3 | {% endfor %}
4 |
5 | Package version info
6 | {% for key, val in package_versions.items() %}
7 | {{ key }}: {{ val }}
8 | {% endfor %}
9 | ---
10 |
11 | {% if variants %}
12 | {% for v in variants %}
13 | {{ v.num }}) {{ v.short_description }} ({{ v.variant_data['Gene name'] }})
14 | {% for key, val in v.variant_data.items() %}
15 | {{ key }}: {{ val }}
16 | {% endfor %}
17 |
18 | {% for key, val in v.effect_data.items() %}
19 | {{ key }}: {{ val }}
20 | {% endfor %}
21 |
22 | Vaccine Peptides:
23 | {% for p in v.peptides %}
24 | {{ p.header_display_data.num }}. {{ p.header_display_data.aa_before_mutation }}_{{ p.header_display_data.aa_mutant }}_{{ p.header_display_data.aa_after_mutation }} (score = {{ v.variant_data["Top score"] }})
25 | {% for key, val in p.peptide_data.items() %}
26 | - {{ key }}: {{ val }}
27 | {% endfor %}
28 | {% if include_manufacturability %}
29 |
30 | Manufacturability:
31 | {% for key, val in p.manufacturability_data.items() %}
32 | - {{ key }}: {{ val }}
33 | {% endfor %}
34 | {% endif %}
35 |
36 | Predicted mutant epitopes:
37 | {{ p.ascii_epitopes|indent(18) }}
38 |
39 | {% if include_wt_epitopes and p.wt_epitopes %}
40 | Predicted strong binders that do not overlap the mutation:
41 | {{ p.ascii_wt_epitopes|indent(18) }}
42 | {% endif %}
43 |
44 | {% endfor %}
45 | {% endfor %}
46 | {% else %}
47 | No variants with sufficient vaccine peptides were found.
48 | {% endif %}
49 |
--------------------------------------------------------------------------------
/vaxrank/vaccine_peptide.py:
--------------------------------------------------------------------------------
1 | # Licensed under the Apache License, Version 2.0 (the "License");
2 | # you may not use this file except in compliance with the License.
3 | # You may obtain a copy of the License at
4 | #
5 | # http://www.apache.org/licenses/LICENSE-2.0
6 | #
7 | # Unless required by applicable law or agreed to in writing, software
8 | # distributed under the License is distributed on an "AS IS" BASIS,
9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 |
13 |
14 | from operator import attrgetter
15 |
16 | import numpy as np
17 | from serializable import Serializable
18 |
19 | from .manufacturability import ManufacturabilityScores
20 |
21 |
class VaccinePeptide(Serializable):
    """
    A candidate vaccine peptide: the sequence information of a
    MutantProteinFragment together with the MHC binding predictions made
    for subsequences of that fragment.

    Predictions are partitioned into a "mutant" group (overlapping the
    mutation and not occurring in the reference) and a "wildtype" group
    (everything else). Both groups are stored sorted in ascending order of
    the prediction attribute named by `sort_predictions_by`.
    """

    def __init__(
            self,
            mutant_protein_fragment,
            epitope_predictions,
            num_mutant_epitopes_to_keep=None,
            sort_predictions_by='ic50'):
        """
        Parameters
        ----------
        mutant_protein_fragment : MutantProteinFragment

        epitope_predictions : list of EpitopePrediction

        num_mutant_epitopes_to_keep : int or None
            Maximum number of mutant epitopes retained after sorting.
            Any falsy value (None, but also 0) keeps all mutant epitopes.

        sort_predictions_by : str
            Name of the EpitopePrediction attribute used as the ascending
            sort key for both the mutant and the wildtype predictions.
            Can be either 'ic50' or 'percentile_rank'.
        """
        self.mutant_protein_fragment = mutant_protein_fragment
        self.epitope_predictions = epitope_predictions
        self.num_mutant_epitopes_to_keep = num_mutant_epitopes_to_keep
        self.sort_predictions_by = sort_predictions_by

        rank_of = attrgetter(sort_predictions_by)

        def is_mutant_specific(prediction):
            # a prediction counts as "mutant" only when it spans the
            # mutation and its sequence is absent from the reference
            return (
                prediction.overlaps_mutation and
                not prediction.occurs_in_reference)

        def total_logistic_score(predictions):
            return sum(
                prediction.logistic_epitope_score()
                for prediction in predictions)

        # best (lowest sort key) mutant epitopes first, then only keep the
        # top k of them when a limit was requested
        mutant_predictions = sorted(
            filter(is_mutant_specific, epitope_predictions),
            key=rank_of)
        if num_mutant_epitopes_to_keep:
            mutant_predictions = \
                mutant_predictions[:num_mutant_epitopes_to_keep]
        self.mutant_epitope_predictions = mutant_predictions

        # everything which is not mutant-specific is treated as wildtype;
        # this list is never truncated
        self.wildtype_epitope_predictions = sorted(
            [
                prediction
                for prediction in epitope_predictions
                if not is_mutant_specific(prediction)
            ],
            key=rank_of)

        self.wildtype_epitope_score = total_logistic_score(
            self.wildtype_epitope_predictions)
        # the mutant score only reflects the epitopes which survived the
        # top-k truncation above
        self.mutant_epitope_score = total_logistic_score(
            self.mutant_epitope_predictions)

        self.manufacturability_scores = \
            ManufacturabilityScores.from_amino_acids(
                self.mutant_protein_fragment.amino_acids)

    def peptide_synthesis_difficulty_score_tuple(
            self,
            max_c_terminal_hydropathy=1.5,
            min_kmer_hydropathy=0,
            max_kmer_hydropathy_low_priority=1.5,
            max_kmer_hydropathy_high_priority=2.5):
        """
        Build the tuple of manufacturability penalties used for
        lexicographic sorting of vaccine peptides. Every entry is a number
        we want to minimize, listed from most to least important:

          1. number of cysteines in the sequence (to prevent the
             formation of disulfide bonds)
          2. amount by which the C-terminal 7mer GRAVY (mean hydropathy)
             score exceeds `max_c_terminal_hydropathy`
          3. amount by which the largest 7mer GRAVY score exceeds
             `max_kmer_hydropathy_high_priority`
          4. terminal residues known to make solid phase synthesis
             difficult (difficult N-terminal residue, C-terminal
             cysteine, C-terminal proline)
          5. N-terminal asparagine (not as harmful) and
             asparagine-proline bonds (known to dissociate easily)
          6. amount by which the largest 7mer GRAVY score exceeds
             `max_kmer_hydropathy_low_priority`
          7. amount by which the largest 7mer GRAVY score falls below
             `min_kmer_hydropathy`

        Hydropathy uses AA values from Table 2 of Kyte & Doolittle 1982.

        (Sort criteria determined through conversations with manufacturer)

        Parameters
        ----------
        max_c_terminal_hydropathy : float
            Largest unpenalized C-terminal 7mer GRAVY score.

        min_kmer_hydropathy : float
            Smallest unpenalized value of the maximum 7mer GRAVY score.

        max_kmer_hydropathy_low_priority : float
            Lower (less important) ceiling for the maximum 7mer GRAVY score.

        max_kmer_hydropathy_high_priority : float
            Higher (more important) ceiling for the maximum 7mer GRAVY score.

        Returns
        -------
        tuple of 10 numbers, bigger is worse
        """
        scores = self.manufacturability_scores
        cterm_gravy = scores.cterm_7mer_gravy_score
        max_gravy = scores.max_7mer_gravy_score

        def amount_over(value, limit):
            # penalty is zero until `value` rises above `limit`
            return max(0, value - limit)

        return (
            # total number of Cys residues
            scores.cysteine_count,

            # C-terminal 7mer GRAVY score should stay below the
            # (user specified) maximum for the C terminus, default 1.5
            amount_over(cterm_gravy, max_c_terminal_hydropathy),

            # max 7mer GRAVY score should stay below the (user specified)
            # high priority maximum, default 2.5
            amount_over(max_gravy, max_kmer_hydropathy_high_priority),

            # avoid N-terminal Gln, Glu, Cys
            scores.difficult_n_terminal_residue,

            # avoid C-terminal Cys
            scores.c_terminal_cysteine,

            # avoid C-terminal Pro
            scores.c_terminal_proline,

            # avoid N-terminal Asn
            scores.n_terminal_asparagine,

            # avoid the bonds counted by asparagine_proline_bond_count
            # NOTE(review): an earlier comment called these "Asp-Pro"
            # bonds while the attribute says asparagine; confirm which
            # residue is meant
            scores.asparagine_proline_bond_count,

            # max 7mer GRAVY score should stay below the (user specified)
            # low priority maximum, default 1.5
            amount_over(max_gravy, max_kmer_hydropathy_low_priority),

            # max 7mer GRAVY score should stay above the (user specified)
            # minimum for 7mer windows, default 0
            amount_over(min_kmer_hydropathy, max_gravy),
        )

    def lexicographic_sort_key(self):
        """
        Create tuple of scores so that candidates get sorted
        lexicographically by multiple criteria.

        Quantities we want more of (mutant epitope score, read support,
        number of mutant amino acids, distance of the mutation from the
        edge) are negated; quantities we want less of (manufacturability
        penalties, wildtype epitope score) keep their sign.

        Returns
        -------
        tuple made of the essential scores, followed by the tuple from
        peptide_synthesis_difficulty_score_tuple() (called with its
        default thresholds), followed by the tie-breaking scores.
        """
        fragment = self.mutant_protein_fragment

        # Sum of normalized MHC binding affinities of subsequences,
        # rounded to 6 decimal places to avoid floating point errors
        # from serving as tie-breakers
        negated_mutant_score = -round(self.mutant_epitope_score, 6)

        # Number of reads supporting the variant
        negated_alt_reads = -fragment.n_alt_reads

        synthesis_penalties = self.peptide_synthesis_difficulty_score_tuple()

        tie_breakers = (
            # Number of reads supporting the particular protein sequence
            # we're using for this vaccine peptide. Currently all vaccine
            # peptides are drawn from the same larger sequence so this
            # score shouldn't change.
            -fragment.n_alt_reads_supporting_protein_sequence,

            # Minimize the sum of non-mutant MHC binding scores, rounded
            # to prevent floating point errors from serving as
            # tie-breakers
            round(self.wildtype_epitope_score, 6),

            # All else being equal, we prefer to maximize the number of
            # mutant amino acids
            -fragment.n_mutant_amino_acids,

            # If nothing else can serve as a tie break then try to center
            # the mutation in the vaccine peptide.
            -fragment.mutation_distance_from_edge,
        )
        return (
            (negated_mutant_score, negated_alt_reads) +
            synthesis_penalties +
            tie_breakers
        )

    def contains_mutant_epitopes(self):
        """
        Returns True when at least one mutant epitope prediction was kept.
        """
        return bool(self.mutant_epitope_predictions)

    @property
    def expression_score(self):
        """
        Square root of the number of alt reads of the protein fragment.
        """
        n_alt_reads = self.mutant_protein_fragment.n_alt_reads
        return np.sqrt(n_alt_reads)

    @property
    def combined_score(self):
        """
        Product of the expression score and the mutant epitope score.
        """
        return self.expression_score * self.mutant_epitope_score

    def to_dict(self):
        """
        Returns dictionary of the keyword arguments accepted by __init__.

        The "epitope_predictions" entry is the retained mutant predictions
        followed by the wildtype predictions, so mutant predictions dropped
        by the top-k truncation are not included.
        """
        retained_predictions = (
            self.mutant_epitope_predictions +
            self.wildtype_epitope_predictions)
        return {
            "mutant_protein_fragment": self.mutant_protein_fragment,
            "epitope_predictions": retained_predictions,
            "num_mutant_epitopes_to_keep": self.num_mutant_epitopes_to_keep,
            "sort_predictions_by": self.sort_predictions_by,
        }

    @classmethod
    def from_dict(cls, d):
        """
        Construct a VaccinePeptide from a dictionary of __init__ keyword
        arguments. The given dictionary is not modified.

        A missing "sort_predictions_by" entry is filled in with "ic50"
        (presumably to accept dictionaries saved before that field
        existed; confirm against older serialized results).
        """
        init_kwargs = d.copy()
        init_kwargs.setdefault("sort_predictions_by", "ic50")
        return cls(**init_kwargs)
230 |
--------------------------------------------------------------------------------
/vaxrank/vaxrank_results.py:
--------------------------------------------------------------------------------
1 | # Licensed under the Apache License, Version 2.0 (the "License");
2 | # you may not use this file except in compliance with the License.
3 | # You may obtain a copy of the License at
4 | #
5 | # http://www.apache.org/licenses/LICENSE-2.0
6 | #
7 | # Unless required by applicable law or agreed to in writing, software
8 | # distributed under the License is distributed on an "AS IS" BASIS,
9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | # See the License for the specific language governing permissions and
11 | # limitations under the License.
12 |
13 | from collections import OrderedDict
14 |
15 | from serializable import Serializable
16 |
class VaxrankResults(Serializable):
    """
    Container for all of the results captured by running Vaxrank.
    """
    def __init__(
            self,
            isovar_results,
            variant_to_vaccine_peptides_dict,
            ranked_vaccine_peptides):
        """
        Parameters
        ----------
        isovar_results : list of isovar.IsovarResult
            IsovarResult object for each variant without any filtering

        variant_to_vaccine_peptides_dict : dict
            Dictionary mapping variant to a list of possible vaccine peptides

        ranked_vaccine_peptides : list of VaccinePeptide
        """
        self.isovar_results = isovar_results
        self.variant_to_vaccine_peptides_dict = variant_to_vaccine_peptides_dict
        self.ranked_vaccine_peptides = ranked_vaccine_peptides

    @property
    def variants(self):
        """
        Unfiltered list of variants, one per entry of isovar_results
        and in the same order.

        Returns
        -------
        list of varcode.Variant
        """
        return [result.variant for result in self.isovar_results]

    def variant_counts(self):
        """
        Summarize Vaxrank counts for total variants, variants with coding
        effects, variants with RNA support, and variants with associated
        vaccine peptides.

        Returns
        -------
        dict with keys 'num_total_variants', 'num_coding_effect_variants',
        'num_variants_with_rna_support' and
        'num_variants_with_vaccine_peptides'
        """
        per_variant = self.variant_properties()

        def tally(field):
            # add up the value stored under `field` across all variants
            return sum(properties[field] for properties in per_variant)

        # overall variant counts for report display
        return {
            'num_total_variants': len(self.isovar_results),
            'num_coding_effect_variants': tally('is_coding_nonsynonymous'),
            'num_variants_with_rna_support': tally('rna_support'),
            'num_variants_with_vaccine_peptides': tally('has_vaccine_peptide'),
        }

    def variant_properties(self, gene_pathway_check=None):
        """
        Collect per-variant properties for later analysis.

        Parameters
        ----------
        gene_pathway_check : GenePathwayCheck (optional)
            Used to look up whether a mutation or its affected gene are in
            some biologically important pathway. When given, the result of
            its make_variant_dict(variant) is merged into each entry.

        Returns
        -------
        list of OrderedDict, one per entry of isovar_results, containing
        properties we want to analyze later, e.g. whether this variant is
        part of a pathway of interest, is a strong MHC binder, etc.
        """
        rows = []
        for result in self.isovar_results:
            variant = result.variant

            row = OrderedDict([
                ('gene_name', result.top_gene_name),
                ('contig', variant.contig),
                ('start', variant.start),
                ('ref', variant.ref),
                ('alt', variant.alt),
                ('is_coding_nonsynonymous',
                    result.predicted_effect_modifies_protein_sequence),
                ('rna_support',
                    result.has_mutant_protein_sequence_from_rna),
            ])

            # TODO:
            # compute MHC binder status for variants that don't have RNA
            # support; until then both flags just record whether any
            # vaccine peptide was generated for the variant
            has_vaccine_peptide = \
                variant in self.variant_to_vaccine_peptides_dict
            row['mhc_binder'] = has_vaccine_peptide
            row['has_vaccine_peptide'] = has_vaccine_peptide

            if gene_pathway_check is not None:
                row.update(gene_pathway_check.make_variant_dict(variant))

            rows.append(row)
        return rows
123 |
--------------------------------------------------------------------------------