├── .gitignore
├── Design_OPS
    ├── README.pdf
    ├── references.bib
    └── README.Rmd
├── Installation
    ├── README.pdf
    ├── README.Rmd
    └── README.md
├── Design_CRISPRa
    ├── README.pdf
    ├── references.bib
    └── README.Rmd
├── Design_CRISPRi
    ├── README.pdf
    ├── references.bib
    └── README.Rmd
├── Design_CRISPRbe
    ├── README.pdf
    ├── README.Rmd
    └── references.bib
├── Design_PairedGuides
    ├── README.pdf
    ├── figures
    │   ├── resave.sh
    │   └── paired_simplified.pdf
    └── README.Rmd
├── Design_CRISPRkd_CasRx
    ├── README.pdf
    ├── README.Rmd
    └── references.bib
├── Design_CRISPRko_Cas12a
    └── README.pdf
├── Design_CRISPRko_Cas9
    └── README.pdf
├── Design_Custom_Sequence
    ├── README.pdf
    ├── data
    │   └── egfp.fa
    └── README.Rmd
├── Building_Custom_Nuclease
    ├── README.pdf
    ├── figures
    │   ├── enzymes.pdf
    │   ├── enzymes.pptx
    │   ├── nickases.pdf
    │   ├── cut_sites.pdf
    │   ├── cut_sites.pptx
    │   ├── nickases.pptx
    │   ├── nucleases.pdf
    │   ├── nucleases.pptx
    │   ├── drafts
    │   │   ├── nucleases_be.pptx
    │   │   └── protospacer.pptx
    │   ├── nucleasesBaseEditor.pdf
    │   ├── nucleasesBaseEditor.pptx
    │   ├── deprecated
    │   │   └── nucleasesv1.pptx
    │   └── resave.sh
    ├── README_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-24-1.png
    │   │   └── unnamed-chunk-25-1.png
    └── references.bib
├── Building_Gene_Annotation
    ├── README.pdf
    └── README.Rmd
├── Building_Genome_Indices
    ├── README.pdf
    ├── references.bib
    ├── README.Rmd
    └── README.md
├── Design_Cross_Reactivity
    ├── README.pdf
    └── README.Rmd
├── Design_Minor_Major_Allele
    ├── README.pdf
    └── README.Rmd
├── Building_Database_Human_Cas9
    ├── README.pdf
    ├── script.R
    └── README.Rmd
├── Validating_Existing_gRNA_Libraries
    ├── README.pdf
    ├── extdata
    │   ├── mtsg-grnas-readcounts.xlsx
    │   └── readme.txt
    └── README.Rmd
├── Scripts
    └── generatePDFs.R
├── Design_CRISPRkd_Csm
    ├── references.bib
    └── README.Rmd
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store


--------------------------------------------------------------------------------
/Design_OPS/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_OPS/README.pdf


--------------------------------------------------------------------------------
/Installation/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Installation/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRa/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRa/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRi/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRi/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRbe/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRbe/README.pdf


--------------------------------------------------------------------------------
/Design_PairedGuides/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_PairedGuides/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRkd_CasRx/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRkd_CasRx/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRko_Cas12a/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRko_Cas12a/README.pdf


--------------------------------------------------------------------------------
/Design_CRISPRko_Cas9/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_CRISPRko_Cas9/README.pdf


--------------------------------------------------------------------------------
/Design_Custom_Sequence/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_Custom_Sequence/README.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/README.pdf


--------------------------------------------------------------------------------
/Building_Gene_Annotation/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Gene_Annotation/README.pdf


--------------------------------------------------------------------------------
/Building_Genome_Indices/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Genome_Indices/README.pdf


--------------------------------------------------------------------------------
/Design_Cross_Reactivity/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_Cross_Reactivity/README.pdf


--------------------------------------------------------------------------------
/Design_Minor_Major_Allele/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_Minor_Major_Allele/README.pdf


--------------------------------------------------------------------------------
/Building_Database_Human_Cas9/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Database_Human_Cas9/README.pdf


--------------------------------------------------------------------------------
/Design_PairedGuides/figures/resave.sh:
--------------------------------------------------------------------------------
1 | # Requires Inkscape to be installed. NOTE(review): `ink` is presumably a local alias for the `inkscape` binary - confirm.
2 | ink paired_simplified.pdf --export-plain-svg=paired_simplified.svg


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/enzymes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/enzymes.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/enzymes.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/enzymes.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nickases.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nickases.pdf


--------------------------------------------------------------------------------
/Validating_Existing_gRNA_Libraries/README.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Validating_Existing_gRNA_Libraries/README.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/cut_sites.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/cut_sites.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/cut_sites.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/cut_sites.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nickases.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nickases.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nucleases.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nucleases.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nucleases.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nucleases.pptx


--------------------------------------------------------------------------------
/Design_PairedGuides/figures/paired_simplified.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Design_PairedGuides/figures/paired_simplified.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/drafts/nucleases_be.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/drafts/nucleases_be.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/drafts/protospacer.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/drafts/protospacer.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nucleasesBaseEditor.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nucleasesBaseEditor.pdf


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/nucleasesBaseEditor.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/nucleasesBaseEditor.pptx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/deprecated/nucleasesv1.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/figures/deprecated/nucleasesv1.pptx


--------------------------------------------------------------------------------
/Validating_Existing_gRNA_Libraries/extdata/mtsg-grnas-readcounts.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Validating_Existing_gRNA_Libraries/extdata/mtsg-grnas-readcounts.xlsx


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/README_files/figure-gfm/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/README_files/figure-gfm/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/README_files/figure-gfm/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crisprVerse/Tutorials/HEAD/Building_Custom_Nuclease/README_files/figure-gfm/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/Validating_Existing_gRNA_Libraries/extdata/readme.txt:
--------------------------------------------------------------------------------
1 | * mtsg-grnas-readcounts.xlsx comes from Addgene:
2 | https://www.addgene.org/pooled-library/chen-mouse-tsg-crispr-knockout/	
3 | Mouse library for tumor suppressor genes.


--------------------------------------------------------------------------------
/Scripts/generatePDFs.R:
--------------------------------------------------------------------------------
1 | # Render every R Markdown tutorial under the parent directory to PDF.
2 | library(rmarkdown)
3 | library(utils)
4 | # `pattern` is a regex: escape the dot and anchor it so only *.Rmd files match.
5 | files <- list.files("../", pattern="\\.Rmd$", recursive=TRUE, full.names=TRUE)
6 | for (i in seq_along(files)){  # seq_along() is safe when `files` is empty
7 |     render(files[i], output_format='pdf_document')
8 |     print(i)  # progress indicator
9 | }


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/figures/resave.sh:
--------------------------------------------------------------------------------
1 | # Requires Inkscape to be installed. NOTE(review): `ink` is presumably a local alias for the `inkscape` binary - confirm.
2 | ink enzymes.pdf --export-plain-svg=enzymes.svg
3 | ink nucleases.pdf --export-plain-svg=nucleases.svg
4 | ink nickases.pdf --export-plain-svg=nickases.svg
5 | ink nucleasesBaseEditor.pdf --export-plain-svg=nucleasesBaseEditor.svg
6 | ink cut_sites.pdf --export-plain-svg=cut_sites.svg
7 | 


--------------------------------------------------------------------------------
/Design_OPS/references.bib:
--------------------------------------------------------------------------------
 1 | %% This BibTeX bibliography file was created using BibDesk.
 2 | %% https://bibdesk.sourceforge.io/
 3 | 
 4 | %% Created for Luke Hoberecht at 2022-08-10 13:55:08 -0700 
 5 | 
 6 | 
 7 | %% Saved with string encoding Unicode (UTF-8) 
 8 | 
 9 | 
10 | 
11 | @article{ops,
12 | 	author = {Feldman, David and Singh, Avtar and Schmid-Burgk, Jonathan L and Carlson, Rebecca J and Mezger, Anja and Garrity, Anthony J and Zhang, Feng and Blainey, Paul C},
13 | 	journal = {Cell},
14 | 	number = {3},
15 | 	pages = {787--799},
16 | 	publisher = {Elsevier},
17 | 	title = {Optical pooled screens in human cells},
18 | 	volume = {179},
19 | 	year = {2019}}
20 | 


--------------------------------------------------------------------------------
/Design_CRISPRkd_Csm/references.bib:
--------------------------------------------------------------------------------
 1 | @article{csm1,
 2 |   title={Precise transcript targeting by CRISPR-Csm complexes},
 3 |   author={Colognori, David and Trinidad, Marena and Doudna, Jennifer A},
 4 |   journal={Nature biotechnology},
 5 |   volume={41},
 6 |   number={9},
 7 |   pages={1256--1264},
 8 |   year={2023},
 9 |   publisher={Nature Publishing Group US New York}
10 | }
11 | 
12 | 
13 | @article{csm2,
14 |   title={Single-molecule live-cell RNA imaging with CRISPR-Csm},
15 |   author={Xia, Chenglong and Colognori, David and Jiang, Xueyang and Xu, Ke and Doudna, Jennifer A},
16 |   journal={bioRxiv},
17 |   pages={2024--07},
18 |   year={2024},
19 |   publisher={Cold Spring Harbor Laboratory}
20 | }
21 | 


--------------------------------------------------------------------------------
/Design_Custom_Sequence/data/egfp.fa:
--------------------------------------------------------------------------------
1 | >egfp
2 | ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2022, Genentech, Inc.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/Building_Genome_Indices/references.bib:
--------------------------------------------------------------------------------
 1 | %% This BibTeX bibliography file was created using BibDesk.
 2 | %% https://bibdesk.sourceforge.io/
 3 | 
 4 | %% Created for Luke Hoberecht at 2022-07-29 11:37:07 -0700 
 5 | 
 6 | 
 7 | %% Saved with string encoding Unicode (UTF-8) 
 8 | 
 9 | 
10 | 
11 | @article{langmead2009bowtie,
12 | 	abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
13 | 	author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
14 | 	da = {2009/03/04},
15 | 	date-added = {2022-07-29 11:36:57 -0700},
16 | 	date-modified = {2022-07-29 11:36:57 -0700},
17 | 	doi = {10.1186/gb-2009-10-3-r25},
18 | 	id = {Langmead2009},
19 | 	isbn = {1474-760X},
20 | 	journal = {Genome Biology},
21 | 	number = {3},
22 | 	pages = {R25},
23 | 	title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
24 | 	ty = {JOUR},
25 | 	url = {https://doi.org/10.1186/gb-2009-10-3-r25},
26 | 	volume = {10},
27 | 	year = {2009},
28 | 	bdsk-url-1 = {https://doi.org/10.1186/gb-2009-10-3-r25}}
29 | 
30 | @article{bwa,
31 | 	author = {Li, Heng and Durbin, Richard},
32 | 	journal = {Bioinformatics},
33 | 	number = {14},
34 | 	pages = {1754--1760},
35 | 	publisher = {Oxford University Press},
36 | 	title = {Fast and accurate short read alignment with Burrows--Wheeler transform},
37 | 	volume = {25},
38 | 	year = {2009}}
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Tutorials for the crisprVerse
 2 | 
 3 | ### Getting started 
 4 | 
 5 | 1. [Installation](https://github.com/crisprVerse/Tutorials/tree/master/Installation)
 6 | 2. [Building a genome index for off-target alignment](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices)
 7 | 3. [Specifying a custom nuclease](https://github.com/crisprVerse/Tutorials/tree/master/Building_Custom_Nuclease)
 8 | 4. [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation)
 9 | 
10 | ### Common gRNA design workflows
11 | 
12 | 5. [CRISPR knockout (CRISPRko) design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9)
13 | 6. [CRISPR knockout (CRISPRko) design with Cas12a](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas12a)
14 | 7. [CRISPR knockdown (CRISPRkd) design with Cas13d](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRkd_CasRx)
15 | 8. [CRISPR knockdown (CRISPRkd) design with the Csm complex](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRkd_Csm)
16 | 9. [CRISPR base editing (CRISPRbe) design with BE4max](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRbe)
17 | 10. [CRISPR activation (CRISPRa) design](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRa)
18 | 11. [CRISPR interference (CRISPRi) design](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRi)
19 | 
20 | 
21 | ### Less common gRNA design workflows
22 | 
23 | 12. [Paired gRNA design](https://github.com/crisprVerse/Tutorials/tree/master/Design_PairedGuides)
24 | 13. [Working with custom DNA sequences](https://github.com/crisprVerse/Tutorials/tree/master/Design_Custom_Sequence)
25 | 14. [Design for optical pooled screening (OPS)](https://github.com/crisprVerse/Tutorials/tree/master/Design_OPS)
26 | 
27 | ### Miscellaneous
28 | 
29 | 15. [Mapping gRNAs across species](https://github.com/crisprVerse/Tutorials/tree/master/Design_Cross_Reactivity)
30 | 16. [Working with minor and major alleles](https://github.com/crisprVerse/Tutorials/tree/master/Design_Minor_Major_Allele)
31 | 17. [Validating existing gRNA libraries](https://github.com/crisprVerse/Tutorials/tree/master/Validating_Existing_gRNA_Libraries)
32 | 18. [Building a gRNA database across all protein-coding genes](https://github.com/crisprVerse/Tutorials/tree/master/Building_Database_Human_Cas9)
33 | 
34 | 


--------------------------------------------------------------------------------
/Building_Gene_Annotation/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Building a gene annotation object"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | In this tutorial, we describe the process for making and using  
 18 | rich gene annotation objects to be used throughout the crisprVerse ecosystem. 
 19 | Such objects enable users to retrieve coordinates of transcripts, exons, etc.
 20 | Those objects are also used by several functions in the [crisprDesign package](https://github.com/crisprVerse/crisprDesign) to add gene annotations
 21 | to both gRNA on-targets and off-targets. This is what the `txObject` argument 
 22 | in many of the functions expects. 
 23 | 
 24 | We will also describe the process for constructing and using a 
 25 | transcription start site (TSS) annotation object (`tssObject` argument in many
 26 | of the functions).  
 27 | 
 28 | 
 29 | # Installation
 30 | 
 31 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages  `crisprDesign` and `crisprDesignData` required in this tutorial. 
 32 | 
 33 | 
 34 | ### Getting started
 35 | 
 36 | The packages can be loaded into an R session in the usual way:
 37 | 
 38 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 39 | library(crisprDesign)
 40 | library(crisprDesignData)
 41 | ```
 42 | 
 43 | # Building gene annotation objects
 44 | 
 45 | In the crisprVerse, we represent gene annotations using a `GRangesList` object,
 46 | and this can be easily constructed using the commonly-used Bioconductor
 47 | objects `TxDb` (see the [GenomicFeatures package](https://bioconductor.riken.jp/packages/3.2/bioc/vignettes/GenomicFeatures/inst/doc/GenomicFeatures.pdf) to learn more about `TxDb` objects).
 48 | We will now show several ways of constructing such objects. 
 49 | 
 50 | 
 51 | # Building a GRangesList from Ensembl
 52 | 
 53 | We construct a gene annotation object for the human genome
 54 | using the Ensembl release 104 (hg38). This can be done using the function
 55 | `getTxDb` in `crisprDesign`:
 56 | 
 57 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 58 | txdb <- getTxDb(organism="Homo sapiens", release=104)
 59 | ```
 60 | 
 61 | This may take several minutes, and note that this requires an internet connection.
 62 | In case it times out, one can increase the timeout option using the following:
 63 | 
 64 | ```{r}
 65 | options(timeout = max(10000000, getOption("timeout")))
 66 | ```
 67 | 
 68 | Once obtained, we can convert the object
 69 | into a `GRangesList` using the function `TxDb2GRangesList` from `crisprDesign`:
 70 | 
 71 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 72 | grList <- TxDb2GRangesList(txdb)
 73 | ```
 74 | 
 75 | We will specify that the genome is hg38:
 76 | 
 77 | ```{r}
 78 | GenomeInfoDb::genome(grList) <- "hg38"
 79 | ```
 80 | 
 81 | And that's it! The `grList` object contains all of the information about
 82 | the Ensembl release 104 gene model, and is ready to be used in the crisprVerse. 
 83 | Let's take a quick look at our gene annotation object:
 84 | 
 85 | ```{r}
 86 | names(grList)
 87 | grList$transcripts
 88 | ```
 89 | 
 90 | 
 91 | 
 92 | ## Building a tssObject
 93 | 
 94 | Building a TSS annotation object requires only one additional step after
 95 | constructing the `GRangesList` object described above.
 96 | This can be obtained using the function `getTssObjectFromTxObject` in
 97 | `crisprDesign`:
 98 | 
 99 | ```{r}
100 | tssObject <- getTssObjectFromTxObject(grList)
101 | tssObject
102 | ```
103 | 
104 | 
105 | # Using gene annotation objects
106 | 
107 | The gene (or TSS) annotation objects described above are often necessary for 
108 | the full characterization of CRISPR gRNAs as they serve as inputs for several
109 | of the `crisprDesign` functions, including `queryTxObject`,
110 | `queryTssObject`, `addGeneAnnotation`, `addTssAnnotation`, 
111 | and `addSpacerAlignments`. 
112 | 
113 | For convenience, we provide in the [crisprDesignData package](https://github.com/crisprVerse/crisprDesignData)
114 | precomputed gene annotation for human and mouse:
115 | 
116 | |  Object name | Object class | Version | Description |
117 | |----------- | ----------- | ----------- |----------- | 
118 | | `txdb_human`  | `GRangesList`   | Release 104 | Ensembl gene model for human (hg38/GRCh38) |
119 | | `txdb_mouse`  | `GRangesList`   | Release 102 | Ensembl gene model for mouse (mm10/GRCm38) |
120 | | `tss_human`   | `GRanges`       | Release 104 | Ensembl-based TSS coordinates for human (hg38/GRCh38) |
121 | | `tss_mouse`   | `GRanges`       | Release 102 | Ensembl-based TSS coordinates for mouse (mm10/GRCm38) |
122 | 
123 | # Building a gene annotation object from a GFF file
124 | 
125 | If you have a General Feature Format (GFF) file from which you want to 
126 | construct the gene annotation object, you can pass this to the `file` argument 
127 | of the `crisprDesign` function `getTxDb`; this will create the `TxDb` object 
128 | using the `GenomicFeatures` function `makeTxDbFromGFF`.
129 | 
130 | 
131 | # Reproducibility
132 | 
133 | ```{r}
134 | sessionInfo()
135 | ```
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/Building_Genome_Indices/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Building genome indices for off-target alignment"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | 
 15 | 
 16 | # Introduction
 17 | 
 18 | This vignette demonstrates how to build genome indices for the purpose of
 19 | performing on- and off-target alignment. In particular, we show how to build
 20 | such indices for the short read aligners bowtie [@langmead2009bowtie], 
 21 | as used by the `Rbowtie` and `crisprBowtie` packages, and BWA-backtrack [@bwa],
 22 | as used by the `Rbwa` and `crisprBwa` packages. Note that BWA is not 
 23 | available for Windows users. 
 24 | 
 25 | Generating a genome index file is time consuming, but only needs to be 
 26 | done once for a given genome. 
 27 | 
 28 | # Installation
 29 | 
 30 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the `crisprBowtie` and `crisprBwa` packages.
 31 | 
 32 | 
 33 | # Building a bowtie index
 34 | 
 35 | 
 36 | In the following example, we build a bowtie index for the human genome
 37 | using the hg38 build. First, users will need to download the FASTA file
 38 | from the UCSC genome browser. Here's the link:
 39 | https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
 40 | 
 41 | Next, assuming the `hg38.fa.gz` is located in the current directory, 
 42 | we build the bowtie genome index using the function `bowtie_build`
 43 | from the `Rbowtie` package (which is installed when `crisprBowtie` is installed):
 44 | 
 45 | ```{r, eval=FALSE}
 46 | library(Rbowtie)
 47 | fastaFile <- "./hg38.fa.gz"
 48 | bowtie_build(fastaFile,
 49 |              outdir="./",
 50 |              force=TRUE,
 51 |              prefix="hg38")
 52 | ```
 53 | 
 54 | This should take a couple of hours to run, and the resulting bowtie index files
 55 | will be located in the folder `./hg38` and can be used to run bowtie alignment.
 56 | See the [crisprBowtie](https://github.com/crisprVerse/crisprBowtie) package 
 57 | to learn how to perform a bowtie alignment within R. 
 58 | 
 59 | 
 60 | # Building a BWA index
 61 | 
 62 | Building a BWA index is similar to building a bowtie index.
 63 | Assuming the `hg38.fa.gz` is located in the current directory, 
 64 | we build the BWA genome index using the function `bwa_build_index`
 65 | from the `Rbwa` package (which is installed when `crisprBwa` is installed):
 66 | 
 67 | 
 68 | ```{r, eval=FALSE}
 69 | library(Rbwa)
 70 | fastaFile <- "./hg38.fa.gz"
 71 | bwa_build_index(fastaFile,
 72 |                 index_prefix="hg38")
 73 | ```
 74 | 
 75 | This should take a couple of hours to run, and the resulting BWA index files
 76 | will be located in the folder `./hg38` and can be used to run BWA alignment.
 77 | See the [crisprBwa](https://github.com/crisprVerse/crisprBwa) package 
 78 | to learn how to perform a BWA alignment within R. 
 79 | 
 80 | 
 81 | # Building a transcriptome index
 82 | 
 83 | For applications using RNA-targeting nucleases such as CasRx, off-target 
 84 | search is performed against transcriptomes rather than genomes.
 85 | Building a transcriptome index works similarly, except that we first need to
 86 | generate a FASTA file containing the transcriptome sequences.
 87 | This is easily accomplished with the function `getMrnaSequences`
 88 | from the `crisprDesign` package, assuming that a gene model
 89 | is provided, as well as a `BSgenome` object containing the DNA sequences
 90 | for the hg38 genome (`BSgenome.Hsapiens.UCSC.hg38`).
 91 | 
 92 | We first load the necessary packages:
 93 | 
 94 | ```{r, eval=FALSE}
 95 | library(BSgenome.Hsapiens.UCSC.hg38)
 96 | library(crisprDesign)
 97 | ```
 98 | 
 99 | The `crisprDesignData` package (see Installation) contains a gene model
100 | annotation for the hg38 genome, and can be loaded using the following:
101 | 
102 | ```{r, eval=FALSE}
103 | library(crisprDesignData)
104 | data("txdb_human", package="crisprDesignData")
105 | ```
106 | 
107 | See the [Gene annotation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) to learn more about how to build such gene annotation objects. 
108 | 
109 | We will now extract mRNA sequences for all available transcripts:
110 | 
111 | ```{r, eval=FALSE}
112 | txids <- unique(txdb_human$exons$tx_id)
113 | mrnasHuman <- getMrnaSequences(txids,
114 |                                bsgenome=BSgenome.Hsapiens.UCSC.hg38,
115 |                                txObject=txdb_human)
116 | ```
117 | 
118 | This should take less than an hour to run. Once completed, we will write
119 | the extracted mRNA sequences to disk using the FASTA format. This can
120 | be accomplished using the `writeXStringSet` function from the `Biostrings`
121 | package: 
122 | 
123 | 
124 | ```{r, eval=FALSE}
125 | library(Biostrings)
126 | writeXStringSet(mrnasHuman,
127 |                 file="ensembl_human_104.fasta",
128 |                 format="fasta")
129 | ```
130 | 
131 | Note that the `seqnames` of this FASTA file are Ensembl transcript IDs 
132 | instead of chromosomes. Once the FASTA file has been generated, the process for constructing either a bowtie or BWA index file is the same as described 
133 | in the above sections.
134 | 
135 | 
136 | # Reproducibility
137 | 
138 | ```{r}
139 | sessionInfo()
140 | ```
141 | 
142 | 
143 | 
144 | # References
145 | 
146 | 


--------------------------------------------------------------------------------
/Installation/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Installing the crisprVerse and packages necessary for the tutorials"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | 
 14 | 
 15 | # Installation 
 16 | 
 17 | We show in this tutorial how to install the crisprVerse packages, as well
 18 | as other packages
 19 | necessary for some of the [crisprVerse tutorials](https://github.com/crisprVerse/Tutorials).
 20 | 
 21 | 
 22 | 
 23 | ## Requirements
 24 | 
 25 | The crisprVerse is supported for macOS, Linux and Windows machines.
 26 | It requires R version >=4.4. Some of the third-party functionalities are not
 27 | available for Windows machines (BWA alignment, and some of the scoring 
 28 | functions). To download and install R, follow the instructions on the  [R-project website](https://www.r-project.org/).
 29 | 
 30 | ## Bioconductor versions
 31 | 
 32 | The Bioconductor project has 2 concurrent branches: `release` and `devel`.
 33 | Currently (July 2024), the release branch is `3.19`, and the
 34 | devel branch is `3.20`. See the 
 35 | [Bioconductor install page](https://www.bioconductor.org/install/) for
 36 | more information regarding Bioconductor versions.
 37 | 
 38 | The crisprVerse ecosystem is currently available on both the Bioconductor 
 39 | release and devel branches. The release branch, updated twice a year, 
 40 | is frozen in time and contains stable releases of the packages.
 41 | The devel branch is built over night and 
 42 | contains the latest changes pushed to the GitHub repositories.
 43 | We recommend using the release branch, unless new features are only
 44 | available in the devel branch. 
 45 | 
 46 | 
 47 | ## Installing the core crisprVerse packages
 48 | 
 49 | Type in the following commands in an R session to install the core
 50 | crisprVerse packages from the Bioconductor devel branch:
 51 | 
 52 | 
 53 | ```{r, eval=FALSE}
 54 | if (!requireNamespace("BiocManager", quietly = TRUE))
 55 |     install.packages("BiocManager")
 56 | 
 57 | BiocManager::install(version="devel")
 58 | BiocManager::install("crisprVerse")
 59 | ```
 60 | 
 61 | To install the packages from the release branch instead:
 62 | 
 63 | 
 64 | ```{r, eval=FALSE}
 65 | if (!requireNamespace("BiocManager", quietly = TRUE))
 66 |     install.packages("BiocManager")
 67 | 
 68 | BiocManager::install(version="release")
 69 | BiocManager::install("crisprVerse")
 70 | ```
 71 | 
 72 | 
 73 | 
 74 | This will install the following packages:
 75 | 
 76 | * [crisprBase](https://github.com/crisprVerse/crisprBase) to specify and manipulate CRISPR nucleases.
 77 | * [crisprBowtie](https://github.com/crisprVerse/crisprBowtie) to perform gRNA spacer sequence
 78 | alignment with Bowtie.
 79 | * [crisprScore](https://github.com/crisprVerse/crisprScore) to annotate gRNAs with on-target
 80 | and off-target scores.
 81 | * [crisprDesign](https://github.com/crisprVerse/crisprDesign) to design and manipulate gRNAs
 82 | with `GuideSet` objects. 
 83 | * [crisprScoreData](https://github.com/crisprVerse/crisprScoreData) to use pre-trained models for the `crisprScore` package.
 84 | 
 85 | The following command will load all of those packages in an R session:
 86 | 
 87 | ```{r, eval=FALSE}
 88 | library(crisprVerse)
 89 | ```
 90 | 
 91 | You can check that all crisprVerse packages are up-to-date with 
 92 | `crisprVerse_update()`:
 93 | 
 94 | ```{r, eval=FALSE}
 95 | crisprVerse_update()
 96 | ```
 97 | 
 98 | ## Installing data packages
 99 | 
100 | The following genome data packages from Bioconductor are required 
101 | for several of the tutorials:
102 | 
103 | ```{r, eval=FALSE}
104 | if (!requireNamespace("BiocManager", quietly = TRUE))
105 |     install.packages("BiocManager")
106 | 
107 | BiocManager::install(version="devel")
108 | BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")
109 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38")
110 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major")
111 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor")
112 | ```
113 | 
114 | The [crisprDesignData](https://github.com/crisprVerse/crisprDesignData) package 
115 | is also required for most of the tutorials and can be installed directly 
116 | from our GitHub page using the `devtools` package:
117 | 
118 | 
119 | ```{r, eval=FALSE}
120 | if (!requireNamespace("devtools", quietly = TRUE))
121 |     install.packages("devtools")
122 | 
123 | devtools::install_github("crisprVerse/crisprDesignData")
124 | ```
125 | 
126 | ## Installing optional packages
127 | 
128 | For macOS and Linux users, the 
129 | [crisprBwa](https://github.com/crisprVerse/crisprBwa) package can be installed
130 | from Bioconductor using the following:
131 | 
132 | ```{r, eval=FALSE}
133 | if (!requireNamespace("BiocManager", quietly = TRUE))
134 |     install.packages("BiocManager")
135 | 
136 | BiocManager::install(version="devel")
137 | BiocManager::install("crisprBwa")
138 | ```
139 | 
140 | The [crisprViz](https://github.com/crisprVerse/crisprViz) package is
141 | currently under review at Bioconductor, but can be installed directly 
142 | from GitHub:
143 | 
144 | ```{r, eval=FALSE}
145 | if (!requireNamespace("devtools", quietly = TRUE))
146 |     install.packages("devtools")
147 | 
148 | devtools::install_github("crisprVerse/crisprViz")
149 | ```
150 | 
151 | 
152 | 
153 | # Reproducibility
154 | 
155 | ```{r}
156 | sessionInfo()
157 | ```
158 | 
159 | 
160 | # References
161 | 


--------------------------------------------------------------------------------
/Installation/README.md:
--------------------------------------------------------------------------------
  1 | Installing the crisprVerse and packages necessary for the tutorials
  2 | ================
  3 | Jean-Philippe Fortin, Luke Hoberecht
  4 | 
  5 | -   <a href="#installation" id="toc-installation">Installation</a>
  6 |     -   <a href="#requirements" id="toc-requirements">Requirements</a>
  7 |     -   <a href="#bioconductor-versions"
  8 |         id="toc-bioconductor-versions">Bioconductor versions</a>
  9 |     -   <a href="#installing-the-core-crisprverse-packages"
 10 |         id="toc-installing-the-core-crisprverse-packages">Installing the core
 11 |         crisprVerse packages</a>
 12 |     -   <a href="#installing-data-packages"
 13 |         id="toc-installing-data-packages">Installing data packages</a>
 14 |     -   <a href="#installing-optional-packages"
 15 |         id="toc-installing-optional-packages">Installing optional packages</a>
 16 | -   <a href="#reproducibility" id="toc-reproducibility">Reproducibility</a>
 17 | -   <a href="#references" id="toc-references">References</a>
 18 | 
 19 | # Installation
 20 | 
 21 | We show in this tutorial how to install the crisprVerse packages, as
 22 | well as other packages necessary for some of the [crisprVerse
 23 | tutorials](https://github.com/crisprVerse/Tutorials).
 24 | 
 25 | ## Requirements
 26 | 
 27 | The crisprVerse is supported for macOS, Linux and Windows machines. It
 28 | requires R version \>=4.4. Some of the third-party functionalities are
 29 | not available for Windows machines (BWA alignment, and some of the
 30 | scoring functions). To download and install R, follow the instructions
 31 | on the [R-project website](https://www.r-project.org/).
 32 | 
 33 | ## Bioconductor versions
 34 | 
 35 | The Bioconductor project has 2 concurrent branches: `release` and
 36 | `devel`. Currently (July 2024), the release branch is `3.19`, and the
 37 | devel branch is `3.20`. See the [Bioconductor install
 38 | page](https://www.bioconductor.org/install/) for more information
 39 | regarding Bioconductor versions.
 40 | 
 41 | The crisprVerse ecosystem is currently available on both the
 42 | Bioconductor release and devel branches. The release branch, updated
 43 | twice a year, is frozen in time and contains stable releases of the
 44 | packages. The devel branch is built over night and contains the latest
 45 | changes pushed to the GitHub repositories. We recommend using the
 46 | release branch, unless new features are only available in the devel
 47 | branch.
 48 | 
 49 | ## Installing the core crisprVerse packages
 50 | 
 51 | Type in the following commands in an R session to install the core
 52 | crisprVerse packages from the Bioconductor devel branch:
 53 | 
 54 | ``` r
 55 | if (!requireNamespace("BiocManager", quietly = TRUE))
 56 |     install.packages("BiocManager")
 57 | 
 58 | BiocManager::install(version="devel")
 59 | BiocManager::install("crisprVerse")
 60 | ```
 61 | 
 62 | To install the packages from the release branch instead:
 63 | 
 64 | ``` r
 65 | if (!requireNamespace("BiocManager", quietly = TRUE))
 66 |     install.packages("BiocManager")
 67 | 
 68 | BiocManager::install(version="release")
 69 | BiocManager::install("crisprVerse")
 70 | ```
 71 | 
 72 | This will install the following packages:
 73 | 
 74 | -   [crisprBase](https://github.com/crisprVerse/crisprBase) to specify
 75 |     and manipulate CRISPR nucleases.
 76 | -   [crisprBowtie](https://github.com/crisprVerse/crisprBowtie) to
 77 |     perform gRNA spacer sequence alignment with Bowtie.
 78 | -   [crisprScore](https://github.com/crisprVerse/crisprScore) to
 79 |     annotate gRNAs with on-target and off-target scores.
 80 | -   [crisprDesign](https://github.com/crisprVerse/crisprDesign) to
 81 |     design and manipulate gRNAs with `GuideSet` objects.
 82 | -   [crisprScoreData](https://github.com/crisprVerse/crisprScoreData) to
 83 |     use pre-trained models for the `crisprScore` package.
 84 | 
 85 | The following command will load all of those packages in an R session:
 86 | 
 87 | ``` r
 88 | library(crisprVerse)
 89 | ```
 90 | 
 91 | You can check that all crisprVerse packages are up-to-date with
 92 | `crisprVerse_update()`:
 93 | 
 94 | ``` r
 95 | crisprVerse_update()
 96 | ```
 97 | 
 98 | ## Installing data packages
 99 | 
100 | The following genome data packages from Bioconductor are required for
101 | several of the tutorials:
102 | 
103 | ``` r
104 | if (!requireNamespace("BiocManager", quietly = TRUE))
105 |     install.packages("BiocManager")
106 | 
107 | BiocManager::install(version="devel")
108 | BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")
109 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38")
110 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major")
111 | BiocManager::install("BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor")
112 | ```
113 | 
114 | The [crisprDesignData](https://github.com/crisprVerse/crisprDesignData)
115 | package is also required for most of the tutorials and can be installed
116 | directly from our GitHub page using the `devtools` package:
117 | 
118 | ``` r
119 | if (!requireNamespace("devtools", quietly = TRUE))
120 |     install.packages("devtools")
121 | 
122 | devtools::install_github("crisprVerse/crisprDesignData")
123 | ```
124 | 
125 | ## Installing optional packages
126 | 
127 | For macOS and Linux users, the
128 | [crisprBwa](https://github.com/crisprVerse/crisprBwa) package can be installed
129 | from Bioconductor using the following:
130 | 
131 | ``` r
132 | if (!requireNamespace("BiocManager", quietly = TRUE))
133 |     install.packages("BiocManager")
134 | 
135 | BiocManager::install(version="devel")
136 | BiocManager::install("crisprBwa")
137 | ```
138 | 
139 | The [crisprViz](https://github.com/crisprVerse/crisprViz) package is
140 | currently under review at Bioconductor, but can be installed directly
141 | from GitHub:
142 | 
143 | ``` r
144 | if (!requireNamespace("devtools", quietly = TRUE))
145 |     install.packages("devtools")
146 | 
147 | devtools::install_github("crisprVerse/crisprViz")
148 | ```
149 | 
150 | # Reproducibility
151 | 
152 | ``` r
153 | sessionInfo()
154 | ```
155 | 
156 |     ## R version 4.4.1 (2024-06-14)
157 |     ## Platform: x86_64-apple-darwin20
158 |     ## Running under: macOS Ventura 13.6.7
159 |     ## 
160 |     ## Matrix products: default
161 |     ## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
162 |     ## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
163 |     ## 
164 |     ## locale:
165 |     ## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
166 |     ## 
167 |     ## time zone: America/Los_Angeles
168 |     ## tzcode source: internal
169 |     ## 
170 |     ## attached base packages:
171 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
172 |     ## 
173 |     ## loaded via a namespace (and not attached):
174 |     ##  [1] compiler_4.4.1    fastmap_1.2.0     cli_3.6.3         tools_4.4.1      
175 |     ##  [5] htmltools_0.5.8.1 rstudioapi_0.16.0 yaml_2.3.9        rmarkdown_2.27   
176 |     ##  [9] knitr_1.48        xfun_0.46         digest_0.6.36     rlang_1.1.4      
177 |     ## [13] evaluate_0.24.0
178 | 
179 | # References
180 | 


--------------------------------------------------------------------------------
/Validating_Existing_gRNA_Libraries/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Validating existing gRNA libraries"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | # Introduction
 14 | 
 15 | In this vignette, we characterize a small mouse CRISPR knockout (CRISPRko) 
 16 | library that was designed to target tumor suppressors. 
 17 | The library was obtained from Addgene, and is stored in the 
 18 | folder `extdata` in the current directory.
 19 | 
 20 | # Loading necessary packages
 21 | 
 22 | ```{r, warning=FALSE, message=FALSE}
 23 | library(crisprDesign)
 24 | library(crisprBowtie)
 25 | library(crisprBase)
 26 | library(readxl)
 27 | library(BSgenome.Mmusculus.UCSC.mm10)
 28 | bsgenome <- BSgenome.Mmusculus.UCSC.mm10
 29 | ```
 30 | 
 31 | We also load `crisprDesignData`, which is a data package containing
 32 | already-processed Ensembl objects for gene annotation of human and mouse gRNAs:
 33 | 
 34 | ```{r, warning=FALSE, message=FALSE}
 35 | library(crisprDesignData)
 36 | ```
 37 | 
 38 | # Reading in data
 39 | 
 40 | ```{r}
 41 | data <- read_excel("extdata/mtsg-grnas-readcounts.xlsx")
 42 | data <- as.data.frame(data)[,1:2]
 43 | colnames(data) <- c("ID", "spacer_20mer")
 44 | 
 45 | # Getting gene names:
 46 | data$gene_symbol <- sapply(strsplit(data$ID, split="_"), function(x)x[[1]])
 47 | head(data)
 48 | ```
 49 | 
 50 | 
 51 | # Building a `GuideSet` object
 52 | 
 53 | 
 54 | We first define the nuclease for the analysis. 
 55 | We here use the standard wildtype Cas9 (SpCas9) from
 56 | the `crisprBase` package:
 57 | 
 58 | ```{r}
 59 | data(SpCas9, package="crisprBase")
 60 | crisprNuclease <- SpCas9
 61 | crisprNuclease
 62 | ```
 63 | 
 64 | The default length of the spacer sequences is 20nt.
 65 | This can be changed to a different length if needed,
 66 | for instance 19nt:
 67 | 
 68 | ```{r, eval=FALSE}
 69 | # Not run
 70 | spacerLength(SpCas9) <- 19
 71 | ```
 72 | 
 73 | We next need to define a bowtie index that we will use for alignment:
 74 | 
 75 | 
 76 | ```{r}
 77 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/mm10/mm10"
 78 | ```
 79 | 
 80 | For instructions on how to build a Bowtie index from a given reference
 81 | genome, see the [genome index tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) or the [crisprBowtie page](https://github.com/crisprVerse/crisprBowtie).
 82 | 
 83 | 
 84 | We first map the gRNAs to the reference genome with perfect match to
 85 | obtain genomic coordinates of those gRNAs:
 86 | 
 87 | ```{r}
 88 | spacers <- unique(data$spacer_20mer)
 89 | aln <- runCrisprBowtie(spacers,
 90 |                        crisprNuclease=crisprNuclease,
 91 |                        bowtie_index=bowtie_index,
 92 |                        n_mismatches=0)
 93 | head(aln)
 94 | ```
 95 | 
 96 | `n_mismatches=0` specifies that we require a perfect match between
 97 | spacer and protospacer sequences (on-targets).
 98 | 
 99 | Non-targeting controls should not have any alignments to the genome,
100 | and some guides might have multiple alignments if they were not
101 | designed carefully. For such guides, that's OK: we can pick one 
102 | genomic coordinate for now, and the multiple alignments annotation
103 | will be handled later on. 
104 | 
105 | We keep only alignments to the standard chromosomes:
106 | 
107 | ```{r}
108 | chrs <- paste0("chr", c(1:19, "X", "Y"))
109 | aln <- aln[aln$chr %in% chrs,,drop=FALSE]
110 | ```
111 | 
112 | 
113 | We add the genomic coordinates to the data.frame:
114 | 
115 | ```{r}
116 | wh <- match(data$spacer_20mer, aln$spacer)
117 | data$chr <- aln$chr[wh]
118 | data$pam_site <- aln$pam_site[wh]
119 | data$pam <- aln$pam[wh]
120 | data$strand <- aln$strand[wh]
121 | head(data)
122 | ```
123 | 
124 | 
125 | We can now build a proper `GuideSet` object in `crisprDesign` that will 
126 | allow us to do (more) sophisticated analyses.
127 | 
128 | 
129 | We first need to filter out guides that don't have a match to the genome:
130 | 
131 | ```{r}
132 | data <- data[!is.na(data$pam_site),,drop=FALSE]
133 | ```
134 | 
135 | 
136 | Finally, we create unique ids to identify the spacer sequences:
137 | 
138 | ```{r}
139 | ids <- paste0("gRNA_", seq_len(nrow(data)))
140 | head(ids)
141 | ```
142 | 
143 | We are now ready to build the `GuideSet` object using the constructor 
144 | function `GuideSet` from `crisprDesign`:
145 | 
146 | ```{r}
147 | gs <- GuideSet(ids=ids,
148 |                protospacers=data$spacer_20mer,
149 |                pams=data$pam,
150 |                pam_site=data$pam_site,
151 |                seqnames=data$chr,
152 |                strand=data$strand,
153 |                CrisprNuclease=crisprNuclease,
154 |                bsgenome=bsgenome)
155 | gs$gene_symbol <- data$gene_symbol
156 | ```
157 | 
158 | 
159 | 
160 | The `GuideSet` object, and [crisprDesign](https://github.com/crisprVerse/crisprDesign), provide rich functionalities
161 | to annotate and manipulate gRNAs. See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) 
162 | to get an overview of the functionalities. For the rest of 
163 | this tutorial, we only focus on characterizing the off-targets.
164 | 
165 | 
166 | # Off-target characterization
167 | 
168 | Having a `GuideSet` object, it is now a piece of cake to 
169 | characterize the off-targets. We characterize off-targets 
170 | using the bowtie aligner, with up to 2 mismatches between 
171 | the spacer (gRNA) and protospacer (target DNA) sequences.
172 | The function `addSpacerAlignments` accomplishes that. 
173 | 
174 | It has an optional argument `txObject` that can be used 
175 | to provide gene model data to put the off-targets in a 
176 | gene model context. We made such objects available for human and mouse
177 | in the [crisprDesignData](https://github.com/crisprVerse/crisprDesignData)
178 | package (see `txdb_human` and `txdb_mouse`).
179 | 
180 | 
181 | 
182 | 
183 | ```{r, eval=TRUE, warning=FALSE}
184 | data(txdb_mouse, package="crisprDesignData")
185 | txObject <- txdb_mouse
186 | gs <- addSpacerAlignments(gs,
187 |                           txObject=txObject,
188 |                           aligner="bowtie",
189 |                           aligner_index=bowtie_index,
190 |                           bsgenome=bsgenome,
191 |                           n_mismatches=2)
192 | ```
193 | 
194 | 
195 | The alignments are stored in a metadata column called `alignments`. 
196 | See `?getSpacerAlignments` for more details about what the 
197 | different columns are. 
198 | 
199 | As an example, we can access the on- and off-target alignments of 
200 | the first gRNA using the following commands:
201 | 
202 | 
203 | ```{r, eval=TRUE}
204 | aln <- gs$alignments[[1]]
205 | aln
206 | ```
207 | 
208 | 
209 | We can also add CFD and MIT scores to the off-targets to
210 | characterize the likelihood of SpCas9 to cut at the off-targets:
211 | 
212 | ```{r, eval=TRUE}
213 | gs <- addOffTargetScores(gs)
214 | ```
215 | 
216 | The scores range from 0 to 1, and a higher score indicates a higher 
217 | probability of the off-target to occur. 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | # Session Info
224 | 
225 | ```{r}
226 | sessionInfo()
227 | ```
228 | 


--------------------------------------------------------------------------------
/Building_Genome_Indices/README.md:
--------------------------------------------------------------------------------
  1 | Building genome indices for off-target alignment
  2 | ================
  3 | Jean-Philippe Fortin, Luke Hoberecht
  4 | 
  5 | -   <a href="#introduction" id="toc-introduction">Introduction</a>
  6 | -   <a href="#installation" id="toc-installation">Installation</a>
  7 | -   <a href="#building-a-bowtie-index"
  8 |     id="toc-building-a-bowtie-index">Building a bowtie index</a>
  9 | -   <a href="#building-a-bwa-index" id="toc-building-a-bwa-index">Building a
 10 |     BWA index</a>
 11 | -   <a href="#building-a-transcriptome-index"
 12 |     id="toc-building-a-transcriptome-index">Building a transcriptome
 13 |     index</a>
 14 | -   <a href="#reproducibility" id="toc-reproducibility">Reproducibility</a>
 15 | -   <a href="#references" id="toc-references">References</a>
 16 | 
 17 | # Introduction
 18 | 
 19 | This vignette demonstrates how to build genome indices for the purpose
 20 | of performing on- and off-target alignment. In particular, we show how
 21 | to build such indices for the short read aligners bowtie (Langmead et
 22 | al. 2009), as used by the `Rbowtie` and `crisprBowtie` packages, and
 23 | BWA-backtrack (Li and Durbin 2009), as used by the `Rbwa` and
 24 | `crisprBwa` packages. Note that BWA is not available for Windows users.
 25 | 
 26 | Generating a genome index file is time consuming, but only needs to be
 27 | done once for a given genome.
 28 | 
 29 | # Installation
 30 | 
 31 | See the [Installation
 32 | tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation)
 33 | to learn how to install the `crisprBowtie` and `crisprBwa` packages.
 34 | 
 35 | # Building a bowtie index
 36 | 
 37 | In the following example, we build a bowtie index for the human genome
 38 | using the hg38 build. First, users will need to download the FASTA file
 39 | from the UCSC genome browser. Here’s the link:
 40 | <https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz>
 41 | 
 42 | Next, assuming the `hg38.fa.gz` is located in the current directory, we
 43 | build the bowtie genome index using the function `bowtie_build` from the
 44 | `Rbowtie` package (which is installed when `crisprBowtie` is installed):
 45 | 
 46 | ``` r
 47 | library(Rbowtie)
 48 | fastaFile <- "./hg38.fa.gz"
 49 | bowtie_build(fastaFile,
 50 |              outdir="./",
 51 |              force=TRUE,
 52 |              prefix="hg38")
 53 | ```
 54 | 
 55 | This should take a couple of hours to run, and the resulting bowtie
 56 | index files will be written to the current directory with the prefix `hg38` and can be used to
 57 | run bowtie alignment. See the
 58 | [crisprBowtie](https://github.com/crisprVerse/crisprBowtie) package to
 59 | learn how to perform a bowtie alignment within R.
 60 | 
 61 | # Building a BWA index
 62 | 
 63 | Building a BWA index is similar to building a bowtie index. Assuming the
 64 | `hg38.fa.gz` is located in the current directory, we build the BWA
 65 | genome index using the function `bwa_build_index` from the `Rbwa`
 66 | package (which is installed when `crisprBwa` is installed):
 67 | 
 68 | ``` r
 69 | library(Rbwa)
 70 | fastaFile <- "./hg38.fa.gz"
 71 | bwa_build_index(fastaFile,
 72 |                 index_prefix="hg38")
 73 | ```
 74 | 
 75 | This should take a couple of hours to run, and the resulting BWA index
 76 | files will be written to the current directory with the prefix `hg38` and can be used to run BWA
 77 | alignment. See the [crisprBwa](https://github.com/crisprVerse/crisprBwa)
 78 | package to learn how to perform a BWA alignment within R.
 79 | 
 80 | # Building a transcriptome index
 81 | 
 82 | For applications using RNA-targeting nucleases such as CasRx, off-target
 83 | search is performed against transcriptomes rather than genomes.
 84 | Building a transcriptome index works similarly, except that we first need
 85 | to generate a FASTA file containing the transcriptome sequences. This is
 86 | easily accomplished with the function `getMrnaSequences` from the
 87 | `crisprDesign` package, assuming that a gene model is provided, as well
 88 | as a `BSgenome` object containing the DNA sequences for the hg38 genome
 89 | (`BSgenome.Hsapiens.UCSC.hg38`).
 90 | 
 91 | We first load the necessary packages:
 92 | 
 93 | ``` r
 94 | library(BSgenome.Hsapiens.UCSC.hg38)
 95 | library(crisprDesign)
 96 | ```
 97 | 
 98 | The `crisprDesignData` package (see Installation) contains a gene model
 99 | annotation for the hg38 genome, and can be loaded using the following:
100 | 
101 | ``` r
102 | library(crisprDesignData)
103 | data("txdb_human", package="crisprDesignData")
104 | ```
105 | 
106 | See the [Gene annotation
107 | tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation)
108 | to learn more about how to build such gene annotation objects.
109 | 
110 | We will now extract mRNA sequences for all available transcripts:
111 | 
112 | ``` r
113 | txids <- unique(txdb_human$exons$tx_id)
114 | mrnasHuman <- getMrnaSequences(txids,
115 |                                bsgenome=BSgenome.Hsapiens.UCSC.hg38,
116 |                                txObject=txdb_human)
117 | ```
118 | 
119 | This should take less than an hour to run. Once completed, we will write
120 | the extracted mRNA sequences to disk using the FASTA format. This can be
121 | accomplished using the `writeXStringSet` function from the `Biostrings`
122 | package:
123 | 
124 | ``` r
125 | library(Biostrings)
126 | writeXStringSet(mrnasHuman,
127 |                 file="ensembl_human_104.fasta",
128 |                 format="fasta")
129 | ```
130 | 
131 | Note that the `seqnames` of this FASTA file are Ensembl transcript IDs
132 | instead of chromosomes. Once the FASTA file has been generated, the
133 | process for constructing either a bowtie or BWA index file is the same
134 | as described in the above sections.
135 | 
136 | # Reproducibility
137 | 
138 | ``` r
139 | sessionInfo()
140 | ```
141 | 
142 |     ## R version 4.2.1 (2022-06-23)
143 |     ## Platform: x86_64-apple-darwin17.0 (64-bit)
144 |     ## Running under: macOS Catalina 10.15.7
145 |     ## 
146 |     ## Matrix products: default
147 |     ## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
148 |     ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
149 |     ## 
150 |     ## locale:
151 |     ## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
152 |     ## 
153 |     ## attached base packages:
154 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
155 |     ## 
156 |     ## loaded via a namespace (and not attached):
157 |     ##  [1] compiler_4.2.1   magrittr_2.0.3   fastmap_1.1.0    cli_3.3.0       
158 |     ##  [5] tools_4.2.1      htmltools_0.5.3  rstudioapi_0.14  yaml_2.3.5      
159 |     ##  [9] stringi_1.7.8    rmarkdown_2.15.2 knitr_1.40       stringr_1.4.1   
160 |     ## [13] xfun_0.32        digest_0.6.29    rlang_1.0.4      evaluate_0.16
161 | 
162 | # References
163 | 
164 | <div id="refs" class="references csl-bib-body hanging-indent">
165 | 
166 | <div id="ref-langmead2009bowtie" class="csl-entry">
167 | 
168 | Langmead, Ben, Cole Trapnell, Mihai Pop, and Steven L. Salzberg. 2009.
169 | “Ultrafast and Memory-Efficient Alignment of Short DNA Sequences to the
170 | Human Genome.” *Genome Biology* 10 (3): R25.
171 | <https://doi.org/10.1186/gb-2009-10-3-r25>.
172 | 
173 | </div>
174 | 
175 | <div id="ref-bwa" class="csl-entry">
176 | 
177 | Li, Heng, and Richard Durbin. 2009. “Fast and Accurate Short Read
178 | Alignment with Burrows–Wheeler Transform.” *Bioinformatics* 25 (14):
179 | 1754–60.
180 | 
181 | </div>
182 | 
183 | </div>
184 | 


--------------------------------------------------------------------------------
/Design_CRISPRbe/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs for CRISPRbe"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | In this tutorial, we illustrate the CRISPR base editing (CRISPRbe) functionalities 
 18 | of `crisprDesign` by designing and characterizing gRNAs targeting the 
 19 | human gene KRAS using the cytidine base editor BE4max [@koblan2018improving]. 
 20 | 
 21 | 
 22 | # Installation
 23 | 
 24 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 25 | `crisprDesign`, `crisprDesignData`
 26 | 
 27 | 
 28 | # Terminology
 29 | 
 30 | See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar 
 31 | with the terminology used throughout this tutorial.
 32 | 
 33 | 
 34 | # CRISPR base editing with BE4max
 35 | 
 36 | ## Loading packages
 37 | 
 38 | We first load the necessary packages for this tutorial:
 39 | 
 40 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 41 | library(crisprBase)
 42 | library(crisprDesign)
 43 | library(crisprDesignData)
 44 | library(BSgenome.Hsapiens.UCSC.hg38)
 45 | ```
 46 | 
 47 | 
 48 | ## Creating the GuideSet
 49 | 
 50 | We first load the BE4max `BaseEditor` object from the `crisprBase` package:
 51 | 
 52 | ```{r}
 53 | data(BE4max, package="crisprBase")
 54 | BE4max
 55 | ```
 56 | 
 57 | The editing probabilities of the base editor BE4max are stored in a 
 58 | matrix where rows correspond to the different nucleotide substitutions, 
 59 | and columns correspond to the genomic coordinate relative to the PAM site. 
 60 | The `editingWeights` function from `crisprBase` retrieves those probabilities. 
 61 | One can see that C to T editing is optimal around 15 nucleotides upstream of 
 62 | the PAM site for the BE4max base editor:
 63 | 
 64 | ```{r}
 65 | crisprBase::editingWeights(BE4max)["C2T",]
 66 | ```
 67 | 
 68 | Let's create the `GuideSet` containing gRNAs targeting KRAS.
 69 | 
 70 | We first load the data containing gene regions for the human genome 
 71 | from `crisprDesignData`:
 72 | 
 73 | ```{r}
 74 | data("txdb_human", package="crisprDesignData")
 75 | ```
 76 | 
 77 | For more information on `txdb_human` and how to create 
 78 | similar gene annotation objects, see the [Building a gene annotation object tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation).
 79 | 
 80 | 
 81 | We will also load the `BSgenome` package containing DNA sequences for the hg38
 82 | genome:
 83 | 
 84 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 85 | library(BSgenome.Hsapiens.UCSC.hg38)
 86 | ```
 87 | 
 88 | 
 89 | We retrieve the genomic coordinates of the KRAS CDS
 90 | ```{r}
 91 | gr <- queryTxObject(txObject=txdb_human,
 92 |                     featureType="cds",
 93 |                     queryColumn="gene_symbol",
 94 |                     queryValue="KRAS")
 95 | ```
 96 | 
 97 | and design all possible gRNAs using the function `findSpacers`:
 98 | 
 99 | ```{r}
100 | bsgenome <- BSgenome.Hsapiens.UCSC.hg38
101 | gs <- findSpacers(gr,
102 |                   bsgenome=bsgenome,
103 |                   crisprNuclease=BE4max)
104 | ```
105 | 
106 | 
107 | ## Annotating the GuideSet
108 | 
109 | Next, we annotate our candidate gRNAs to assess quality. There are several 
110 | functions in `crisprDesign` that provide annotation for features that are 
111 | nonspecific to CRISPRbe, for which we refer the reader to the
112 | [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial for more information. The sections below will 
113 | cover annotation functions that are of particular interest to,
114 | or deserve extra care for, CRISPRbe applications.
115 | 
116 | 
117 | ### Adding edited alleles
118 | 
119 | The function `addEditedAlleles` finds, characterizes, and scores 
120 | predicted edited alleles for each gRNA and a chosen transcript. 
121 | It requires a transcript-specific annotation that can be obtained 
122 | with the `getTxInfoDataFrame` function. Here, we perform the analysis
123 | using the primary isoform of KRAS (Ensembl transcript ID: ENST00000311936).
124 | 
125 | We first get the transcript table for our transcript
126 | 
127 | ```{r}
128 | txid <- "ENST00000311936"
129 | txTable <- getTxInfoDataFrame(tx_id=txid,
130 |                               txObject=txdb_human,
131 |                               bsgenome=bsgenome)
132 | head(txTable)
133 | ```
134 | 
135 | and then add the edited alleles annotation to the `GuideSet`:
136 | 
137 | ```{r}
138 | editingWindow <- c(-20,-8)
139 | gs <- addEditedAlleles(gs,
140 |                        baseEditor=BE4max,
141 |                        txTable=txTable,
142 |                        editingWindow=editingWindow)
143 | ```
144 | 
145 | The `editingWindow` argument specifies the window of editing that we
146 | are interested in. When not provided, it uses the default window provided 
147 | in the `BaseEditor` object. Note that providing large windows can 
148 | dramatically increase computing time, as the number of possible 
149 | alleles grows exponentially. 
150 | 
151 | Let's retrieve the edited alleles for the first gRNA:
152 | 
153 | ```{r}
154 | alleles <- editedAlleles(gs)[[1]]
155 | ```
156 | 
157 | We get a `DataFrame` object with useful metadata:
158 | 
159 | ```{r}
160 | metadata(alleles)
161 | ```
162 | 
163 | The `wildtypeAllele` reports the unedited nucleotide sequence of the
164 | region specified by the editing window (with respect to the gRNA PAM site).
165 | It is always reported from the 5' to 3' direction on the strand corresponding 
166 | to the gRNA strand. The `start` and `end` fields specify the
167 | corresponding coordinates on the transcript. 
168 | 
169 | Let's look at the edited alleles:
170 | 
171 | ```{r}
172 | head(alleles)
173 | ```
174 | 
175 | The `DataFrame` is ordered by descending values in the `score` column.
176 | This `score` represents the likelihood of the edited allele to occur 
177 | relative to all possible edited alleles, and is calculated using the 
178 | editing weights stored in the `BE4max` object. The `seq` column represents
179 | the edited nucleotide sequences. As with the `wildtypeAllele` in the metadata, 
180 | they are always reported from the 5' to 3' direction on the strand 
181 | corresponding to the gRNA strand. 
182 | 
183 | The `variant` column describes the
184 | functional consequence of the editing event (silent, nonsense or
185 | missense mutation). If an edited allele results in multiple editing 
186 | events, as can happen when multiple bases are edited, the most 
187 | consequential mutation (nonsense over missense, missense over silent) 
188 | is reported. Finally, the `aa` column reports the resulting edited 
189 | amino acid sequence, with each single letter code mapping to its 
190 | corresponding nucleotide (`*` for termination).
191 | 
192 | Note that `addEditedAlleles` also appended several gRNA-level aggregate 
193 | scores to the `GuideSet` object:
194 | 
195 | ```{r}
196 | head(gs)
197 | ```
198 | 
199 | The `score_missense`, `score_nonsense` and `score_silent` columns report 
200 | aggregated scores for each mutation type. They are calculated by summing 
201 | all scores of a given mutation type across the set of edited alleles for 
202 | a given gRNA. The `maxVariant` column indicates the most probable mutation 
203 | type for the given gRNA based on the maximum aggregated score, which is 
204 | stored in `maxVariantScore`. In our example, the highest score for `spacer_4` 
205 | is `score_nonsense`, and so `maxVariant` is set to `nonsense`.
206 | 
207 | 
208 | 
209 | # Session Info
210 | 
211 | ```{r}
212 | sessionInfo()
213 | ```
214 | 
215 | 
216 | # References
217 | 
218 | 


--------------------------------------------------------------------------------
/Design_Custom_Sequence/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs for custom sequences"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | # Introduction
 15 | 
 16 | In this tutorial, we illustrate the main functionalities of  `crisprDesign` 
 17 | for designing gRNAs for custom sequences. To design gRNAs for targets
 18 | located in an organism genome, see the [introductory CRISPRko tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9). 
 19 | 
 20 | 
 21 | 
 22 | # Installation
 23 | 
 24 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 25 | `crisprDesign`, `crisprDesignData`
 26 | 
 27 | 
 28 | # Use case: designing gRNAs against EGFP
 29 | 
 30 | Suppose we are engineering a human cell line to express the
 31 | enhanced green fluorescent protein (EGFP) marker, and that we want
 32 | to design gRNAs that knock out EGFP as experimental controls.
 33 | Such control gRNAs should (1) target EGFP with high efficiency, 
 34 | and (2) be specific to EGFP, that is, should not target 
 35 | the cell genome (human genome in this case). Suppose also that the
 36 | cell line is stably expressing SpCas9.
 37 | 
 38 | ## Loading necessary packages
 39 | 
 40 | We first start by loading the necessary packages:
 41 | 
 42 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 43 | library(Biostrings)
 44 | library(crisprBase)
 45 | library(crisprDesign)
 46 | library(crisprDesignData)
 47 | library(BSgenome.Hsapiens.UCSC.hg38)
 48 | ```
 49 | 
 50 | 
 51 | ## Obtaining the DNA sequence 
 52 | 
 53 | In the folder `data`, we have included a fasta file containing the DNA
 54 | sequence of the EGFP marker. The sequence was obtained from the [SnapGene website](https://www.snapgene.com/resources/plasmid-files/?set=fluorescent_protein_genes_and_plasmids&plasmid=EGFP).
 55 | 
 56 | 
 57 | We can read in the fasta file using the `readDNAStringSet` function from
 58 | the package `Biostrings`:
 59 | 
 60 | ```{r}
 61 | dna <- Biostrings::readDNAStringSet("data/egfp.fa")
 62 | names(dna) <- "EGFP"
 63 | dna
 64 | ```
 65 | 
 66 | This could also be simply constructed from a regular string:
 67 | 
 68 | ```{r, eval=FALSE}
 69 | dna <- "ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA"
 70 | dna <- DNAStringSet(dna)
 71 | names(dna) <- "EGFP"
 72 | ```
 73 | 
 74 | (Note that the `findSpacers` function used below also accepts a simple
 75 | string, which is internally converted into a `DNAStringSet`.) The `dna`
 76 | object is the custom sequence input that we will use to design gRNAs.
 77 | 
 78 | ## Constructing the `GuideSet` object:
 79 | 
 80 | Next, we design all possible SpCas9 gRNAs targeting EGFP. First, we load
 81 | the SpCas9 object from the `crisprBase` package:
 82 | 
 83 | ```{r}
 84 | data(SpCas9, package="crisprBase")
 85 | ```
 86 | 
 87 | and we design gRNAs using the function `findSpacers` from `crisprDesign`:
 88 | 
 89 | ```{r}
 90 | gs <- findSpacers(dna, 
 91 |                   crisprNuclease=SpCas9)
 92 | head(gs)
 93 | ```
 94 | 
 95 | The resulting output is a regular `GuideSet` object, and all functionalities
 96 | described in the [introductory CRISPRko tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9)
 97 | can be applied here as well. 
 98 | 
 99 | There are a few key differences to note with respect to a `GuideSet` object
100 | constructed using a reference genome. First, the name of the input DNA sequence
101 | (EGFP) is used as the chromosome name stored in the `seqnames` field. 
102 | Second, the `pam_site` and `cut_site` coordinates are all relative to the
103 | first nucleotide of the custom DNA sequence. Finally, the `GuideSet` object
104 | stores the input sequence, which can be accessed using the function 
105 | `customSequences`:
106 | 
107 | ```{r}
108 | customSequences(gs)
109 | ```
110 | 
111 | ## Finding off-targets in the human genome to find gRNAs specific to EGFP
112 | 
113 | 
114 | Now that we have designed all possible gRNAs targeting EGFP, we will 
115 | filter out gRNAs that have on- and off-targets located in the human
116 | genome. We will use the bowtie aligner to find targets, so we need to
117 | first specify the path of a bowtie index constructed on the human genome:
118 | 
119 | ```{r}
120 | # Path of the hg38 bowtie index on my personal laptop:
121 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"
122 | ```
123 | 
124 | For instructions on how to build a Bowtie index from a given reference genome, 
125 | see the [genome index tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices). 
126 | 
127 | To annotate off-targets with genomic context, for instance to know whether 
128 | or not they are located in coding regions, we will also need a gene model
129 | object. We will use the gene model object `txdb_human` 
130 | from `crisprDesignData`, which contains genomic coordinates of all 
131 | human protein-coding genes. See the [crisprDesignData package](https://github.com/crisprVerse/crisprDesignData) for more details.
132 | 
133 | ```{r}
134 | data(txdb_human, package="crisprDesignData")
135 | ```
136 | 
137 | We are now ready to find all on- and off-targets using the 
138 | `addSpacerAlignments` function from `crisprDesign`:
139 | 
140 | 
141 | ```{r, warning=FALSE, message=FALSE}
142 | gs <- addSpacerAlignments(gs,
143 |                           aligner="bowtie",
144 |                           aligner_index=bowtie_index,
145 |                           bsgenome=BSgenome.Hsapiens.UCSC.hg38,
146 |                           n_mismatches=3,
147 |                           txObject=txdb_human)
148 | gs
149 | ```
150 | 
151 | 
152 | 
153 | ## Predicting on-target activity
154 | 
155 | We also want to make sure to filter out gRNAs that are predicted to have
156 | poor on-target activity. To do so, we annotate gRNAs with the DeepHF
157 | on-target activity score:
158 | 
159 | ```{r}
160 | gs <- addOnTargetScores(gs, methods="deephf")
161 | ```
162 | 
163 | 
164 | 
165 | Finally, we characterize the spacer sequences using the `addSequenceFeatures`
166 | function from `crisprDesign`:
167 | 
168 | ```{r}
169 | gs <- addSequenceFeatures(gs)
170 | ```
171 | 
172 | ## Final selection
173 | 
174 | For our use case, we will only retain gRNAs that do not map to the human 
175 | genome (`n0=0`), don't have any 1 or 2-mismatch off-targets (`n1=0` 
176 | and `n2=0`), and do not have 3-mismatch off-targets located 
177 | in coding regions (`n3_c=0`):
178 | 
179 | ```{r}
180 | gs <- gs[gs$n0==0 & gs$n1==0 & gs$n2==0 & gs$n3_c==0]
181 | ```
182 | 
183 | We also remove gRNAs that contain polyT sequences
184 | 
185 | ```{r}
186 | gs <- gs[!gs$polyT,]
187 | ```
188 | 
189 | and only keep gRNAs that don't have extreme GC content:
190 | 
191 | ```{r}
192 | gs <- gs[gs$percentGC>=20 & gs$percentGC<=80]
193 | ```
194 | 
195 | Finally, we rank gRNAs from the highest to the lowest on-target activity score:
196 | 
197 | ```{r}
198 | gs <- gs[order(-gs$score_deephf)]
199 | head(gs)
200 | ```
201 | 
202 | Users can select the top gRNAs as their control gRNAs.
203 | 
204 | # Session Info
205 | 
206 | ```{r}
207 | sessionInfo()
208 | ```
209 | 
210 | # References
211 | 
212 | 
213 | 


--------------------------------------------------------------------------------
/Design_CRISPRkd_CasRx/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs for CRISPRkd with CasRx"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | # Introduction
 15 | 
 16 | In this tutorial, we will design gRNAs for the RNA-targeting nuclease 
 17 | CasRx (RfxCas13d) [@cas13d]. We will design all gRNAs targeting the primary 
 18 | isoform of the human gene KRAS. 
 19 | 
 20 | # Installation
 21 | 
 22 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 23 | `crisprDesign`, `crisprDesignData`
 24 | 
 25 | 
 26 | # Terminology
 27 | 
 28 | See the [CRISPRko design vignette](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar with the terminology used throughout
 29 | this tutorial.
 30 | 
 31 | 
 32 | # End-to-end gRNA design workflow
 33 | 
 34 | We first start by loading the crisprVerse packages needed for 
 35 | this tutorial:
 36 | 
 37 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 38 | library(crisprBase)
 39 | library(crisprDesign)
 40 | library(crisprDesignData)
 41 | ```
 42 | 
 43 | We will also load the `BSgenome` package containing DNA sequences for the hg38
 44 | genome:
 45 | 
 46 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 47 | library(BSgenome.Hsapiens.UCSC.hg38)
 48 | ```
 49 | 
 50 | 
 51 | ## Creating the GuideSet
 52 | 
 53 | We begin by loading the CasRx `CrisprNuclease` object from the `crisprBase`
 54 | package:
 55 | 
 56 | ```{r}
 57 | data(CasRx, package="crisprBase")
 58 | CasRx
 59 | ```
 60 | 
 61 | The PFS sequence (the equivalent of a PAM sequence for RNA-targeting nucleases)
 62 | for CasRx is `N`, meaning there is no specific PFS sequences preferred by CasRx. 
 63 | 
 64 | Next, we will extract the mRNA sequence for the KRAS transcript ENST00000311936 
 65 | with the function `getMrnaSequences` from `crisprDesign`. The function
 66 | requires a gene annotation object. We will load the Ensembl model from
 67 | the `crisprDesignData` package stored in the `GRangesList` object `txdb_human`:
 68 | 
 69 | ```{r}
 70 | data("txdb_human", package="crisprDesignData")
 71 | ```
 72 | 
 73 | For more information on `txdb_human` and how to create similar gene 
 74 | annotation objects, see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) tutorial.
 75 | 
 76 | We also need a `BSgenome` object containing the DNA sequences:
 77 | 
 78 | ```{r}
 79 | bsgenome <- BSgenome.Hsapiens.UCSC.hg38
 80 | ```
 81 | 
 82 | We are now ready to obtain our mRNA sequence:
 83 | 
 84 | ```{r}
 85 | txid <- "ENST00000311936"
 86 | mrna <- getMrnaSequences(txids=txid,
 87 |                          bsgenome=bsgenome,
 88 |                          txObject=txdb_human)
 89 | mrna
 90 | ```
 91 | 
 92 | Similar to the CRISPRko gRNA design, we use the function `findSpacers` 
 93 | to design our gRNAs:
 94 | 
 95 | ```{r, warning=FALSE}
 96 | gs <- findSpacers(mrna,
 97 |                   crisprNuclease=CasRx)
 98 | head(gs)
 99 | ```
100 | 
101 | Note that all protospacer sequences are located on the original strand of 
102 | the mRNA sequence. For RNA-targeting nucleases, the spacer and protospacer 
103 | sequences are the reverse complement of each other. 
104 | (Compare the output of the code below with a `GuideSet` that 
105 | uses a DNA-targeting nuclease--for such `GuideSet` objects, the outputs 
106 | of `spacers` and `protospacers` are identical.)
107 | 
108 | ```{r}
109 | head(spacers(gs))
110 | head(protospacers(gs))
111 | ```
112 | 
113 | 
114 | ## Annotating the GuideSet
115 | 
116 | Next, we annotate our candidate gRNAs to assess quality. There are several 
117 | functions in `crisprDesign` that provide annotation for features that are 
118 | nonspecific to CRISPRkd, for which we refer the reader to the
119 | [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial for more information. The sections below will 
120 | cover annotation functions that are of particular interest to,
121 | or deserve extra care for, CRISPRkd applications.
122 | 
123 | ### Adding spacer alignments
124 | 
125 | Since our CRISPR nuclease targets RNA rather than DNA, off-target 
126 | searches should be restricted to the transcriptome. We can perform
127 | such a search using one of two methods.
128 | 
129 | #### Adding spacer alignments with Biostrings
130 | 
131 | For the first method, we set the `aligner` argument to `"biostrings"` 
132 | and pass a `DNAStringSet` representation of the transcriptome to the 
133 | argument `custom_seq`. We can create this representation with
134 | `getMrnaSequences` and all transcript IDs found in `txdb_human`. 
135 | The code below uses this method to search for off-targets having up to
136 | one mismatch and passes `txdb_human` to the `txObject` argument so 
137 | that the alignments will be accompanied with gene annotation.
138 | 
139 | ```{r, eval=FALSE}
140 | exon_ids <- unique(txdb_human$exons$tx_id)
141 | mrnasHuman <- getMrnaSequences(exon_ids,
142 |                                bsgenome=BSgenome.Hsapiens.UCSC.hg38,
143 |                                txObject=txdb_human)
144 | ## long run time
145 | results <- addSpacerAlignments(gs,
146 |                                aligner="biostrings",
147 |                                txObject=txdb_human,
148 |                                n_mismatches=1,
149 |                                custom_seq=mrnasHuman)
150 | ```
151 | 
152 | NOTE: since `mrnasHuman` contains many sequences (>100k), this method 
153 | has a very long run time; for transcriptome-wide searches, 
154 | or for searches against a large number of sequences, we recommend the 
155 | following method instead.
156 | 
157 | 
158 | #### Adding spacer alignments with bowtie or BWA
159 | 
160 | The second method uses the `bowtie` (or `bwa`) aligner. This requires 
161 | building a transcriptome bowtie (or BWA) index file first. See the [Building genome indices for short read aligners](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) tutorial for more information. 
162 | 
163 | Here we set `aligner` to `"bowtie"` and pass a precomputed 
164 | transcriptome bowtie index to `aligner_index` to find off-targets:
165 | 
166 | ```{r, warning=FALSE, message=FALSE, results='hide'}
167 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/ensembl_human_104/ensembl_human_104"
168 | results <- addSpacerAlignments(gs,
169 |                                aligner="bowtie",
170 |                                aligner_index=bowtie_index,
171 |                                txObject=txdb_human,
172 |                                n_mismatches=1)
173 | ```
174 | ```{r}
175 | head(results)
176 | ```
177 | 
178 | The columns `n0_gene` and `n0_tx` report the number of on-targets
179 | at the gene- and transcript-level, respectively. For instance, 
180 | each spacer shown above shows `n0_gene` equal to 1 and `n0_tx` 
181 | equal to 4, meaning each spacer maps to all four isoforms of KRAS.
182 | We can retrieve information about each alignment with the `onTargets`
183 | function. Looking at the on-targets for the first spacer we can see 
184 | where the target `pam_site` is relative to the start of the transcript
185 | with respect to each isoform of KRAS.
186 | 
187 | ```{r}
188 | onTargets(results["spacer_1"])
189 | ```
190 | 
191 | Note that each annotated alignment is specific to the transcript 
192 | ID given under `seqnames`.
193 | 
194 | Below is a spacer that targets (with no mismatches) multiple genes:
195 | 
196 | ```{r}
197 | results["spacer_244"]
198 | ```
199 | 
200 | Upon further inspection of this spacer's alignments, 
201 | however, we can see that the off-target occurs in the pseudogene KRASP1,
202 | and should be harmless.
203 | 
204 | ```{r}
205 | onTargets(results["spacer_244"])
206 | ```
207 | 
208 | 
209 | ## On-target scoring (gRNA efficiency)
210 | 
211 | Finally, we add an on-target activity score based on the CasRx-RF 
212 | method [@casrxrf] using the `addOnTargetScores` function from the
213 | `crisprDesign` package:
214 | 
215 | ```{r, eval=TRUE, warning=FALSE, message=FALSE}
216 | gs <- addOnTargetScores(gs, methods=c("casrxrf"))
217 | gs
218 | ```
219 | 
220 | 
221 | 
222 | # Session Info
223 | 
224 | ```{r}
225 | sessionInfo()
226 | ```
227 | 
228 | 
229 | # References
230 | 
231 | 


--------------------------------------------------------------------------------
/Design_CRISPRkd_Csm/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs for the CRISPR-Csm complex"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | # Introduction
 15 | 
 16 | The CRISPR-Csm complex is a programmable RNA-targeting system that does 
 17 | not induce indiscriminate trans-cleavage activity, which is 
 18 | an important advantage in comparison to the CRISPR-Cas13 family of RNA-targeting
 19 | nucleases [@csm1]. It has recently been shown that it can be used to
 20 | perform effective single-molecule live-cell RNA imaging [@csm2]. 
 21 | 
 22 | In this tutorial, we will design gRNAs for the CRISPR-Csm system for the
 23 | primary isoform of the human gene KRAS. 
 24 | 
 25 | # Installation
 26 | 
 27 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 28 | `crisprDesign`, `crisprDesignData`
 29 | 
 30 | 
 31 | # Terminology
 32 | 
 33 | See the [CRISPRko design vignette](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar with the terminology used throughout
 34 | this tutorial.
 35 | 
 36 | 
 37 | # End-to-end gRNA design workflow
 38 | 
 39 | We first start by loading the crisprVerse packages needed for 
 40 | this tutorial:
 41 | 
 42 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 43 | library(crisprBase)
 44 | library(crisprDesign)
 45 | library(crisprDesignData)
 46 | ```
 47 | 
 48 | We will also load the `BSgenome` package containing DNA sequences for the hg38
 49 | genome:
 50 | 
 51 | ```{r, message=FALSE, warning=FALSE, results='hide'}
 52 | library(BSgenome.Hsapiens.UCSC.hg38)
 53 | ```
 54 | 
 55 | 
 56 | ## Creating the GuideSet
 57 | 
 58 | We begin by loading the Csm `CrisprNuclease` object from the `crisprBase`
 59 | package:
 60 | 
 61 | ```{r}
 62 | data(Csm, package="crisprBase")
 63 | Csm
 64 | ```
 65 | 
 66 | The PFS sequence (the equivalent of a PAM sequence for RNA-targeting nucleases)
 67 | for Csm is `N`, meaning that no specific PFS sequence is preferred by Csm. 
 68 | The default spacer length of the Csm nuclease is 32nt. This can be changed
 69 | using `spacerLength` (for instance, `spacerLength(Csm) <- 36`). 
 70 | 
 71 | Next, we will extract the mRNA sequence for the KRAS transcript ENST00000311936 
 72 | with the function `getMrnaSequences` from `crisprDesign`. The function
 73 | requires a gene annotation object. We will load the Ensembl model from
 74 | the `crisprDesignData` package stored in the `GRangesList` object `txdb_human`:
 75 | 
 76 | ```{r}
 77 | data("txdb_human", package="crisprDesignData")
 78 | ```
 79 | 
 80 | For more information on `txdb_human` and how to create similar gene 
 81 | annotation objects, see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) tutorial.
 82 | 
 83 | We also need a `BSgenome` object containing the DNA sequences:
 84 | 
 85 | ```{r}
 86 | bsgenome <- BSgenome.Hsapiens.UCSC.hg38
 87 | ```
 88 | 
 89 | We are now ready to obtain our mRNA sequence:
 90 | 
 91 | ```{r}
 92 | txid <- "ENST00000311936"
 93 | mrna <- getMrnaSequences(txids=txid,
 94 |                          bsgenome=bsgenome,
 95 |                          txObject=txdb_human)
 96 | mrna
 97 | ```
 98 | 
 99 | Similar to the CRISPRko gRNA design, we use the function `findSpacers` 
100 | to design our gRNAs:
101 | 
102 | ```{r, warning=FALSE}
103 | gs <- findSpacers(mrna,
104 |                   crisprNuclease=Csm)
105 | head(gs)
106 | ```
107 | 
108 | Note that all protospacer sequences are located on the original strand of 
109 | the mRNA sequence. For RNA-targeting nucleases, the spacer and protospacer 
110 | sequences are the reverse complement of each other. 
111 | (Compare the output of the code below with a `GuideSet` that 
112 | uses a DNA-targeting nuclease--for such `GuideSet` objects, the outputs 
113 | of `spacers` and `protospacers` are identical.)
114 | 
115 | ```{r}
116 | head(spacers(gs))
117 | head(protospacers(gs))
118 | ```
119 | 
120 | 
121 | ## Annotating the GuideSet
122 | 
123 | Next, we annotate our candidate gRNAs to assess quality. There are several 
124 | functions in `crisprDesign` that provide annotation for features that are 
125 | nonspecific to CRISPRkd, for which we refer the reader to the
126 | [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial for more information. The sections below will 
127 | cover annotation functions that are of particular interest to,
128 | or deserve extra care for, CRISPRkd applications.
129 | 
130 | ### Adding spacer alignments
131 | 
132 | Since our CRISPR nuclease targets RNA rather than DNA, off-target 
133 | searches should be restricted to the transcriptome. We can perform
134 | such a search using one of two methods.
135 | 
136 | #### Adding spacer alignments with Biostrings
137 | 
138 | For the first method, we set the `aligner` argument to `"biostrings"` 
139 | and pass a `DNAStringSet` representation of the transcriptome to the 
140 | argument `custom_seq`. We can create this representation with
141 | `getMrnaSequences` and all transcript IDs found in `txdb_human`. 
142 | The code below uses this method to search for off-targets having up to
143 | one mismatch and passes `txdb_human` to the `txObject` argument so 
144 | that the alignments will be accompanied with gene annotation.
145 | 
146 | ```{r, eval=FALSE}
147 | exon_ids <- unique(txdb_human$exons$tx_id)
148 | mrnasHuman <- getMrnaSequences(exon_ids,
149 |                                bsgenome=BSgenome.Hsapiens.UCSC.hg38,
150 |                                txObject=txdb_human)
151 | ## long run time
152 | results <- addSpacerAlignments(gs,
153 |                                aligner="biostrings",
154 |                                txObject=txdb_human,
155 |                                n_mismatches=1,
156 |                                custom_seq=mrnasHuman)
157 | ```
158 | 
159 | NOTE: since `mrnasHuman` contains many sequences (>100k), this method 
160 | has a very long run time; for transcriptome-wide searches, 
161 | or for searches against a large number of sequences, we recommend the 
162 | following method instead.
163 | 
164 | 
165 | #### Adding spacer alignments with bowtie or BWA
166 | 
167 | The second method uses the `bowtie` (or `bwa`) aligner. This requires 
168 | building a transcriptome bowtie (or BWA) index file first. See the [Building genome indices for short read aligners](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) tutorial for more information. 
169 | 
170 | Here we set `aligner` to `"bowtie"` and pass a precomputed 
171 | transcriptome bowtie index to `aligner_index` to find off-targets:
172 | 
173 | ```{r, warning=FALSE, message=FALSE, results='hide'}
174 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/ensembl_human_104/ensembl_human_104"
175 | results <- addSpacerAlignments(gs,
176 |                                aligner="bowtie",
177 |                                aligner_index=bowtie_index,
178 |                                txObject=txdb_human,
179 |                                n_mismatches=1)
180 | ```
181 | ```{r}
182 | head(results)
183 | ```
184 | 
185 | The columns `n0_gene` and `n0_tx` report the number of on-targets
186 | at the gene- and transcript-level, respectively. For instance, 
187 | each spacer shown above shows `n0_gene` equal to 1 and `n0_tx` 
188 | equal to 4, meaning each spacer maps to all four isoforms of KRAS.
189 | We can retrieve information about each alignment with the `onTargets`
190 | function. Looking at the on-targets for the first spacer we can see 
191 | where the target `pam_site` is relative to the start of the transcript
192 | with respect to each isoform of KRAS.
193 | 
194 | ```{r}
195 | onTargets(results["spacer_1"])
196 | ```
197 | 
198 | Note that each annotated alignment is specific to the transcript 
199 | ID given under `seqnames`.
200 | 
201 | Below is a spacer that targets (with no mismatches) multiple genes:
202 | 
203 | ```{r}
204 | results["spacer_244"]
205 | ```
206 | 
207 | Upon further inspection of this spacer's alignments, 
208 | however, we can see that the off-target occurs in the pseudogene KRASP1,
209 | and should be harmless.
210 | 
211 | ```{r}
212 | onTargets(results["spacer_244"])
213 | ```
214 | 
215 | 
216 | 
217 | # Session Info
218 | 
219 | ```{r}
220 | sessionInfo()
221 | ```
222 | 
223 | 
224 | # References
225 | 
226 | 


--------------------------------------------------------------------------------
/Building_Database_Human_Cas9/script.R:
--------------------------------------------------------------------------------
  1 | # This is the script used in the tutorial:
  2 | 
  3 | #0. required packages
  4 | library(crisprDatabase)
  5 | library(crisprBase)
  6 | library(crisprDesign)
  7 | library(crisprDesignData)
  8 | library(crisprDesignFacilitator)
  9 | overwrite <- FALSE
 10 | 
 11 | 
 12 | #i=1
 13 | #species <- "human"
 14 | #species <- "mouse"
 15 | #modality <- "crispra"
 16 | #modality <- "crisprkd"
 17 | #modality <- "crisprko"
 18 | #nuclease <- "SpCas9"
 19 | #nuclease <- "enAsCas12a"
 20 | #nuclease <- "CasRx"
 21 | #version  <- "v6"
 22 | 
 23 | 
 24 | #1. Tech options (positional command-line arguments)
 25 | cli_args <- commandArgs(trailingOnly=TRUE)
 26 | i        <- as.numeric(cli_args[1])  # selects one element of the ID chunk list
 27 | species  <- as.character(cli_args[2])
 28 | modality <- as.character(cli_args[3])
 29 | nuclease <- as.character(cli_args[4])
 30 | version  <- "v6"
 31 | 
 32 | 
 33 | #2. gRNA design options:
 34 | # On-target scoring methods, grouped by nuclease family and then pooled
 35 | scoring_methods_cas9 <- c("ruleset1", "azimuth", "deephf", "crisprscan",
 36 |                           "crisprai", "crisprater", "deepspcas9", "ruleset3")
 37 | scoring_methods_cas12a <- c("deepcpf1", "enpamgb")
 38 | scoring_methods_cas13d <- "casrxrf"
 39 | scoring_methods <- c(scoring_methods_cas9,
 40 |                      scoring_methods_cas12a,
 41 |                      scoring_methods_cas13d)
 42 | n_mismatches <- 3
 43 | max_mm <- 2
 44 | canonical_offtarget <- FALSE
 45 | # canonical_ontarget is TRUE for SpCas9 only
 46 | if (nuclease=="SpCas9"){
 47 |     canonical_ontarget <- TRUE
 48 | } else if (nuclease=="enAsCas12a" || nuclease=="CasRx"){
 49 |     canonical_ontarget <- FALSE
 50 | }
 51 | 
 52 | 
 53 | ### TSS WINDOW (NULL for CRISPRko/CRISPRkd; fixes the "crispko" typo)
 54 | if (modality=="crisprko" || modality=="crisprkd"){
 55 |     tss_window <- NULL
 56 | } else if (modality=="crispra"){
 57 |     tss_window <- c(-500,0)
 58 | } else if (modality=="crispri"){
 59 |     tss_window <- c(0, 500)
 60 | } 
 61 | 
 62 | 
 63 | ### Getting CRISPR nuclease object:
 64 | # Load the three supported nuclease objects, then keep the requested one
 65 | data(list=c("SpCas9", "enAsCas12a", "CasRx"), package="crisprBase")
 66 | if (nuclease=="CasRx"){
 67 |     crisprNuclease <- CasRx
 68 | } else if (nuclease=="enAsCas12a"){
 69 |     crisprNuclease <- enAsCas12a
 70 | } else if (nuclease=="SpCas9"){
 71 |     crisprNuclease <- SpCas9
 72 | }
 73 | 
 74 | 
 75 | 
 76 | ### Necessary annotation files
 77 | snpFile <- getSNPFile() 
 78 | # CasRx is aligned against the "rna" bowtie index; all others use "dna"
 79 | index_type <- if (nuclease=="CasRx") "rna" else "dna"
 80 | bowtie_index <- getBowtieIndex(species=species, what=index_type)
 81 | bsgenome <- getGenomePackage(species=species)
 82 | 
 83 | # Stuff for CRISPRai
 84 | # Chromatin and FASTA files are only retrieved for human
 85 | chromatinFiles <- NULL
 86 | fastaFile <- NULL
 87 | if (species=="human"){
 88 |     chromatinFiles <- getChromatinFiles()
 89 |     fastaFile <- getGenomeFasta()
 90 | }
 91 | 
 92 | # useDistanceToTss is FALSE only for the human + SpCas9 combination
 93 | useDistanceToTss <- !(species=="human" && nuclease=="SpCas9")
 94 | 
 95 | # Getting binaries for CasRx
 96 | # (NULL for every other nuclease)
 97 | binaries <- NULL
 98 | if (nuclease=="CasRx"){
 99 |     binaries <- crisprDesignFacilitator::getCasRxRfBinaries()
100 | }
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | # SNP stuff
109 | # A SNP file is only retrieved for human
110 | vcf <- NULL
111 | if (species=="human"){
112 |     vcf <- getSNPFile()
113 | }
114 | 
115 | # Conservation stuff
116 | # Conservation files are only retrieved for CRISPRko
117 | conservationFile <- NULL
118 | if (modality=="crisprko"){
119 |     conservationFile <- crisprDesignFacilitator::getConservationFiles(species)
120 | }
121 | 
122 | 
123 | 
124 | # Isoform stuff:
125 | # Canonical isoform tables are only loaded for CRISPRko; any species other
126 | # than "human" uses the mouse table
127 | canonicalIsoforms <- NULL
128 | if (modality=="crisprko" && species=="human"){
129 |     data(canonicalHuman, package="crisprDesignData")
130 |     canonicalIsoforms <- canonicalHuman
131 | } else if (modality=="crisprko"){
132 |     data(canonicalMouse, package="crisprDesignData")
133 |     canonicalIsoforms <- canonicalMouse
134 | }
135 | 
136 | 
137 | 
138 | # Gene model, TSS, repeat and Pfam objects for the selected species
139 | # (anything other than "human" uses the mouse objects)
140 | if (species=="human"){
141 |     data(list=c("txdb_human", "gr.repeats.hg38", "pfamTableHuman"),
142 |          package="crisprDesignData")
143 |     data(tss_human, package="crisprDesignFacilitator")
144 |     txObject <- txdb_human
145 |     tssObject <- tss_human
146 |     grRepeats <- gr.repeats.hg38
147 |     pfamTable <- pfamTableHuman
148 | } else {
149 |     data(list=c("txdb_mouse", "gr.repeats.mm10", "pfamTableMouse"),
150 |          package="crisprDesignData")
151 |     data(tss_mouse, package="crisprDesignFacilitator")
152 |     txObject <- txdb_mouse
153 |     tssObject <- tss_mouse
154 |     grRepeats <- gr.repeats.mm10
155 |     pfamTable <- pfamTableMouse
156 | }
157 | 
158 | ### Modality for crisprDesign (e.g. "crisprko" becomes "CRISPRko")
159 | modality2 <- gsub("crispr", "CRISPR", modality, fixed=TRUE)
160 | 
161 | 
162 | 
163 | #3. gene specification:
164 | genedir <- crisprDatabase::getGeneModelDir(version=version,
165 |                                            species=species)
166 | # IDs are stored as a list of chunks, one file per modality group; this
167 | # run only processes chunk `i`
168 | chunk_file <- "tssids.500chunks.rds"
169 | if (modality=="crisprko"){
170 |     chunk_file <- "genesids.500chunks.rds"
171 | } else if (modality=="crisprkd"){
172 |     chunk_file <- "txids.500chunks.rds"
173 | }
174 | ids <- readRDS(file.path(genedir, chunk_file))[[i]]
175 | 
176 | # Passed as `queryColumn` alongside each ID below (NULL for CRISPRkd)
177 | if (modality=="crisprkd"){
178 |     queryColumn <- NULL
179 | } else if (modality=="crisprko"){
180 |     queryColumn <- "gene_id"
181 | } else if (modality=="crispra" || modality=="crispri"){
182 |     queryColumn <- "ID"
183 | }
184 | 
185 | 
186 | #gene <- "ENSG00000133703" #KRAS
187 | #gene <- "ENSG00000130270" #With repeats
188 | extdir <- crisprDatabase::getCrisprDir(version=version,
189 |                                        modality=modality,
190 |                                        nuclease=nuclease,
191 |                                        species=species)
192 | if (!dir.exists(extdir)){
193 |     dir.create(extdir, recursive=TRUE)
194 | }
195 | # Design, annotate, rank and save the gRNAs of every ID in this chunk
196 | for (k in seq_along(ids)){
197 |     id <- ids[k]
198 |     filename <- file.path(extdir, paste0(id, '.rds'))
199 |     # Progress log: modality, species, nuclease, position in chunk, ID
200 |     for (info in list(modality, species, nuclease, k, id)) print(info)
201 |     if (!overwrite && file.exists(filename)){
202 |         cat("Overwrite mode is off, and data already generated for this gene. Skipping :) \n")
203 |     } else { 
204 |         gs <- crisprDesign::designCompleteAnnotation(queryValue=id,
205 |                                                      queryColumn=queryColumn,
206 |                                                      modality=modality2,
207 |                                                      bsgenome=bsgenome,
208 |                                                      vcf=vcf,
209 |                                                      tssObject=tssObject,
210 |                                                      txObject=txObject,
211 |                                                      bowtie_index=bowtie_index,
212 |                                                      crisprNuclease=crisprNuclease,
213 |                                                      n_mismatches=n_mismatches,
214 |                                                      scoring_methods=scoring_methods,
215 |                                                      max_mm=max_mm,
216 |                                                      tss_window=tss_window,
217 |                                                      canonical_ontarget=canonical_ontarget,
218 |                                                      canonical_offtarget=canonical_offtarget,
219 |                                                      grRepeats=grRepeats,
220 |                                                      fastaFile=fastaFile,
221 |                                                      chromatinFiles=chromatinFiles,
222 |                                                      conservationFile=conservationFile,
223 |                                                      geneCol="gene_symbol",
224 |                                                      canonicalIsoforms=canonicalIsoforms,
225 |                                                      binaries=binaries,
226 |                                                      pfamTable=pfamTable)
227 |         # The canonical transcript is only looked up for CRISPRko
228 |         txid <- NULL
229 |         if (modality=="crisprko"){
230 |             txid <- canonicalIsoforms$tx_id[match(id, canonicalIsoforms$gene_id)]
231 |         }
232 |         # is() instead of class()==: a multi-class result would make the `if`
233 |         # condition length > 1. Non-GuideSet results are saved as returned.
234 |         if (methods::is(gs, "GuideSet")){
235 |             gs <- rankSpacers(gs,
236 |                               modality=modality2,
237 |                               commonExon=TRUE,
238 |                               tx_id=txid,
239 |                               useDistanceToTss=useDistanceToTss)
240 |         } 
241 |         saveRDS(gs, file=filename)
242 |     }
243 | }
244 | q("no")
245 | 
246 | 
247 | 
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | 


--------------------------------------------------------------------------------
/Design_Minor_Major_Allele/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs with minor and major alleles"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | 
 14 | # Introduction
 15 | 
 16 | Genetic variants such as single nucleotide polymorphisms (SNPs) can 
 17 | be problematic in guide RNA (gRNA) design, as different alleles can
 18 | result in unintended gRNA:DNA mismatches for on-targets that reduce 
 19 | gRNA efficacy. To circumvent this, it is advisable to generally avoid
 20 | targeting sequences that contain variants. However, this may not always
 21 | be possible, due to a small target window and/or few target options,
 22 | or desirable, if, for example, a CRISPR application intends to target 
 23 | a pathogenic variant.
 24 | 
 25 | Functions in `crisprDesign` are well equipped to handle these cases.
 26 | gRNAs overlapping SNPs can be identified with the `addSNPAnnotation` 
 27 | function, as documented in the [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial. Should the user wish to target a region despite (or because of)
 28 | the presence of variants, the user only needs to take care in the choice
 29 | of `BSgenome` when constructing the `GuideSet` (an alternative option is
 30 | to supply a custom target sequence; see the [Working with a custom sequence](https://github.com/crisprVerse/Tutorials/tree/master/Design_Custom_Sequence) tutorial for more information).
 31 | 
 32 | This tutorial covers use cases for `BSgenome` objects that store variants 
 33 | of the reference human genome (hg38) injected with major and minor alleles.
 34 | It assumes the reader is familiar with constructing and using gene 
 35 | annotation objects (see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) tutorial) and `GuideSet` objects (see the [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial) so that the content may focus on the utility of 
 36 | the `BSgenome` variants discussed herein. Please consult the
 37 | applicable tutorials if necessary.
 38 | 
 39 | Finally, it goes without saying that the user should be knowledgeable
 40 | of the sequence(s), including possible variations in such, he or she 
 41 | is designing gRNAs for.
 42 | 
 43 | 
 44 | 
 45 | # Installation
 46 | 
 47 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the crisprVerse packages necessary
 48 | for this tutorial. 
 49 | 
 50 | 
 51 | 
 52 | # Terminology
 53 | 
 54 | See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar 
 55 | with the terminology used throughout this tutorial.
 56 | 
 57 | # gRNA design
 58 | 
 59 | ## Loading packages
 60 | 
 61 | We first load the necessary packages for this tutorial:
 62 | 
 63 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 64 | library(crisprBase)
 65 | library(crisprDesign)
 66 | library(crisprDesignData)
 67 | library(BSgenome.Hsapiens.UCSC.hg38)
 68 | library(BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major)
 69 | library(BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor)
 70 | ```
 71 | 
 72 | We will also load the gene annotation model `txdb_human` from the
 73 | `crisprDesignData` package:
 74 | 
 75 | ```{r}
 76 | data(txdb_human, package="crisprDesignData")
 77 | ```
 78 | 
 79 | 
 80 | The `BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major` and 
 81 | `BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor` packages are `BSgenome` 
 82 | packages that contain the major and minor alleles of the human reference
 83 | genome hg38 based on dbSNP151. For more information, type the following: 
 84 | 
 85 | 
 86 | ```{r, eval=FALSE}
 87 | help(BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major)
 88 | help(BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor)
 89 | ```
 90 | 
 91 | ## Designing gRNAs for human (hg38) with injected major alleles
 92 | 
 93 | It is worth noting that the human reference genome sequence (GRCh38.p12) 
 94 | does not give the major allele 
 95 | (i.e. most common allele in a population) at all nucleotide locations.
 96 | Indeed, given that it was historically constructed from a small set of 
 97 | human genomes, it contains minor alleles that were common across this 
 98 | set of human genomes.
 99 | 
100 | For example, in the coding region (CDS) of the human gene SMC3, 
101 | the reference genome contains the minor allele of the SNP rs2419565; the
102 | reference allele (A) frequency is 0.00577, and the alternative allele (G) 
103 | frequency is 0.99423, as indicated in the table [here](https://www.ncbi.nlm.nih.gov/snp/rs2419565). 
104 | 
105 | 
106 | Designing gRNAs targeting the CDS of SMC3 with the SpCas9 nuclease returns
107 | one gRNA that overlaps this SNP. Below, we first construct a 
108 | `GuideSet` object using the reference `BSgenome` object:
109 | 
110 | 
111 | ```{r, collapse=TRUE, results='markup'}
112 | smc3 <- queryTxObject(txdb_human,
113 |                       featureType="cds",
114 |                       queryColumn="gene_symbol",
115 |                       queryValue="SMC3")
116 | gs_reference <- findSpacers(smc3,
117 |                             crisprNuclease=SpCas9,
118 |                             bsgenome=BSgenome.Hsapiens.UCSC.hg38)
119 | ``` 
120 | 
121 | and a `GuideSet` object with the `BSgenome` object that contains 
122 | the major alleles:
123 | 
124 | ```{r, collapse=TRUE, results='markup'}
125 | gs_major <- findSpacers(smc3,
126 |                         crisprNuclease=SpCas9,
127 |                         bsgenome=BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major)
128 | ```
129 | 
130 | Let's compare the protospacer sequences from both objects:
131 | 
132 | ```{r, collapse=TRUE, results='markup'}
133 | 
134 | protospacers(gs_reference["spacer_199"])
135 | protospacers(gs_major["spacer_199"])
136 | ```
137 | 
138 | The variant occurs in the seed sequence of this gRNA, 5 bases upstream of
139 | the `pam_site`, so a gRNA:DNA mismatch at this location is likely detrimental
140 | to its efficacy. Also, as this major allele occurs at >99% frequency, it
141 | may be more beneficial to design gRNAs in this example using the major allele
142 | genome contained in `BSgenome.Hsapiens.UCSC.hg38.dbSNP151.major`.
143 | 
144 | 
145 | ## Designing gRNAs for human (hg38) with injected minor alleles
146 | 
147 | It may be desirable, in some applications, to target a genic sequence 
148 | that contains a minor allele (i.e. less common allele) rather than the 
149 | major or reference allele. For example, if a particular minor allele is 
150 | pathogenic and the host cell has a single copy of that allele, the user
151 | may want to target that pathogenic variant and disrupt its behavior while
152 | leaving the other copy undisturbed.
153 | 
154 | As an example, using `BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor`, we can 
155 | target the pathogenic minor allele ([rs398122995](https://www.ncbi.nlm.nih.gov/clinvar/variation/92240/?oq=rs398122995&m=NM_001378454.1(ALMS1):c.1897C%3ET%20(p.Gln633Ter))) located in the human gene
156 | ALMS1:
157 | 
158 | 
159 | 
160 | ```{r}
161 | alms1 <- queryTxObject(txdb_human, 'cds', 'gene_symbol', 'ALMS1')
162 | gs_minor <- findSpacers(alms1,
163 |                         crisprNuclease=SpCas9,
164 |                         bsgenome=BSgenome.Hsapiens.UCSC.hg38.dbSNP151.minor)
165 | gs_minor <- unique(gs_minor)
166 | ```
167 | 
168 | We also include, for comparison, the resulting `GuideSet` using the 
169 | reference genome sequence:
170 | 
171 | ```{r}
172 | gs_reference <- findSpacers(alms1,
173 |                             crisprNuclease=SpCas9,
174 |                             bsgenome=BSgenome.Hsapiens.UCSC.hg38)
175 | gs_reference <- unique(gs_reference)
176 | ```
177 | 
178 | and compare the two versions of the gRNA:
179 | 
180 | ```{r}
181 | gs_reference["spacer_615"]
182 | gs_minor["spacer_612"]
183 | ```
184 | 
185 | The variant occurs 1 base upstream of the `pam_site` and likely influences
186 | gRNA activity; that is, we can design a gRNA that targets the minor allele 
187 | and has a much lower affinity for the reference, or major, allele.
188 | 
189 | Note that while the two `GuideSet` objects differ only by their
190 | `BSgenome` object, we need to provide different indices to access
191 | protospacers at equivalent `pam_site`s. This is due to variants in
192 | one `BSgenome` (in this case the one with minor alleles) eliminating
193 | PAM sequences, that is, one of the Gs in NGG is changed to another base 
194 | such that SpCas9 does not recognize it. This, where permissible, is also 
195 | an effective way of ensuring gRNAs only target a specific sequence if
196 | that sequence contains the desired variant.
197 | 
198 | 
199 | # Session Info
200 | 
201 | ```{r}
202 | sessionInfo()
203 | ```
204 | 
205 | 
206 | 
207 | 


--------------------------------------------------------------------------------
/Design_Cross_Reactivity/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs that map across species"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | This tutorial describes how to design guide RNAs (gRNAs) that target 
 18 | homologous genes across multiple species using functions from 
 19 | the [crisprDesign](https://github.com/crisprVerse/crisprDesign) package.
 20 | This strategy can be applied to any two (or more) species for which the
 21 | genome sequence and gene model annotation is available.
 22 | 
 23 | 
 24 | # Installation
 25 | 
 26 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 27 | `crisprDesign`, `crisprDesignData`
 28 | 
 29 | 
 30 | # Terminology
 31 | 
 32 | See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar 
 33 | with the terminology used throughout this tutorial.
 34 | 
 35 | 
 36 | # Mapping gRNAs across species
 37 | 
 38 | ## Loading packages
 39 | 
 40 | We first load the necessary packages for this tutorial:
 41 | 
 42 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 43 | library(crisprBase)
 44 | library(crisprDesign)
 45 | library(crisprDesignData)
 46 | library(BSgenome.Hsapiens.UCSC.hg38)
 47 | library(BSgenome.Mmusculus.UCSC.mm10)
 48 | ```
 49 | 
 50 | 
 51 | ## Creating the GuideSet
 52 | 
 53 | In this tutorial, we will design gRNAs using the SpCas9 nuclease that
 54 | target both the human KRAS gene and its mouse ortholog Kras. There 
 55 | are multiple ways to go about this, which we describe in the following 
 56 | sections. 
 57 | 
 58 | We first create a `GuideSet` object containing gRNAs targeting the 
 59 | coding sequence (CDS) of human KRAS.
 60 | To do so, we start by loading the SpCas9 `CrisprNuclease` object from 
 61 | the `crisprBase` package:
 62 | 
 63 | ```{r}
 64 | data(SpCas9, package="crisprBase")
 65 | ```
 66 | 
 67 | and then load data containing gene regions for the human genome 
 68 | from the `crisprDesignData` package, `txdb_human` (we will also load 
 69 | a similar object for the mouse genome, `txdb_mouse`):
 70 | 
 71 | ```{r}
 72 | data(txdb_human, package="crisprDesignData")
 73 | data(txdb_mouse, package="crisprDesignData")
 74 | ```
 75 | 
 76 | For more information on `txdb_human` and `txdb_mouse` and how to create 
 77 | similar gene annotation objects, see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) tutorial.
 78 | 
 79 | Next, we find the coordinates for the CDS of KRAS using the `queryTxObject` function:
 80 | 
 81 | ```{r}
 82 | kras_human <- queryTxObject(txdb_human,
 83 |                             featureType="cds",
 84 |                             queryColumn="gene_symbol",
 85 |                             queryValue="KRAS")
 86 | ```
 87 | 
 88 | and build our `GuideSet` object with the `findSpacers` function:
 89 | 
 90 | ```{r}
 91 | gs_human <- findSpacers(kras_human,
 92 |                         crisprNuclease=SpCas9,
 93 |                         bsgenome=BSgenome.Hsapiens.UCSC.hg38) 
 94 | ```
 95 | 
 96 | 
 97 | ## Mapping gRNAs across species via `intersect`
 98 | 
 99 | As a first strategy to find gRNAs that target both species, we first 
100 | create a similar `GuideSet` targeting the mouse ortholog Kras:
101 | 
102 | ```{r}
103 | kras_mouse <- queryTxObject(txdb_mouse,
104 |                             featureType="cds",
105 |                             queryColumn="gene_symbol",
106 |                             queryValue="Kras")
107 | gs_mouse <- findSpacers(kras_mouse,
108 |                         crisprNuclease=SpCas9,
109 |                         bsgenome=BSgenome.Mmusculus.UCSC.mm10) 
110 | ```
111 | 
112 | Then, we find the common spacers between the two `GuideSet` objects 
113 | using `intersect`:
114 | 
115 | ```{r}
116 | common_spacers <- intersect(spacers(gs_human),
117 |                             spacers(gs_mouse))
118 | length(common_spacers)
119 | ```
120 | 
121 | There are 18 spacers that target KRAS in both species. We can filter
122 | each `GuideSet` object for this common spacer set:
123 | 
124 | ```{r}
125 | results_human <- gs_human[spacers(gs_human) %in% common_spacers]
126 | results_mouse <- gs_mouse[spacers(gs_mouse) %in% common_spacers]
127 | ```
128 | 
129 | Let's look at the results:
130 | 
131 | ```{r}
132 | results_human
133 | results_mouse
134 | ```
135 | 
136 | This simple approach, however, has some drawbacks. It requires gRNAs 
137 | to have perfect sequence matching, which, while perhaps acceptable 
138 | for targets having many gRNA choices, may be too restrictive for 
139 | those applications that have fewer choices and may need to tolerate 
140 | mismatches in the target genes. Also, and more notably, we now have
141 | multiple `GuideSet` objects to maintain in the process of selecting 
142 | candidate gRNAs (see [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9))--essentially twice the work.
143 | 
144 | 
145 | ## Mapping gRNAs across species via `addSpacerAlignments`
146 | 
147 | To avoid the drawbacks of the above strategy, we can use
148 | the `addSpacerAlignments` function on our human KRAS `GuideSet` 
149 | to append alignment annotation of the **mouse** genome.
150 | 
151 | For this example, we will use the bowtie aligner, and we need to specify a 
152 | bowtie index for the mouse genome: 
153 | 
154 | ```{r}
155 | # Path of the mm10 bowtie index on my personal laptop:
156 | bowtie_index_mouse <- "/Users/fortinj2/crisprIndices/bowtie/mm10/mm10"
157 | ```
158 | 
159 | For instructions on how to build a Bowtie index from a given reference genome,
160 | see the [genome index tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices). 
161 | 
162 | We will also search up to 1 mismatch and pass the gene model 
163 | object `txdb_mouse` to the `txObject` argument, so the alignments will 
164 | be annotated with genomic context and we can determine which of our spacers 
165 | map to the CDS of Kras.
166 | 
167 | As we will also want to search for off-targets in the human genome in a later 
168 | step, we can ensure these results are not overwritten by setting
169 | the `colname` argument to a non-default value, such as `alignments_mouse`.
170 | 
171 | ```{r}
172 | results_human <- addSpacerAlignments(gs_human,
173 |                                      aligner="bowtie",
174 |                                      aligner_index=bowtie_index_mouse,
175 |                                      bsgenome=BSgenome.Mmusculus.UCSC.mm10,
176 |                                      txObject=txdb_mouse,
177 |                                      colname="alignments_mouse",
178 |                                      n_mismatches=1)
179 | results_human
180 | ```
181 | 
182 | Our results are stored in the `alignments_mouse` column. We can access 
183 | these alignments with the `alignments` function and by specifying 
184 | the `columnName`:
185 | 
186 | ```{r}
187 | alignments(results_human, columnName="alignments_mouse")
188 | ```
189 | 
190 | With these data, we can filter our gRNAs for those that target
191 | both orthologs (and we have off-target annotation for the mouse genome).
192 | 
193 | ```{r}
194 | aln <- alignments(results_human, columnName="alignments_mouse")
195 | cds_targets <- aln$cds
196 | aln <- aln[!is.na(cds_targets) & cds_targets == "Kras"]
197 | targets_Kras <- unique(names(aln))
198 | results_human <- results_human[targets_Kras]
199 | ```
200 | 
201 | Adding alignments for the human genome (or any other genome) will overwrite
202 | the summary columns in `results_human` (`n0`, `n0_c`, `n1`, and `n1_c`) 
203 | unless we set `addSummary=FALSE` in `addSpacerAlignments`. We should 
204 | also take care to ensure the column name for our alignments 
205 | annotation remains unique so it will not be overwritten. Here, 
206 | we add alignment annotation for the human genome, but overwrite the
207 | mouse alignment summary columns (see the warning message below).
208 | 
209 | ```{r}
210 | # Path of the hg38 bowtie index on my personal laptop:
211 | bowtie_index_human <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"
212 | 
213 | results_human <- addSpacerAlignments(results_human,
214 |                                      aligner="bowtie",
215 |                                      aligner_index=bowtie_index_human,
216 |                                      bsgenome=BSgenome.Hsapiens.UCSC.hg38,
217 |                                      txObject=txdb_human,
218 |                                      colname="alignments_human",
219 |                                      n_mismatches=1)
220 | results_human
221 | ```
222 | 
223 | 
224 | 
225 | # Session Info
226 | 
227 | ```{r}
228 | sessionInfo()
229 | ```
230 | 
231 | 
232 | 
233 | 


--------------------------------------------------------------------------------
/Building_Database_Human_Cas9/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Building a genome-wide gRNA database"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r, echo=FALSE, results="hide"}
 10 | options("knitr.graphics.auto_pdf"=TRUE)
 11 | ```
 12 | 
 13 | 
 14 | # Introduction
 15 | 
 16 | In this tutorial, we provide reproducible code to design and annotate
 17 | gRNAs against all human protein-coding genes using the nuclease SpCas9. 
 18 | 
 19 | # Loading necessary packages
 20 | 
 21 | We first load the necessary packages:
 22 | 
 23 | ```{r, warning=FALSE, message=FALSE}
 24 | library(crisprBase)
 25 | library(crisprScore)
 26 | library(crisprDesign)
 27 | library(crisprDesignData)
 28 | library(BSgenome.Hsapiens.UCSC.hg38)
 29 | ```
 30 | 
 31 | 
 32 | ### Specifying the genome
 33 | 
 34 | We specify a `BSgenome` object that contains the DNA sequence of the human
 35 | genome in hg38 coordinates:
 36 | 
 37 | ```{r}
 38 | bsgenome <- BSgenome.Hsapiens.UCSC.hg38
 39 | ```
 40 | 
 41 | ### Specifying the genome index
 42 | 
 43 | We specify the file path of the Bowtie index that we will need for
 44 | off-target alignment:
 45 | 
 46 | ```{r}
 47 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"
 48 | ```
 49 | 
 50 | For instructions on how to build a Bowtie index from a given reference
 51 | genome, see the [genome index tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices). 
 52 | 
 53 | 
 54 | 
 55 | ### Specifying a SNP VCF file
 56 | 
 57 | To flag gRNAs overlapping common SNPs, we specify a VCF file obtained from the
 58 | dbSNP website containing common SNPs from the dbSNP151 release:
 59 | 
 60 | ```{r}
 61 | vcf <- "/Users/fortinj2/crisprIndices/snps/dbsnp151.grch38/00-common_all.vcf.gz"
 62 | ```
 63 | 
 64 | The VCF file was obtained from [NCBI](https://www.ncbi.nlm.nih.gov/variation/docs/human_variation_vcf).
 65 | 
 66 | ### Specifying the nuclease
 67 | 
 68 | We load a `CrisprNuclease` object representing the SpCas9 nuclease from 
 69 | the `crisprBase` package:
 70 | 
 71 | ```{r}
 72 | data(SpCas9, package="crisprBase")
 73 | crisprNuclease <- SpCas9
 74 | ```
 75 | 
 76 | To learn how to specify or build a custom nuclease, see the [nuclease tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Custom_Nuclease).
 77 | 
 78 | 
 79 | 
 80 | ### Specifying on-target scoring methods
 81 | 
 82 | We specify which on-target scoring methods should be used to score 
 83 | the gRNAs:
 84 | 
 85 | ```{r}
 86 | scoring_methods <- c("deephf", "deepspcas9")
 87 | ```
 88 | 
 89 | 
 90 | One can see which scoring methods are available for a given nuclease using
 91 | the following command:
 92 | 
 93 | 
 94 | ```{r}
 95 | crisprScore::scoringMethodsInfo
 96 | ```
 97 | 
 98 | 
 99 | ### Specifying gene models and TSS annotations
100 | 
101 | To annotate gRNAs with a gene and TSS annotation, we need to specify a gene model 
102 | formatted as a `GRangesList` object, as well as a TSS annotation with a 
103 | `GRanges` object. The `crisprDesignData` package contains such objects for both 
104 | the human and mouse genomes, in GRCh38 (hg38) and GRCm38 (mm10) coordinates,
105 | respectively. Ensembl gene models were used to generate such objects.
106 | We load those objects:
107 | 
108 | 
109 | ```{r, warning=FALSE, message=FALSE}
110 | data(txdb_human, package="crisprDesignData")
111 | data(tss_human, package="crisprDesignData")
112 | txObject <- txdb_human
113 | tssObject <- tss_human
114 | ```
115 | 
116 | See the [gene annotation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) 
117 | to learn how to build such objects. The [crisprDesignData](https://github.com/crisprVerse/crisprDesignData) repository also
118 | contains a lot of useful information. 
119 | 
120 | 
121 | ### Specifying repeat elements
122 | 
123 | To avoid designing gRNAs targeting repeat elements, we will specify a `GRanges`
124 | object containing repeat coordinates for the human genome. Here, we use the object `gr.repeats.hg38` in `crisprDesignData`. It contains genomic coordinates of 
125 | the RepeatMasker UCSC track, for the hg38 reference genome:
126 | 
127 | ```{r, warning=FALSE, message=FALSE}
128 | data(gr.repeats.hg38, package="crisprDesignData")
129 | grRepeats <- gr.repeats.hg38
130 | ```
131 | 
132 | 
133 | # Building a complete annotation for a given gene 
134 | 
135 | 
136 | ```{r, echo=FALSE, warning=FALSE, message=FALSE}
137 | # Explicitly loading those packages here to avoid their startup
138 | # printout when calling designCompleteAnnotation. This can be omitted. 
139 | library(crisprScoreData)
140 | library(ExperimentHub)
141 | library(AnnotationHub)
142 | library(BiocFileCache)
143 | library(dbplyr)
144 | ```
145 | 
146 | The `designCompleteAnnotation` function in `crisprDesign` provides a 
147 | one-step workflow to design and annotate all gRNAs targeting a given gene.
148 | The function was designed to be as comprehensive as possible to 
149 | design and annotate gRNAs in one step. It does the following: 
150 | 
151 | - Extract the DNA/RNA sequences with `queryTss`/`queryTxObject`
152 | - Design gRNAs with `findSpacers`
153 | - Remove gRNAs targeting repeat elements with `removeRepeats`
154 | - Characterize spacer sequences with `addSequenceFeatures`
155 | - Find on- and off-targets with `addSpacerAlignmentsIterative`
156 | - Add gene annotation with `addGeneAnnotation`
157 | - Add TSS annotation with `addTssAnnotation`
158 | - Add on-target efficiency scores with `addOnTargetScores`
159 | - Add off-target specificity scores with `addOffTargetScores`
160 | - Add SNP annotation with `addSNPAnnotation`
161 | - Add restriction enzymes information with `addRestrictionEnzymes`
162 | 
163 | 
164 | Here, we design all CRISPRko gRNAs targeting the 
165 | human KRAS gene (ENSG00000133703):
166 | 
167 | 
168 | 
169 | ```{r, warning=FALSE}
170 | gs <- designCompleteAnnotation(queryValue="ENSG00000133703",
171 |                                queryColumn="gene_id",
172 |                                modality="CRISPRko",
173 |                                bsgenome=bsgenome,
174 |                                bowtie_index=bowtie_index,
175 |                                crisprNuclease=SpCas9,
176 |                                txObject=txObject,
177 |                                tssObject=tssObject,
178 |                                grRepeats=grRepeats,
179 |                                vcf=vcf,
180 |                                n_mismatches=1,
181 |                                scoring_methods=scoring_methods)
182 | ```
183 | 
184 | 
185 | The resulting object is a `GuideSet` object. To learn more about what
186 | `GuideSet` objects are, and how to interact with them, see the [CRISPRko gRNA design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9).
187 | 
188 | 
189 | ```{r}
190 | gs
191 | ```
192 | 
193 | 
194 | ### Converting the `GuideSet` object to a list of data.frames
195 | 
196 | The `flattenGuideSet` function in `crisprDesign` is a convenience function 
197 | to convert a `GuideSet` object into a set of `data.frames` that can be 
198 | saved as plain text files:
199 | 
200 | ```{r}
201 | dfs <- flattenGuideSet(gs)
202 | ```
203 | 
204 | We can look at the names of the data.frames:
205 | 
206 | ```{r}
207 | names(dfs)
208 | ```
209 | 
210 | As an example, let's look at the first rows of the primary data.frame:
211 | 
212 | ```{r}
213 | head(dfs$primary)
214 | ```
215 | 
216 | # Building a complete gRNA database across all protein-coding genes
217 | 
218 | We first get all possible genes from our gene model:
219 | 
220 | ```{r}
221 | gene_ids <- unique(txObject$cds$gene_id)
222 | head(gene_ids)
223 | ```
224 | 
225 | 
226 | and specify where to save the `GuideSet` objects:
227 | 
228 | 
229 | ```{r, eval=FALSE}
230 | dir <- "./crisprko_cas9_hg38"
231 | if (!dir.exists(dir)){
232 |     dir.create(dir, recursive=TRUE)
233 | }
234 | ```
235 | 
236 | 
237 | We now loop over all genes to generate the data:
238 | 
239 | ```{r, eval=FALSE}
240 | lapply(gene_ids, function(gene){  # gene_ids: gene IDs extracted above
241 |     gs <- designCompleteAnnotation(queryValue=gene,
242 |                                    queryColumn="gene_id",
243 |                                    modality="CRISPRko",
244 |                                    bsgenome=bsgenome,
245 |                                    bowtie_index=bowtie_index,
246 |                                    crisprNuclease=SpCas9,
247 |                                    txObject=txObject,
248 |                                    tssObject=tssObject,
249 |                                    grRepeats=grRepeats,
250 |                                    vcf=vcf,
251 |                                    n_mismatches=3,
252 |                                    scoring_methods=scoring_methods)
253 |     saveRDS(gs, file=file.path(dir, paste0(gene, ".rds")))  # one file per gene
254 | })
255 | ```
256 | 
257 | This loop can be modified by the user to use an embarrassingly-parallel 
258 | approach, using the [BiocParallel](https://bioconductor.org/packages/BiocParallel/) package, for instance.
259 | 
260 | 
261 | Building a database for CRISPRa and CRISPRi applications works similarly.
262 | See `?designCompleteAnnotation` for more information. 
263 | 
264 | 
265 | # Reproducibility
266 | 
267 | ```{r}
268 | sessionInfo()
269 | ```
270 | 


--------------------------------------------------------------------------------
/Building_Custom_Nuclease/references.bib:
--------------------------------------------------------------------------------
  1 | @article{komor,
  2 |   title={Programmable editing of a target base in genomic DNA without double-stranded DNA cleavage},
  3 |   author={Komor, Alexis C and Kim, Yongjoo B and Packer, Michael S and Zuris, John A and Liu, David R},
  4 |   journal={Nature},
  5 |   volume={533},
  6 |   number={7603},
  7 |   pages={420--424},
  8 |   year={2016},
  9 |   publisher={Nature Publishing Group}
 10 | }
 11 | 
 12 | @article{behive,
 13 |   title={Determinants of base editing outcomes from target library analysis and machine learning},
 14 |   author={Arbab, Mandana and Shen, Max W and Mok, Beverly and Wilson, Christopher and Matuszek, {\.Z}aneta and Cassa, Christopher A and Liu, David R},
 15 |   journal={Cell},
 16 |   volume={182},
 17 |   number={2},
 18 |   pages={463--480},
 19 |   year={2020},
 20 |   publisher={Elsevier}
 21 | }
 22 | 
 23 | @article{langmead2009bowtie,
 24 | 	Abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
 25 | 	Author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
 26 | 	Da = {2009/03/04},
 27 | 	Doi = {10.1186/gb-2009-10-3-r25},
 28 | 	Id = {Langmead2009},
 29 | 	Isbn = {1474-760X},
 30 | 	Journal = {Genome Biology},
 31 | 	Number = {3},
 32 | 	Pages = {R25},
 33 | 	Title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
 34 | 	Ty = {JOUR},
 35 | 	Url = {https://doi.org/10.1186/gb-2009-10-3-r25},
 36 | 	Volume = {10},
 37 | 	Year = {2009}
 38 | }
 39 | 
 40 | 
 41 | @article{lindel,
 42 |   title={Massively parallel profiling and predictive modeling of the outcomes of CRISPR/Cas9-mediated double-strand break repair},
 43 |   author={Chen, Wei and McKenna, Aaron and Schreiber, Jacob and Haeussler, Maximilian and Yin, Yi and Agarwal, Vikram and Noble, William Stafford and Shendure, Jay},
 44 |   journal={Nucleic acids research},
 45 |   volume={47},
 46 |   number={15},
 47 |   pages={7989--8003},
 48 |   year={2019},
 49 |   publisher={Oxford University Press}
 50 | }
 51 | 
 52 | @article{azimuth,
 53 |   title={Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9},
 54 |   author={Doench, John G and Fusi, Nicolo and Sullender, Meagan and Hegde, Mudra and Vaimberg, Emma W and Donovan, Katherine F and Smith, Ian and Tothova, Zuzana and Wilen, Craig and Orchard, Robert and others},
 55 |   journal={Nature biotechnology},
 56 |   volume={34},
 57 |   number={2},
 58 |   pages={184},
 59 |   year={2016},
 60 |   publisher={Nature Publishing Group}
 61 | }
 62 | 
 63 | @article{deepcas9,
 64 |   title={Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning},
 65 |   author={Wang, Daqi and Zhang, Chengdong and Wang, Bei and Li, Bin and Wang, Qiang and Liu, Dong and Wang, Hongyan and Zhou, Yan and Shi, Leming and Lan, Feng and others},
 66 |   journal={Nature communications},
 67 |   volume={10},
 68 |   number={1},
 69 |   pages={1--14},
 70 |   year={2019},
 71 |   publisher={Nature Publishing Group}
 72 | }
 73 | 
 74 | @article{deepcpf1,
 75 |   title={Deep learning improves prediction of CRISPR--Cpf1 guide RNA activity},
 76 |   author={Kim, Hui Kwon and Min, Seonwoo and Song, Myungjae and Jung, Soobin and Choi, Jae Woo and Kim, Younggwang and Lee, Sangeun and Yoon, Sungroh and Kim, Hyongbum Henry},
 77 |   journal={Nature biotechnology},
 78 |   volume={36},
 79 |   number={3},
 80 |   pages={239},
 81 |   year={2018},
 82 |   publisher={Nature Publishing Group}
 83 | }
 84 | 
 85 | @article{perturbseq,
 86 |   title={A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response},
 87 |   author={Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and others},
 88 |   journal={Cell},
 89 |   volume={167},
 90 |   number={7},
 91 |   pages={1867--1882},
 92 |   year={2016},
 93 |   publisher={Elsevier}
 94 | }
 95 | 
 96 | @article{cropseq,
 97 |   title={Pooled CRISPR screening with single-cell transcriptome readout},
 98 |   author={Datlinger, Paul and Rendeiro, Andr{\'e} F and Schmidl, Christian and Krausgruber, Thomas and Traxler, Peter and Klughammer, Johanna and Schuster, Linda C and Kuchler, Amelie and Alpar, Donat and Bock, Christoph},
 99 |   journal={Nature methods},
100 |   volume={14},
101 |   number={3},
102 |   pages={297},
103 |   year={2017},
104 |   publisher={Nature Publishing Group}
105 | }
106 | 
107 | @article{crispracrisprireview,
108 |   title={CRISPRi and CRISPRa screens in mammalian cells for precision biology and medicine},
109 |   author={Kampmann, Martin},
110 |   journal={ACS chemical biology},
111 |   volume={13},
112 |   number={2},
113 |   pages={406--416},
114 |   year={2018},
115 |   publisher={ACS Publications}
116 | }
117 | 
118 | 
119 | 
120 | @article{crispri,
121 |   title={CRISPR-mediated modular RNA-guided regulation of transcription in eukaryotes},
122 |   author={Gilbert, Luke A and Larson, Matthew H and Morsut, Leonardo and Liu, Zairan and Brar, Gloria A and Torres, Sandra E and Stern-Ginossar, Noam and Brandman, Onn and Whitehead, Evan H and Doudna, Jennifer A and others},
123 |   journal={Cell},
124 |   volume={154},
125 |   number={2},
126 |   pages={442--451},
127 |   year={2013},
128 |   publisher={Elsevier}
129 | }
130 | 
131 | 
132 | @article{sam,
133 |   title={Genome-scale transcriptional activation by an engineered CRISPR-Cas9 complex},
134 |   author={Konermann, Silvana and Brigham, Mark D and Trevino, Alexandro E and Joung, Julia and Abudayyeh, Omar O and Barcena, Clea and Hsu, Patrick D and Habib, Naomi and Gootenberg, Jonathan S and Nishimasu, Hiroshi and others},
135 |   journal={Nature},
136 |   volume={517},
137 |   number={7536},
138 |   pages={583},
139 |   year={2015},
140 |   publisher={Nature Publishing Group}
141 | }
142 | 
143 | @article{fortin2019,
144 |   title={Multiple-gene targeting and mismatch tolerance can confound analysis of genome-wide pooled CRISPR screens},
145 |   author={Fortin, Jean-Philippe and Tan, Jenille and Gascoigne, Karen E and Haverty, Peter M and Forrest, William F and Costa, Michael R and Martin, Scott E},
146 |   journal={Genome biology},
147 |   volume={20},
148 |   number={1},
149 |   pages={21},
150 |   year={2019},
151 |   publisher={Springer}
152 | }
153 | 
154 | 
155 | 
156 | @article{sanson2018optimized,
157 |   title={Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
158 |   author={Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
159 |   journal={Nature communications},
160 |   volume={9},
161 |   number={1},
162 |   pages={1--15},
163 |   year={2018},
164 |   publisher={Nature Publishing Group}
165 | }
166 | 
167 | 
168 | 
169 | @article{horlbeck2016compact,
170 |   title={Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
171 |   author={Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
172 |   journal={Elife},
173 |   volume={5},
174 |   pages={e19760},
175 |   year={2016},
176 |   publisher={eLife Sciences Publications Limited}
177 | }
178 | 
179 | 
180 | @article{ceres,
181 |   title={Computational correction of copy number effect improves specificity of CRISPR--Cas9 essentiality screens in cancer cells},
182 |   author={Meyers, Robin M and Bryan, Jordan G and McFarland, James M and Weir, Barbara A and Sizemore, Ann E and Xu, Han and Dharia, Neekesh V and Montgomery, Phillip G and Cowley, Glenn S and Pantel, Sasha and others},
183 |   journal={Nature genetics},
184 |   volume={49},
185 |   number={12},
186 |   pages={1779--1784},
187 |   year={2017},
188 |   publisher={Nature Publishing Group}
189 | }
190 | 
191 | 
192 | @article{score,
193 |   title={Prioritization of cancer therapeutic targets using CRISPR--Cas9 screens},
194 |   author={Behan, Fiona M and Iorio, Francesco and Picco, Gabriele and Gon{\c{c}}alves, Emanuel and Beaver, Charlotte M and Migliardi, Giorgia and Santos, Rita and Rao, Yanhua and Sassi, Francesco and Pinnelli, Marika and others},
195 |   journal={Nature},
196 |   volume={568},
197 |   number={7753},
198 |   pages={511},
199 |   year={2019},
200 |   publisher={Nature Publishing Group}
201 | }
202 | 
203 | 
204 | 
205 | @article{mit,
206 |   title={DNA targeting specificity of RNA-guided Cas9 nucleases},
207 |   author={Hsu, Patrick D and Scott, David A and Weinstein, Joshua A and Ran, F Ann and Konermann, Silvana and Agarwala, Vineeta and Li, Yinqing and Fine, Eli J and Wu, Xuebing and Shalem, Ophir and others},
208 |   journal={Nature biotechnology},
209 |   volume={31},
210 |   number={9},
211 |   pages={827},
212 |   year={2013},
213 |   publisher={Nature Publishing Group}
214 | }
215 | 
216 | 
217 | 
218 | @article{rebase,
219 |   title={REBASE—a database for DNA restriction and modification: enzymes, genes and genomes},
220 |   author={Roberts, Richard J and Vincze, Tamas and Posfai, Janos and Macelis, Dana},
221 |   journal={Nucleic acids research},
222 |   volume={38},
223 |   number={suppl\_1},
224 |   pages={D234--D236},
225 |   year={2010},
226 |   publisher={Oxford University Press}
227 | }
228 | 
229 | 
230 | 
231 | 


--------------------------------------------------------------------------------
/Design_CRISPRbe/references.bib:
--------------------------------------------------------------------------------
  1 | @article{ops,
  2 |   title={Optical pooled screens in human cells},
  3 |   author={Feldman, David and Singh, Avtar and Schmid-Burgk, Jonathan L and Carlson, Rebecca J and Mezger, Anja and Garrity, Anthony J and Zhang, Feng and Blainey, Paul C},
  4 |   journal={Cell},
  5 |   volume={179},
  6 |   number={3},
  7 |   pages={787--799},
  8 |   year={2019},
  9 |   publisher={Elsevier}
 10 | }
 11 | 
 12 | @article{cas13d,
 13 |   author = {Konermann, Silvana and Lotfy, Peter and Brideau, Nicholas J and Oki, Jennifer and Shokhirev, Maxim N and Hsu, Patrick D},
 14 |   journal = {Cell},
 15 |   number = {3},
 16 |   pages = {665--676},
 17 |   publisher = {Elsevier},
 18 |   title = {Transcriptome engineering with RNA-targeting type VI-D CRISPR effectors},
 19 |   volume = {173},
 20 |   year = {2018}}
 21 | 
 22 | @article{koblan2018improving,
 23 |   title={Improving cytidine and adenine base editors by expression optimization and ancestral reconstruction},
 24 |   author={Koblan, Luke W and Doman, Jordan L and Wilson, Christopher and Levy, Jonathan M and Tay, Tristan and Newby, Gregory A and Maianti, Juan Pablo and Raguram, Aditya and Liu, David R},
 25 |   journal={Nature biotechnology},
 26 |   volume={36},
 27 |   number={9},
 28 |   pages={843--846},
 29 |   year={2018},
 30 |   publisher={Nature Publishing Group}
 31 | }
 32 | 
 33 | @article{sanson2018optimized,
 34 |   title={Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
 35 |   author={Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
 36 |   journal={Nature communications},
 37 |   volume={9},
 38 |   number={1},
 39 |   pages={1--15},
 40 |   year={2018},
 41 |   publisher={Nature Publishing Group}
 42 | }
 43 | 
 44 | @article{langmead2009bowtie,
 45 | 	Abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
 46 | 	Author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
 47 | 	Da = {2009/03/04},
 48 | 	Doi = {10.1186/gb-2009-10-3-r25},
 49 | 	Id = {Langmead2009},
 50 | 	Isbn = {1474-760X},
 51 | 	Journal = {Genome Biology},
 52 | 	Number = {3},
 53 | 	Pages = {R25},
 54 | 	Title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
 55 | 	Ty = {JOUR},
 56 | 	Url = {https://doi.org/10.1186/gb-2009-10-3-r25},
 57 | 	Volume = {10},
 58 | 	Year = {2009}
 59 | }
 60 | 
 61 | 
 62 | @article{lindel,
 63 |   title={Massively parallel profiling and predictive modeling of the outcomes of CRISPR/Cas9-mediated double-strand break repair},
 64 |   author={Chen, Wei and McKenna, Aaron and Schreiber, Jacob and Haeussler, Maximilian and Yin, Yi and Agarwal, Vikram and Noble, William Stafford and Shendure, Jay},
 65 |   journal={Nucleic acids research},
 66 |   volume={47},
 67 |   number={15},
 68 |   pages={7989--8003},
 69 |   year={2019},
 70 |   publisher={Oxford University Press}
 71 | }
 72 | 
 73 | @article{azimuth,
 74 |   title={Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9},
 75 |   author={Doench, John G and Fusi, Nicolo and Sullender, Meagan and Hegde, Mudra and Vaimberg, Emma W and Donovan, Katherine F and Smith, Ian and Tothova, Zuzana and Wilen, Craig and Orchard, Robert and others},
 76 |   journal={Nature biotechnology},
 77 |   volume={34},
 78 |   number={2},
 79 |   pages={184},
 80 |   year={2016},
 81 |   publisher={Nature Publishing Group}
 82 | }
 83 | 
 84 | @article{deepcas9,
 85 |   title={Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning},
 86 |   author={Wang, Daqi and Zhang, Chengdong and Wang, Bei and Li, Bin and Wang, Qiang and Liu, Dong and Wang, Hongyan and Zhou, Yan and Shi, Leming and Lan, Feng and others},
 87 |   journal={Nature communications},
 88 |   volume={10},
 89 |   number={1},
 90 |   pages={1--14},
 91 |   year={2019},
 92 |   publisher={Nature Publishing Group}
 93 | }
 94 | 
 95 | @article{deepcpf1,
 96 |   title={Deep learning improves prediction of CRISPR--Cpf1 guide RNA activity},
 97 |   author={Kim, Hui Kwon and Min, Seonwoo and Song, Myungjae and Jung, Soobin and Choi, Jae Woo and Kim, Younggwang and Lee, Sangeun and Yoon, Sungroh and Kim, Hyongbum Henry},
 98 |   journal={Nature biotechnology},
 99 |   volume={36},
100 |   number={3},
101 |   pages={239},
102 |   year={2018},
103 |   publisher={Nature Publishing Group}
104 | }
105 | 
106 | @article{perturbseq,
107 |   title={A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response},
108 |   author={Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and others},
109 |   journal={Cell},
110 |   volume={167},
111 |   number={7},
112 |   pages={1867--1882},
113 |   year={2016},
114 |   publisher={Elsevier}
115 | }
116 | 
117 | @article{cropseq,
118 |   title={Pooled CRISPR screening with single-cell transcriptome readout},
119 |   author={Datlinger, Paul and Rendeiro, Andr{\'e} F and Schmidl, Christian and Krausgruber, Thomas and Traxler, Peter and Klughammer, Johanna and Schuster, Linda C and Kuchler, Amelie and Alpar, Donat and Bock, Christoph},
120 |   journal={Nature methods},
121 |   volume={14},
122 |   number={3},
123 |   pages={297},
124 |   year={2017},
125 |   publisher={Nature Publishing Group}
126 | }
127 | 
128 | @article{crispracrisprireview,
129 |   title={CRISPRi and CRISPRa screens in mammalian cells for precision biology and medicine},
130 |   author={Kampmann, Martin},
131 |   journal={ACS chemical biology},
132 |   volume={13},
133 |   number={2},
134 |   pages={406--416},
135 |   year={2018},
136 |   publisher={ACS Publications}
137 | }
138 | 
139 | 
140 | 
141 | @article{crispri,
142 |   title={CRISPR-mediated modular RNA-guided regulation of transcription in eukaryotes},
143 |   author={Gilbert, Luke A and Larson, Matthew H and Morsut, Leonardo and Liu, Zairan and Brar, Gloria A and Torres, Sandra E and Stern-Ginossar, Noam and Brandman, Onn and Whitehead, Evan H and Doudna, Jennifer A and others},
144 |   journal={Cell},
145 |   volume={154},
146 |   number={2},
147 |   pages={442--451},
148 |   year={2013},
149 |   publisher={Elsevier}
150 | }
151 | 
152 | 
153 | @article{sam,
154 |   title={Genome-scale transcriptional activation by an engineered CRISPR-Cas9 complex},
155 |   author={Konermann, Silvana and Brigham, Mark D and Trevino, Alexandro E and Joung, Julia and Abudayyeh, Omar O and Barcena, Clea and Hsu, Patrick D and Habib, Naomi and Gootenberg, Jonathan S and Nishimasu, Hiroshi and others},
156 |   journal={Nature},
157 |   volume={517},
158 |   number={7536},
159 |   pages={583},
160 |   year={2015},
161 |   publisher={Nature Publishing Group}
162 | }
163 | 
164 | @article{fortin2019,
165 |   title={Multiple-gene targeting and mismatch tolerance can confound analysis of genome-wide pooled CRISPR screens},
166 |   author={Fortin, Jean-Philippe and Tan, Jenille and Gascoigne, Karen E and Haverty, Peter M and Forrest, William F and Costa, Michael R and Martin, Scott E},
167 |   journal={Genome biology},
168 |   volume={20},
169 |   number={1},
170 |   pages={21},
171 |   year={2019},
172 |   publisher={Springer}
173 | }
174 | 
175 | 
176 | 
177 | @article{sanson2018optimized,
178 |   title={Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
179 |   author={Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
180 |   journal={Nature communications},
181 |   volume={9},
182 |   number={1},
183 |   pages={1--15},
184 |   year={2018},
185 |   publisher={Nature Publishing Group}
186 | }
187 | 
188 | 
189 | 
190 | @article{horlbeck2016compact,
191 |   title={Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
192 |   author={Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
193 |   journal={Elife},
194 |   volume={5},
195 |   pages={e19760},
196 |   year={2016},
197 |   publisher={eLife Sciences Publications Limited}
198 | }
199 | 
200 | 
201 | @article{ceres,
202 |   title={Computational correction of copy number effect improves specificity of CRISPR--Cas9 essentiality screens in cancer cells},
203 |   author={Meyers, Robin M and Bryan, Jordan G and McFarland, James M and Weir, Barbara A and Sizemore, Ann E and Xu, Han and Dharia, Neekesh V and Montgomery, Phillip G and Cowley, Glenn S and Pantel, Sasha and others},
204 |   journal={Nature genetics},
205 |   volume={49},
206 |   number={12},
207 |   pages={1779--1784},
208 |   year={2017},
209 |   publisher={Nature Publishing Group}
210 | }
211 | 
212 | 
213 | @article{score,
214 |   title={Prioritization of cancer therapeutic targets using CRISPR--Cas9 screens},
215 |   author={Behan, Fiona M and Iorio, Francesco and Picco, Gabriele and Gon{\c{c}}alves, Emanuel and Beaver, Charlotte M and Migliardi, Giorgia and Santos, Rita and Rao, Yanhua and Sassi, Francesco and Pinnelli, Marika and others},
216 |   journal={Nature},
217 |   volume={568},
218 |   number={7753},
219 |   pages={511},
220 |   year={2019},
221 |   publisher={Nature Publishing Group}
222 | }
223 | 
224 | 
225 | 
226 | @article{mit,
227 |   title={DNA targeting specificity of RNA-guided Cas9 nucleases},
228 |   author={Hsu, Patrick D and Scott, David A and Weinstein, Joshua A and Ran, F Ann and Konermann, Silvana and Agarwala, Vineeta and Li, Yinqing and Fine, Eli J and Wu, Xuebing and Shalem, Ophir and others},
229 |   journal={Nature biotechnology},
230 |   volume={31},
231 |   number={9},
232 |   pages={827},
233 |   year={2013},
234 |   publisher={Nature Publishing Group}
235 | }
236 | 
237 | 
238 | 
239 | 


--------------------------------------------------------------------------------
/Design_PairedGuides/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design paired gRNAs"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | In this tutorial, we illustrate the main functionalities of  `crisprDesign` 
 18 | for designing pairs of gRNAs. 
 19 | 
 20 | # Getting started
 21 | 
 22 | ## Installation
 23 | 
 24 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 25 | `crisprDesign`, `crisprDesignData`
 26 | 
 27 | 
 28 | ## Terminology
 29 | 
 30 | See the [CRISPRko Cas9 design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar with the terminology used throughout this tutorial.
 31 | 
 32 | ## Paired gRNA design overview
 33 | 
 34 | There are several applications that require the design of gRNA pairs:
 35 | 
 36 | 1. Double nicking with CRISPR/Cas9 [@ran2013double]
 37 | 2. Dual-promoter screening systems [@han2017synergistic]
 38 | 3. Multiplexing gRNAs with enAsCas12a [@deweirdt2021optimization]
 39 | 4. Nanopore Cas9-targeted sequencing (nCATS) [@gilpatrick2020targeted]
 40 | 
 41 | The `crisprDesign` package provides an infrastructure to store and annotate
 42 | gRNA pairs via the `PairedGuideSet` object, which behaves very similarly
 43 | to the `GuideSet` object used for unpaired gRNAs. We designed the 
 44 | functionalities for paired gRNAs with the aforementioned applications in mind. 
 45 | 
 46 | In this tutorial, we will go through a simple example to illustrate 
 47 | the general concept behind paired gRNA design with `crisprDesign`. 
 48 | 
 49 | 
 50 | # A simple example: deleting a KRAS exon with a pair of gRNAs
 51 | 
 52 | We will show here how to design an optimal pair of Cas9 gRNAs flanking the
 53 | second exon of the human gene KRAS (ENSG00000133703), with the goal of 
 54 | creating a deletion that will excise the exon. 
 55 | 
 56 | We first start by loading the necessary packages:
 57 | 
 58 | ```{r, message=FALSE, warning=FALSE, results='hide' }
 59 | library(crisprDesign)
 60 | library(crisprDesignData)
 61 | library(crisprBase)
 62 | library(BSgenome.Hsapiens.UCSC.hg38)
 63 | ```
 64 | 
 65 | We will be designing gRNAs for the SpCas9 nuclease, which can be loaded 
 66 | from the `crisprBase` package (see the `crisprBase` [vignette](https://github.com/crisprVerse/crisprBase) for 
 67 | instructions on how to create or load alternative nucleases):
 68 | 
 69 | 
 70 | ```{r}
 71 | data(SpCas9, package="crisprBase")
 72 | ```
 73 | 
 74 | Let's get the genomic coordinates of the second exon.
 75 | First, we obtain from `crisprDesignData` a `GRangesList` object that defines
 76 | the genomic coordinates (hg38 genome) of human protein-coding genes:
 77 | 
 78 | ```{r}
 79 | data(txdb_human, package="crisprDesignData")
 80 | ```
 81 | 
 82 | We then get the exonic coordinates of the canonical transcript ENST00000311936 
 83 | using the function `queryTxObject` from `crisprDesign`:
 84 | 
 85 | 
 86 | ```{r}
 87 | exons <- queryTxObject(txObject = txdb_human,        # gene model loaded from crisprDesignData
 88 |                        featureType = "exons",        # return exon-level ranges
 89 |                        queryColumn = "tx_id",        # match on the transcript identifier
 90 |                        queryValue = "ENST00000311936")
 91 | exons
 92 | ```
 93 | 
 94 | Finally, we select the second exon:
 95 | 
 96 | ```{r}
 97 | exon <- exons[exons$exon_rank == 2]  # keep only the exon ranked second in the transcript
 98 | names(exon) <- "exon_kras"
 99 | exon
100 | ```
101 | 
102 | The exon is on chr12, and spans the region 25245274-25245395 (122 
103 | nucleotides in length). We aim to design gRNA pairs for which one 
104 | gRNA is located upstream of the exon, and another located downstream 
105 | of the exon. To be able to find good gRNA candidates, let's define those 
106 | regions to have 100 nucleotides on each side:
107 | 
108 | 
109 | ```{r}
110 | library(IRanges)  # NOTE(review): flank() on GRanges is presumably strand-aware -- confirm the upstream/downstream labels match the intended orientation
111 | regionUpstream   <- IRanges::flank(exon, width=100, start=FALSE)  # 100-nt window adjacent to the exon's end side
112 | regionDownstream <- IRanges::flank(exon, width=100, start=TRUE)  # 100-nt window adjacent to the exon's start side
113 | names(regionUpstream) <- "upstreamTarget"
114 | names(regionDownstream) <- "downstreamTarget"
115 | ```
116 | 
117 | Similar to the `findSpacers` function in `crisprDesign`, we will need
118 | to specify a `BSgenome` object containing the reference genome DNA
119 | sequences:
120 | 
121 | ```{r}
122 | bsgenome <- BSgenome.Hsapiens.UCSC.hg38
123 | ```
124 | 
125 | We are now ready to find all candidate gRNA pairs:
126 | 
127 | ```{r}
128 | pairs <- findSpacerPairs(x1 = regionUpstream,      # region searched for the gRNA at position 1
129 |                          x2 = regionDownstream,    # region searched for the gRNA at position 2
130 |                          bsgenome = bsgenome,
131 |                          crisprNuclease = SpCas9)
132 | ```
133 | 
134 | The `x1` and `x2` arguments specify the genomic regions in which gRNAs at
135 | position 1 and position 2 should be targeting, respectively. 
136 | The function finds all possible pair combinations between spacers 
137 | found in the region specified by `x1` and spacers found in the region
138 | specified by `x2`. Let's first name our pairs:
139 | 
140 | ```{r}
141 | names(pairs) <- paste0("pair_", seq_along(pairs))
142 | ```
143 | 
144 | Let's see what the results look like:
145 | ```{r}
146 | head(pairs, n=3)
147 | ```
148 | 
149 | The returned object is a `PairedGuideSet`, which can be thought of as a list 
150 | of two `GuideSet` objects. The first and second `GuideSet` store 
151 | information about gRNAs at position 1 and position 2, respectively. 
152 | They can be accessed using the `first` and `second` functions:
153 | 
154 | ```{r}
155 | grnas1 <- first(pairs)
156 | head(grnas1, n=3)
157 | ```
158 | 
159 | and 
160 | 
161 | ```{r}
162 | grnas2 <- second(pairs)
163 | head(grnas2, n=3)
164 | ```
165 | 
166 | The `pamOrientation` function returns the PAM orientation of the pairs:
167 | 
168 | ```{r}
169 | head(pamOrientation(pairs))
170 | ```
171 | 
172 | and takes 4 different values: `in` (for PAM-in configuration), `out` (for 
173 | PAM-out configuration), `fwd` (both gRNAs target the forward strand), 
174 | and `rev` (both gRNAs target the reverse strand); see figure below for 
175 | an illustration of the PAM orientations for the SpCas9 nuclease. 
176 | The importance of the PAM orientation is application-specific. For 
177 | Nanopore Cas9-targeted sequencing, PAM-in configuration is preferred.
178 | For double nicking with CRISPR/Cas9, PAM-out configuration is preferred.
179 | For applications using a dual-promoter system, no configuration is 
180 | preferred. 
181 | 
182 | 
183 | ```{r, echo=FALSE, out.width = "75%", fig.align="center", fig.cap="Different PAM orientations for Cas9 paired gRNAs"}
184 | knitr::include_graphics("./figures/paired_simplified.svg")  # NOTE(review): confirm this .svg exists; the repository listing only shows figures/paired_simplified.pdf
185 | ```
186 | 
187 | 
188 | The function `pamDistance` returns the distance between the PAM sites of the
189 | two gRNAs. The function `cutLength` returns the distance between the 
190 | cut sites of the two gRNAs, and the function `spacerDistance` 
191 | returns the distance between the two spacer sequences of the gRNAs.
192 | 
193 | Most functionalities available for designing single gRNAs (`GuideSet` 
194 | annotation functions described in [this tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9)) work similarly for `PairedGuideSet` objects. This includes:
195 | 
196 | - `addSequenceFeatures`
197 | - `addSpacerAlignments`
198 | - `addGeneAnnotation`
199 | - `addTssAnnotation`
200 | - `addOnTargetScores`
201 | - `addOffTargetScores`
202 | - `addPamScores`
203 | - `addSNPAnnotation`
204 | - `addRestrictionEnzymes`
205 | - `addCompositeScores`
206 | - `addConservationScores`
207 | 
208 | Each function adds an annotation to the first and second `GuideSet` objects
209 | stored in the `PairedGuideSet`. Let's look at an example using 
210 | `addSequenceFeatures`:
211 | 
212 | ```{r}
213 | pairs <- addSequenceFeatures(pairs)
214 | ```
215 | 
216 | and let's look at the `GuideSet` in the first position:
217 | 
218 | ```{r}
219 | head(first(pairs), n=3)
220 | ```
221 | 
222 | This comes in handy to filter out pairs with unwanted sgRNA characteristics, 
223 | e.g. sgRNA with polyT stretches:
224 | 
225 | ```{r}
226 | hasPolyT1 <- first(pairs)$polyT    # polyT flag for the gRNA at position 1
227 | hasPolyT2 <- second(pairs)$polyT   # polyT flag for the gRNA at position 2
228 | pairs <- pairs[!(hasPolyT1 | hasPolyT2)]
229 | ```
230 | 
231 | To select the final candidate pairs to excise the KRAS exon, we will
232 | filter out pairs with low  predicted on-target activity using the
233 | DeepHF on-target activity score. We first add the score:
234 | 
235 | ```{r}
236 | pairs <- addOnTargetScores(pairs, methods="deephf")
237 | ```
238 | 
239 | and only keep pairs for which both gRNAs have a score of at least 0.5:
240 | 
241 | ```{r}
242 | active1 <- first(pairs)$score_deephf >= 0.5    # position-1 gRNA passes the DeepHF cutoff
243 | active2 <- second(pairs)$score_deephf >= 0.5   # position-2 gRNA passes the DeepHF cutoff
244 | pairs <- pairs[active1 & active2]
245 | ```
246 | 
247 | This leaves us with 2 candidate pairs:
248 | 
249 | ```{r}
250 | pairs
251 | ```
252 | 
253 | 
254 | Finally, let's check for off-targets. 
255 | We need to specify the path of the bowtie index that was generated 
256 | from the human reference genome:
257 | 
258 | ```{r}
259 | bowtie_index <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"  # machine-specific path: replace with the location of your own hg38 bowtie index
260 | ```
261 | 
262 | For instructions on how to build a Bowtie index from a given reference genome, see the [genome index tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) or the [crisprBowtie page](https://github.com/crisprVerse/crisprBowtie). 
263 | 
264 | We are now ready to search for off-targets with up to 3 mismatches:
265 | 
266 | 
267 | ```{r}
268 | pairs <- addSpacerAlignments(pairs,
269 |                              txObject = txdb_human,
270 |                              aligner_index = bowtie_index,
271 |                              bsgenome = bsgenome,
272 |                              n_mismatches = 3)  # search for off-targets with up to 3 mismatches
273 | ```
274 | 
275 | We are in luck, none of the spacer sequences has an off-target in the coding 
276 | region of other genes:
277 | 
278 | 
279 | ```{r}
280 | clean1 <- first(pairs)$n1_c == 0 & first(pairs)$n2_c == 0 & first(pairs)$n3_c == 0
281 | clean2 <- second(pairs)$n1_c == 0 & second(pairs)$n2_c == 0 & second(pairs)$n3_c == 0
282 | pairs <- pairs[clean1 & clean2]  # keep pairs where both gRNAs have zero n1_c, n2_c and n3_c counts
283 | pairs
284 | ```
285 | 
286 | 
287 | One can get the spacer sequences using the `spacers` accessor function
288 | as usual:
289 | 
290 | ```{r}
291 | spacers(pairs)
292 | ```
293 | 
294 | 
295 | 
296 | # Session Info
297 | 
298 | ```{r}
299 | sessionInfo()
300 | ```
301 | 
302 | # References
303 | 
304 | 
305 | 


--------------------------------------------------------------------------------
/Design_OPS/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using crisprDesign to design gRNAs for optical pooled screening (OPS)"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options(knitr.graphics.auto_pdf = TRUE)
 12 | ```
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | Optical pooled screening (OPS) combines image-based sequencing (in 
 18 | situ sequencing) of gRNAs and optical phenotyping on the same physical 
 19 | wells [@ops]. In such experiments, guide RNA (gRNA) spacer sequences 
 20 | are partially sequenced from the 5-prime end; the length of these 
 21 | truncated sequences, or barcodes, which corresponds to the number of 
 22 | sequencing cycles, is fixed and chosen by the experimentalist. From a 
 23 | gRNA design perspective, additional constraints are needed to ensure 
 24 | sufficient dissimilarity between the truncated barcodes for their 
 25 | identification during the analysis.
 26 | 
 27 | This tutorial will demonstrate how to design gRNAs for use in 
 28 | optical pooled screens, with emphasis on the constraints described 
 29 | above. Common gRNA design steps that are not specific to OPS are omitted 
 30 | in this tutorial (e.g. off-target search, or on-target activity prediction).
 31 | Users can browse the list of [available tutorials](https://github.com/crisprVerse/Tutorials) for more information
 32 | regarding application-specific gRNA design rules.
 33 | 
 34 | 
 35 | 
 36 | # Installation
 37 | 
 38 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 39 | `crisprDesign`, `crisprDesignData`
 40 | 
 41 | 
 42 | # Terminology
 43 | 
 44 | See the [CRISPRko design vignette](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar with the terminology used throughout this tutorial.
 45 | 
 46 | 
 47 | # Design for optical pooled screening (OPS)
 48 | 
 49 | To illustrate the functionalities of `crisprDesign` for designing 
 50 | OPS libraries, we will design a small CRISPRko OPS library targeting 
 51 | 3 genes of the human RAS family: KRAS, HRAS, and NRAS. We will use the SpCas9
 52 | nuclease. 
 53 | 
 54 | We will design gRNAs for an experiment that uses 8 in situ sequencing cycles:
 55 | 
 56 | ```{r}
 57 | n_cycles <- 8  # number of in situ sequencing cycles, i.e. the OPS barcode length
 58 | ```
 59 | 
 60 | ## Loading packages
 61 | 
 62 | Before we start, we first load the necessary packages for this tutorial:
 63 | 
 64 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 65 | library(crisprBase)
 66 | library(crisprDesign)
 67 | library(crisprDesignData)
 68 | library(BSgenome.Hsapiens.UCSC.hg38)
 69 | ```
 70 | 
 71 | 
 72 | ## Creating the GuideSet
 73 | 
 74 | We begin by loading the SpCas9 `CrisprNuclease` object from the `crisprBase`
 75 | package
 76 | 
 77 | ```{r}
 78 | data(SpCas9, package="crisprBase")
 79 | ```
 80 | 
 81 | as well as data containing gene regions for the human genome:
 82 | 
 83 | ```{r}
 84 | data(txdb_human, package="crisprDesignData")
 85 | ```
 86 | 
 87 | For more information on `txdb_human` and how to create similar gene annotation objects, see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) tutorial.
 88 | 
 89 | 
 90 | Next, we find the CDS coordinates for our genes using 
 91 | the `queryTxObject` function:
 92 | 
 93 | ```{r}
 94 | target_genes <- c("KRAS", "HRAS", "NRAS")
 95 | target_regions <- queryTxObject(txdb_human,
 96 |                                 featureType = "cds",          # coding sequence ranges
 97 |                                 queryColumn = "gene_symbol",  # match on gene symbol
 98 |                                 queryValue = target_genes)
 99 | ```
100 | 
101 | then build our `GuideSet` with the `findSpacers` function:
102 | 
103 | ```{r}
104 | gs <- findSpacers(target_regions,
105 |                   crisprNuclease = SpCas9,
106 |                   bsgenome = BSgenome.Hsapiens.UCSC.hg38)
107 | ```
108 | 
109 | As we will want to distinguish which gene each spacer targets, 
110 | we will add `gene_symbol` and `gene_id` columns from `target_regions`.
111 | 
112 | ```{r}
113 | region_annot <- target_regions[gs$region]  # look up the source region of each spacer
114 | gs$gene_symbol <- region_annot$gene_symbol
115 | gs$gene_id <- region_annot$gene_id
116 | ```
117 | 
118 | 
119 | ## Adding OPS barcodes
120 | 
121 | We can add our OPS barcodes to the GuideSet with the `addOpsBarcodes` function.
122 | This function extracts the `n_cycles` nucleotides from the 5-prime end of
123 | our spacers and stores them in the `opsBarcode` column:
124 | 
125 | ```{r}
126 | gs <- addOpsBarcodes(gs,
127 |                      n_cycles = n_cycles)  # barcode length equals the number of sequencing cycles
128 | head(gs$opsBarcode)
129 | ```
130 | 
131 | 
132 | ## Barcode distance matrix
133 | 
134 | We can pass our barcodes to the function `getBarcodeDistanceMatrix` to 
135 | calculate the nucleotide distance between them. The `dist_method` argument
136 | determines the type of distance to calculate: `"hamming"`, which only 
137 | considers substitutions (default) or `"levenstein"`, which also allows for
138 | insertions and deletions. 
139 | 
140 | As a brief demonstration, let's look at the distances between the first few
141 | barcodes in our `GuideSet`. We set the `binnarize` argument (more on this
142 | parameter later) to `FALSE` to show distances:
143 | 
144 | ```{r}
145 | barcodes <- gs$opsBarcode
146 | dist_self <- getBarcodeDistanceMatrix(barcodes[1:5],
147 |                                       binnarize = FALSE)  # report distances rather than 0/1 flags
148 | dist_self
149 | ```
150 | 
151 | Note that the output is a sparse matrix, so the barcodes along the diagonal
152 | (i.e., compared against themselves) return `.`, or a distance of zero. 
153 | To compare one set of barcodes against another, we can pass the other set
154 | to the `targetBarcodes` argument (the former barcode set being passed 
155 | to the `queryBarcodes` argument, which is compared against 
156 | itself when `targetBarcodes` is `NULL`):
157 | 
158 | ```{r}
159 | dist_cross <- getBarcodeDistanceMatrix(barcodes[1:5],
160 |                                        targetBarcodes = barcodes[6:10],
161 |                                        binnarize = FALSE)
162 | dist_cross
163 | ```
164 | 
165 | The question we are interested in with respect to barcode distances is 
166 | whether the barcodes are sufficiently dissimilar for accurate identification 
167 | of spacers during sequencing. This minimum distance edit (`min_dist_edit`) 
168 | relies on the accuracy of various steps in the experiment. Suppose,
169 | as a conservative estimate, that we can expect no more than two edits 
170 | per barcode in our example. A `min_dist_edit` of `3` should 
171 | suffice. Setting the `binnarize` argument to `TRUE`, and passing 
172 | our minimum distance edit value to `min_dist_edit` will binarize the
173 | output, flagging barcodes (with a value of `1`) that are too similar
174 | and should not both be included in our library:
175 | 
176 | ```{r}
177 | dist_flags <- getBarcodeDistanceMatrix(barcodes[1:5],
178 |                                        targetBarcodes = barcodes[6:10],
179 |                                        binnarize = TRUE,    # flag pairs instead of reporting distances
180 |                                        min_dist_edit = 3)
181 | dist_flags
182 | ```
183 | 
184 | Using this function with large sets of barcodes can be taxing on memory.
185 | To manage this, it is recommended to set `splitByChunks=TRUE` and specify 
186 | the number of chunks with `n_chunks` (see `?getBarcodeDistanceMatrix`).
187 | 
188 | 
189 | ## Designing OPS libraries
190 | 
191 | The `designOpsLibrary` function allows users to perform a complete 
192 | end-to-end OPS library design. We will design our library with 4 
193 | gRNAs per gene using the `n_guides` and `gene_field` (to identify gRNAs
194 | by gene target) parameters. We will use the same distance method as in the 
195 | example above, but with a more stringent minimum distance edit of 5.
196 | 
197 | Note that this requires a `rank` column in the metadata columns of the GuideSet object to be able to select best guides first. For the purpose of this tutorial, we will create a mock rank column. In practice, to learn how to rank gRNAs, see the [Cas9 gRNA design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9).
198 | 
199 | ```{r}
200 | gs$rank <- seq_along(gs)  # mock rank: input order; seq_along() stays correct even for an empty GuideSet
201 | ```
202 | 
203 | NOTE: it is advised to first complete other steps in gRNA design
204 | (annotating, filtering, and ranking gRNAs in the `GuideSet`) prior to 
205 | using this function; this will ensure the library contains the best 
206 | gRNAs. As this example only uses a mock `rank` column, gRNAs are simply 
207 | prioritized by the order in which they appear in our input.
208 | 
209 | ```{r}
210 | opsLibrary <- designOpsLibrary(gs,
211 |                                n_cycles = n_cycles,
212 |                                n_guides = 4,                 # gRNAs to select per gene
213 |                                gene_field = "gene_symbol",   # column identifying the gene target
214 |                                min_dist_edit = 5,
215 |                                dist_method = "hamming")
216 | opsLibrary
217 | ```
218 | 
219 | 
220 | ## Adding gRNAs to an existing OPS library
221 | 
222 | Suppose we later wish to add another gene target to our library, 
223 | but also want to retain the gRNAs that are currently in our library.
224 | We can append these additional gRNAs with the `updateOpsLibrary` function. 
225 | This function has the same parameters as `designOpsLibrary`, with an 
226 | additional `opsLibrary` argument to which we pass our original OPS library.
227 | 
228 | To demonstrate, we will add the MRAS gene to our library. We first
229 | construct the `GuideSet` for MRAS:
230 | 
231 | ```{r}
232 | target_region <- queryTxObject(txdb_human,
233 |                                featureType="cds",
234 |                                queryColumn="gene_symbol",
235 |                                queryValue="MRAS")
236 | gs_mras <- findSpacers(target_region,
237 |                        crisprNuclease=SpCas9,
238 |                        bsgenome=BSgenome.Hsapiens.UCSC.hg38)
239 | gs_mras$gene_symbol <- "MRAS"
240 | gs_mras$gene_id <- "ENSG00000158186"
241 | gs_mras$rank <- seq_along(gs_mras)  # mock rank: input order; safe when no spacers are found
242 | ```
243 | 
244 | then add barcodes:
245 | 
246 | ```{r}
247 | # Attach OPS barcodes of length n_cycles to the MRAS spacers
248 | gs_mras <- addOpsBarcodes(gs_mras,
249 |                           n_cycles = n_cycles)
250 | ```
251 | 
252 | which we then pass with our other parameters to `updateOpsLibrary`:
253 | 
254 | ```{r}
255 | opsLibrary <- updateOpsLibrary(opsLibrary,               # existing library to extend
256 |                                gs_mras,                  # candidate gRNAs for the new gene
257 |                                n_cycles = n_cycles,
258 |                                n_guides = 4,
259 |                                gene_field = "gene_symbol",
260 |                                min_dist_edit = 5,
261 |                                dist_method = "hamming")
262 | opsLibrary
263 | ```
264 | 
265 | 
266 | 
267 | # Session Info
268 | 
269 | ```{r}
270 | sessionInfo()
271 | ```
272 | 
273 | 
274 | # References
275 | 


--------------------------------------------------------------------------------
/Design_CRISPRkd_CasRx/references.bib:
--------------------------------------------------------------------------------
  1 | @article{casrxrf,
  2 |   title={Massively parallel Cas13 screens reveal principles for guide RNA design},
  3 |   author={Wessels, Hans-Hermann and M{\'e}ndez-Mancilla, Alejandro and Guo, Xinyi and Legut, Mateusz and Daniloski, Zharko and Sanjana, Neville E},
  4 |   journal={Nature biotechnology},
  5 |   volume={38},
  6 |   number={6},
  7 |   pages={722--727},
  8 |   year={2020},
  9 |   publisher={Nature Publishing Group}
 10 | }
 11 | 
 12 | 
 13 | @article{ops,
 14 |   title={Optical pooled screens in human cells},
 15 |   author={Feldman, David and Singh, Avtar and Schmid-Burgk, Jonathan L and Carlson, Rebecca J and Mezger, Anja and Garrity, Anthony J and Zhang, Feng and Blainey, Paul C},
 16 |   journal={Cell},
 17 |   volume={179},
 18 |   number={3},
 19 |   pages={787--799},
 20 |   year={2019},
 21 |   publisher={Elsevier}
 22 | }
 23 | 
 24 | @article{cas13d,
 25 |   author = {Konermann, Silvana and Lotfy, Peter and Brideau, Nicholas J and Oki, Jennifer and Shokhirev, Maxim N and Hsu, Patrick D},
 26 |   journal = {Cell},
 27 |   number = {3},
 28 |   pages = {665--676},
 29 |   publisher = {Elsevier},
 30 |   title = {Transcriptome engineering with RNA-targeting type VI-D CRISPR effectors},
 31 |   volume = {173},
 32 |   year = {2018}}
 33 | 
 34 | @article{koblan2018improving,
 35 |   title={Improving cytidine and adenine base editors by expression optimization and ancestral reconstruction},
 36 |   author={Koblan, Luke W and Doman, Jordan L and Wilson, Christopher and Levy, Jonathan M and Tay, Tristan and Newby, Gregory A and Maianti, Juan Pablo and Raguram, Aditya and Liu, David R},
 37 |   journal={Nature biotechnology},
 38 |   volume={36},
 39 |   number={9},
 40 |   pages={843--846},
 41 |   year={2018},
 42 |   publisher={Nature Publishing Group}
 43 | }
 44 | 
 45 | @article{sanson2018optimized,
 46 |   title={Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
 47 |   author={Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
 48 |   journal={Nature communications},
 49 |   volume={9},
 50 |   number={1},
 51 |   pages={1--15},
 52 |   year={2018},
 53 |   publisher={Nature Publishing Group}
 54 | }
 55 | 
 56 | @article{langmead2009bowtie,
 57 | 	Abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
 58 | 	Author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
 59 | 	Da = {2009/03/04},
 60 | 	Doi = {10.1186/gb-2009-10-3-r25},
 61 | 	Id = {Langmead2009},
 62 | 	Isbn = {1474-760X},
 63 | 	Journal = {Genome Biology},
 64 | 	Number = {3},
 65 | 	Pages = {R25},
 66 | 	Title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
 67 | 	Ty = {JOUR},
 68 | 	Url = {https://doi.org/10.1186/gb-2009-10-3-r25},
 69 | 	Volume = {10},
 70 | 	Year = {2009}
 71 | }
 72 | 
 73 | 
 74 | @article{lindel,
 75 |   title={Massively parallel profiling and predictive modeling of the outcomes of CRISPR/Cas9-mediated double-strand break repair},
 76 |   author={Chen, Wei and McKenna, Aaron and Schreiber, Jacob and Haeussler, Maximilian and Yin, Yi and Agarwal, Vikram and Noble, William Stafford and Shendure, Jay},
 77 |   journal={Nucleic acids research},
 78 |   volume={47},
 79 |   number={15},
 80 |   pages={7989--8003},
 81 |   year={2019},
 82 |   publisher={Oxford University Press}
 83 | }
 84 | 
 85 | @article{azimuth,
 86 |   title={Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9},
 87 |   author={Doench, John G and Fusi, Nicolo and Sullender, Meagan and Hegde, Mudra and Vaimberg, Emma W and Donovan, Katherine F and Smith, Ian and Tothova, Zuzana and Wilen, Craig and Orchard, Robert and others},
 88 |   journal={Nature biotechnology},
 89 |   volume={34},
 90 |   number={2},
 91 |   pages={184},
 92 |   year={2016},
 93 |   publisher={Nature Publishing Group}
 94 | }
 95 | 
 96 | @article{deepcas9,
 97 |   title={Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning},
 98 |   author={Wang, Daqi and Zhang, Chengdong and Wang, Bei and Li, Bin and Wang, Qiang and Liu, Dong and Wang, Hongyan and Zhou, Yan and Shi, Leming and Lan, Feng and others},
 99 |   journal={Nature communications},
100 |   volume={10},
101 |   number={1},
102 |   pages={1--14},
103 |   year={2019},
104 |   publisher={Nature Publishing Group}
105 | }
106 | 
107 | @article{deepcpf1,
108 |   title={Deep learning improves prediction of CRISPR--Cpf1 guide RNA activity},
109 |   author={Kim, Hui Kwon and Min, Seonwoo and Song, Myungjae and Jung, Soobin and Choi, Jae Woo and Kim, Younggwang and Lee, Sangeun and Yoon, Sungroh and Kim, Hyongbum Henry},
110 |   journal={Nature biotechnology},
111 |   volume={36},
112 |   number={3},
113 |   pages={239},
114 |   year={2018},
115 |   publisher={Nature Publishing Group}
116 | }
117 | 
118 | @article{perturbseq,
119 |   title={A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response},
120 |   author={Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and others},
121 |   journal={Cell},
122 |   volume={167},
123 |   number={7},
124 |   pages={1867--1882},
125 |   year={2016},
126 |   publisher={Elsevier}
127 | }
128 | 
129 | @article{cropseq,
130 |   title={Pooled CRISPR screening with single-cell transcriptome readout},
131 |   author={Datlinger, Paul and Rendeiro, Andr{\'e} F and Schmidl, Christian and Krausgruber, Thomas and Traxler, Peter and Klughammer, Johanna and Schuster, Linda C and Kuchler, Amelie and Alpar, Donat and Bock, Christoph},
132 |   journal={Nature methods},
133 |   volume={14},
134 |   number={3},
135 |   pages={297},
136 |   year={2017},
137 |   publisher={Nature Publishing Group}
138 | }
139 | 
140 | @article{crispracrisprireview,
141 |   title={CRISPRi and CRISPRa screens in mammalian cells for precision biology and medicine},
142 |   author={Kampmann, Martin},
143 |   journal={ACS chemical biology},
144 |   volume={13},
145 |   number={2},
146 |   pages={406--416},
147 |   year={2018},
148 |   publisher={ACS Publications}
149 | }
150 | 
151 | 
152 | 
153 | @article{crispri,
154 |   title={CRISPR-mediated modular RNA-guided regulation of transcription in eukaryotes},
155 |   author={Gilbert, Luke A and Larson, Matthew H and Morsut, Leonardo and Liu, Zairan and Brar, Gloria A and Torres, Sandra E and Stern-Ginossar, Noam and Brandman, Onn and Whitehead, Evan H and Doudna, Jennifer A and others},
156 |   journal={Cell},
157 |   volume={154},
158 |   number={2},
159 |   pages={442--451},
160 |   year={2013},
161 |   publisher={Elsevier}
162 | }
163 | 
164 | 
165 | @article{sam,
166 |   title={Genome-scale transcriptional activation by an engineered CRISPR-Cas9 complex},
167 |   author={Konermann, Silvana and Brigham, Mark D and Trevino, Alexandro E and Joung, Julia and Abudayyeh, Omar O and Barcena, Clea and Hsu, Patrick D and Habib, Naomi and Gootenberg, Jonathan S and Nishimasu, Hiroshi and others},
168 |   journal={Nature},
169 |   volume={517},
170 |   number={7536},
171 |   pages={583},
172 |   year={2015},
173 |   publisher={Nature Publishing Group}
174 | }
175 | 
176 | @article{fortin2019,
177 |   title={Multiple-gene targeting and mismatch tolerance can confound analysis of genome-wide pooled CRISPR screens},
178 |   author={Fortin, Jean-Philippe and Tan, Jenille and Gascoigne, Karen E and Haverty, Peter M and Forrest, William F and Costa, Michael R and Martin, Scott E},
179 |   journal={Genome biology},
180 |   volume={20},
181 |   number={1},
182 |   pages={21},
183 |   year={2019},
184 |   publisher={Springer}
185 | }
186 | 
187 | 
188 | 
189 | @article{sanson2018optimized,
190 |   title={Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
191 |   author={Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
192 |   journal={Nature communications},
193 |   volume={9},
194 |   number={1},
195 |   pages={1--15},
196 |   year={2018},
197 |   publisher={Nature Publishing Group}
198 | }
199 | 
200 | 
201 | 
202 | @article{horlbeck2016compact,
203 |   title={Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
204 |   author={Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
205 |   journal={Elife},
206 |   volume={5},
207 |   pages={e19760},
208 |   year={2016},
209 |   publisher={eLife Sciences Publications Limited}
210 | }
211 | 
212 | 
213 | @article{ceres,
214 |   title={Computational correction of copy number effect improves specificity of CRISPR--Cas9 essentiality screens in cancer cells},
215 |   author={Meyers, Robin M and Bryan, Jordan G and McFarland, James M and Weir, Barbara A and Sizemore, Ann E and Xu, Han and Dharia, Neekesh V and Montgomery, Phillip G and Cowley, Glenn S and Pantel, Sasha and others},
216 |   journal={Nature genetics},
217 |   volume={49},
218 |   number={12},
219 |   pages={1779--1784},
220 |   year={2017},
221 |   publisher={Nature Publishing Group}
222 | }
223 | 
224 | 
225 | @article{score,
226 |   title={Prioritization of cancer therapeutic targets using CRISPR--Cas9 screens},
227 |   author={Behan, Fiona M and Iorio, Francesco and Picco, Gabriele and Gon{\c{c}}alves, Emanuel and Beaver, Charlotte M and Migliardi, Giorgia and Santos, Rita and Rao, Yanhua and Sassi, Francesco and Pinnelli, Marika and others},
228 |   journal={Nature},
229 |   volume={568},
230 |   number={7753},
231 |   pages={511},
232 |   year={2019},
233 |   publisher={Nature Publishing Group}
234 | }
235 | 
236 | 
237 | 
238 | @article{mit,
239 |   title={DNA targeting specificity of RNA-guided Cas9 nucleases},
240 |   author={Hsu, Patrick D and Scott, David A and Weinstein, Joshua A and Ran, F Ann and Konermann, Silvana and Agarwala, Vineeta and Li, Yinqing and Fine, Eli J and Wu, Xuebing and Shalem, Ophir and others},
241 |   journal={Nature biotechnology},
242 |   volume={31},
243 |   number={9},
244 |   pages={827},
245 |   year={2013},
246 |   publisher={Nature Publishing Group}
247 | }
248 | 
249 | 
250 | 
251 | 


--------------------------------------------------------------------------------
/Design_CRISPRa/references.bib:
--------------------------------------------------------------------------------
  1 | %% This BibTeX bibliography file was created using BibDesk.
  2 | %% https://bibdesk.sourceforge.io/
  3 | 
  4 | %% Created for Luke Hoberecht at 2022-08-02 15:33:04 -0700 
  5 | 
  6 | 
  7 | %% Saved with string encoding Unicode (UTF-8) 
  8 | 
  9 | 
 10 | 
 11 | @article{crisprai,
 12 | 	author = {Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
 13 | 	date-added = {2022-08-02 15:33:01 -0700},
 14 | 	date-modified = {2022-08-02 15:33:01 -0700},
 15 | 	journal = {elife},
 16 | 	publisher = {eLife Sciences Publications, Ltd},
 17 | 	title = {Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
 18 | 	volume = {5},
 19 | 	year = {2016}}
 20 | 
 21 | @article{ops,
 22 | 	author = {Feldman, David and Singh, Avtar and Schmid-Burgk, Jonathan L and Carlson, Rebecca J and Mezger, Anja and Garrity, Anthony J and Zhang, Feng and Blainey, Paul C},
 23 | 	journal = {Cell},
 24 | 	number = {3},
 25 | 	pages = {787--799},
 26 | 	publisher = {Elsevier},
 27 | 	title = {Optical pooled screens in human cells},
 28 | 	volume = {179},
 29 | 	year = {2019}}
 30 | 
 31 | @article{cas13d,
 32 | 	author = {Konermann, Silvana and Lotfy, Peter and Brideau, Nicholas J and Oki, Jennifer and Shokhirev, Maxim N and Hsu, Patrick D},
 33 | 	journal = {Cell},
 34 | 	number = {3},
 35 | 	pages = {665--676},
 36 | 	publisher = {Elsevier},
 37 | 	title = {Transcriptome engineering with RNA-targeting type VI-D CRISPR effectors},
 38 | 	volume = {173},
 39 | 	year = {2018}}
 40 | 
 41 | @article{koblan2018improving,
 42 | 	author = {Koblan, Luke W and Doman, Jordan L and Wilson, Christopher and Levy, Jonathan M and Tay, Tristan and Newby, Gregory A and Maianti, Juan Pablo and Raguram, Aditya and Liu, David R},
 43 | 	journal = {Nature biotechnology},
 44 | 	number = {9},
 45 | 	pages = {843--846},
 46 | 	publisher = {Nature Publishing Group},
 47 | 	title = {Improving cytidine and adenine base editors by expression optimization and ancestral reconstruction},
 48 | 	volume = {36},
 49 | 	year = {2018}}
 50 | 
 51 | @article{sanson2018optimized,
 52 | 	author = {Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
 53 | 	journal = {Nature communications},
 54 | 	number = {1},
 55 | 	pages = {1--15},
 56 | 	publisher = {Nature Publishing Group},
 57 | 	title = {Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
 58 | 	volume = {9},
 59 | 	year = {2018}}
 60 | 
 61 | @article{langmead2009bowtie,
 62 | 	abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
 63 | 	author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
 64 | 	da = {2009/03/04},
 65 | 	doi = {10.1186/gb-2009-10-3-r25},
 66 | 	id = {Langmead2009},
 67 | 	isbn = {1474-760X},
 68 | 	journal = {Genome Biology},
 69 | 	number = {3},
 70 | 	pages = {R25},
 71 | 	title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
 72 | 	ty = {JOUR},
 73 | 	url = {https://doi.org/10.1186/gb-2009-10-3-r25},
 74 | 	volume = {10},
 75 | 	year = {2009},
 76 | 	bdsk-url-1 = {https://doi.org/10.1186/gb-2009-10-3-r25}}
 77 | 
 78 | @article{lindel,
 79 | 	author = {Chen, Wei and McKenna, Aaron and Schreiber, Jacob and Haeussler, Maximilian and Yin, Yi and Agarwal, Vikram and Noble, William Stafford and Shendure, Jay},
 80 | 	journal = {Nucleic acids research},
 81 | 	number = {15},
 82 | 	pages = {7989--8003},
 83 | 	publisher = {Oxford University Press},
 84 | 	title = {Massively parallel profiling and predictive modeling of the outcomes of CRISPR/Cas9-mediated double-strand break repair},
 85 | 	volume = {47},
 86 | 	year = {2019}}
 87 | 
 88 | @article{azimuth,
 89 | 	author = {Doench, John G and Fusi, Nicolo and Sullender, Meagan and Hegde, Mudra and Vaimberg, Emma W and Donovan, Katherine F and Smith, Ian and Tothova, Zuzana and Wilen, Craig and Orchard, Robert and others},
 90 | 	journal = {Nature biotechnology},
 91 | 	number = {2},
 92 | 	pages = {184},
 93 | 	publisher = {Nature Publishing Group},
 94 | 	title = {Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9},
 95 | 	volume = {34},
 96 | 	year = {2016}}
 97 | 
 98 | @article{deepcas9,
 99 | 	author = {Wang, Daqi and Zhang, Chengdong and Wang, Bei and Li, Bin and Wang, Qiang and Liu, Dong and Wang, Hongyan and Zhou, Yan and Shi, Leming and Lan, Feng and others},
100 | 	journal = {Nature communications},
101 | 	number = {1},
102 | 	pages = {1--14},
103 | 	publisher = {Nature Publishing Group},
104 | 	title = {Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning},
105 | 	volume = {10},
106 | 	year = {2019}}
107 | 
108 | @article{deepcpf1,
109 | 	author = {Kim, Hui Kwon and Min, Seonwoo and Song, Myungjae and Jung, Soobin and Choi, Jae Woo and Kim, Younggwang and Lee, Sangeun and Yoon, Sungroh and Kim, Hyongbum Henry},
110 | 	journal = {Nature biotechnology},
111 | 	number = {3},
112 | 	pages = {239},
113 | 	publisher = {Nature Publishing Group},
114 | 	title = {Deep learning improves prediction of CRISPR--Cpf1 guide RNA activity},
115 | 	volume = {36},
116 | 	year = {2018}}
117 | 
118 | @article{perturbseq,
119 | 	author = {Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and others},
120 | 	journal = {Cell},
121 | 	number = {7},
122 | 	pages = {1867--1882},
123 | 	publisher = {Elsevier},
124 | 	title = {A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response},
125 | 	volume = {167},
126 | 	year = {2016}}
127 | 
128 | @article{cropseq,
129 | 	author = {Datlinger, Paul and Rendeiro, Andr{\'e} F and Schmidl, Christian and Krausgruber, Thomas and Traxler, Peter and Klughammer, Johanna and Schuster, Linda C and Kuchler, Amelie and Alpar, Donat and Bock, Christoph},
130 | 	journal = {Nature methods},
131 | 	number = {3},
132 | 	pages = {297},
133 | 	publisher = {Nature Publishing Group},
134 | 	title = {Pooled CRISPR screening with single-cell transcriptome readout},
135 | 	volume = {14},
136 | 	year = {2017}}
137 | 
138 | @article{crispracrisprireview,
139 | 	author = {Kampmann, Martin},
140 | 	journal = {ACS chemical biology},
141 | 	number = {2},
142 | 	pages = {406--416},
143 | 	publisher = {ACS Publications},
144 | 	title = {CRISPRi and CRISPRa screens in mammalian cells for precision biology and medicine},
145 | 	volume = {13},
146 | 	year = {2018}}
147 | 
148 | @article{crispri,
149 | 	author = {Gilbert, Luke A and Larson, Matthew H and Morsut, Leonardo and Liu, Zairan and Brar, Gloria A and Torres, Sandra E and Stern-Ginossar, Noam and Brandman, Onn and Whitehead, Evan H and Doudna, Jennifer A and others},
150 | 	journal = {Cell},
151 | 	number = {2},
152 | 	pages = {442--451},
153 | 	publisher = {Elsevier},
154 | 	title = {CRISPR-mediated modular RNA-guided regulation of transcription in eukaryotes},
155 | 	volume = {154},
156 | 	year = {2013}}
157 | 
158 | @article{sam,
159 | 	author = {Konermann, Silvana and Brigham, Mark D and Trevino, Alexandro E and Joung, Julia and Abudayyeh, Omar O and Barcena, Clea and Hsu, Patrick D and Habib, Naomi and Gootenberg, Jonathan S and Nishimasu, Hiroshi and others},
160 | 	journal = {Nature},
161 | 	number = {7536},
162 | 	pages = {583},
163 | 	publisher = {Nature Publishing Group},
164 | 	title = {Genome-scale transcriptional activation by an engineered CRISPR-Cas9 complex},
165 | 	volume = {517},
166 | 	year = {2015}}
167 | 
168 | @article{fortin2019,
169 | 	author = {Fortin, Jean-Philippe and Tan, Jenille and Gascoigne, Karen E and Haverty, Peter M and Forrest, William F and Costa, Michael R and Martin, Scott E},
170 | 	journal = {Genome biology},
171 | 	number = {1},
172 | 	pages = {21},
173 | 	publisher = {Springer},
174 | 	title = {Multiple-gene targeting and mismatch tolerance can confound analysis of genome-wide pooled CRISPR screens},
175 | 	volume = {20},
176 | 	year = {2019}}
177 | 
178 | @article{sanson2018optimized,
179 | 	author = {Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
180 | 	journal = {Nature communications},
181 | 	number = {1},
182 | 	pages = {1--15},
183 | 	publisher = {Nature Publishing Group},
184 | 	title = {Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
185 | 	volume = {9},
186 | 	year = {2018}}
187 | 
188 | @article{horlbeck2016compact,
189 | 	author = {Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
190 | 	journal = {eLife},
191 | 	pages = {e19760},
192 | 	publisher = {eLife Sciences Publications Limited},
193 | 	title = {Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
194 | 	volume = {5},
195 | 	year = {2016}}
196 | 
197 | @article{ceres,
198 | 	author = {Meyers, Robin M and Bryan, Jordan G and McFarland, James M and Weir, Barbara A and Sizemore, Ann E and Xu, Han and Dharia, Neekesh V and Montgomery, Phillip G and Cowley, Glenn S and Pantel, Sasha and others},
199 | 	journal = {Nature genetics},
200 | 	number = {12},
201 | 	pages = {1779--1784},
202 | 	publisher = {Nature Publishing Group},
203 | 	title = {Computational correction of copy number effect improves specificity of CRISPR--Cas9 essentiality screens in cancer cells},
204 | 	volume = {49},
205 | 	year = {2017}}
206 | 
207 | @article{score,
208 | 	author = {Behan, Fiona M and Iorio, Francesco and Picco, Gabriele and Gon{\c{c}}alves, Emanuel and Beaver, Charlotte M and Migliardi, Giorgia and Santos, Rita and Rao, Yanhua and Sassi, Francesco and Pinnelli, Marika and others},
209 | 	journal = {Nature},
210 | 	number = {7753},
211 | 	pages = {511},
212 | 	publisher = {Nature Publishing Group},
213 | 	title = {Prioritization of cancer therapeutic targets using CRISPR--Cas9 screens},
214 | 	volume = {568},
215 | 	year = {2019}}
216 | 
217 | @article{mit,
218 | 	author = {Hsu, Patrick D and Scott, David A and Weinstein, Joshua A and Ran, F Ann and Konermann, Silvana and Agarwala, Vineeta and Li, Yinqing and Fine, Eli J and Wu, Xuebing and Shalem, Ophir and others},
219 | 	journal = {Nature biotechnology},
220 | 	number = {9},
221 | 	pages = {827},
222 | 	publisher = {Nature Publishing Group},
223 | 	title = {DNA targeting specificity of RNA-guided Cas9 nucleases},
224 | 	volume = {31},
225 | 	year = {2013}}
226 | 


--------------------------------------------------------------------------------
/Design_CRISPRi/references.bib:
--------------------------------------------------------------------------------
  1 | %% This BibTeX bibliography file was created using BibDesk.
  2 | %% https://bibdesk.sourceforge.io/
  3 | 
  4 | %% Created for Luke Hoberecht at 2022-08-02 15:33:04 -0700 
  5 | 
  6 | 
  7 | %% Saved with string encoding Unicode (UTF-8) 
  8 | 
  9 | 
 10 | 
 11 | @article{crisprai,
 12 | 	author = {Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
 13 | 	date-added = {2022-08-02 15:33:01 -0700},
 14 | 	date-modified = {2022-08-02 15:33:01 -0700},
 15 | 	journal = {eLife},
 16 | 	publisher = {eLife Sciences Publications, Ltd},
 17 | 	title = {Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
 18 | 	volume = {5},
 19 | 	year = {2016}}
 20 | 
 21 | @article{ops,
 22 | 	author = {Feldman, David and Singh, Avtar and Schmid-Burgk, Jonathan L and Carlson, Rebecca J and Mezger, Anja and Garrity, Anthony J and Zhang, Feng and Blainey, Paul C},
 23 | 	journal = {Cell},
 24 | 	number = {3},
 25 | 	pages = {787--799},
 26 | 	publisher = {Elsevier},
 27 | 	title = {Optical pooled screens in human cells},
 28 | 	volume = {179},
 29 | 	year = {2019}}
 30 | 
 31 | @article{cas13d,
 32 | 	author = {Konermann, Silvana and Lotfy, Peter and Brideau, Nicholas J and Oki, Jennifer and Shokhirev, Maxim N and Hsu, Patrick D},
 33 | 	journal = {Cell},
 34 | 	number = {3},
 35 | 	pages = {665--676},
 36 | 	publisher = {Elsevier},
 37 | 	title = {Transcriptome engineering with RNA-targeting type VI-D CRISPR effectors},
 38 | 	volume = {173},
 39 | 	year = {2018}}
 40 | 
 41 | @article{koblan2018improving,
 42 | 	author = {Koblan, Luke W and Doman, Jordan L and Wilson, Christopher and Levy, Jonathan M and Tay, Tristan and Newby, Gregory A and Maianti, Juan Pablo and Raguram, Aditya and Liu, David R},
 43 | 	journal = {Nature biotechnology},
 44 | 	number = {9},
 45 | 	pages = {843--846},
 46 | 	publisher = {Nature Publishing Group},
 47 | 	title = {Improving cytidine and adenine base editors by expression optimization and ancestral reconstruction},
 48 | 	volume = {36},
 49 | 	year = {2018}}
 50 | 
 51 | @article{sanson2018optimized,
 52 | 	author = {Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
 53 | 	journal = {Nature communications},
 54 | 	number = {1},
 55 | 	pages = {1--15},
 56 | 	publisher = {Nature Publishing Group},
 57 | 	title = {Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
 58 | 	volume = {9},
 59 | 	year = {2018}}
 60 | 
 61 | @article{langmead2009bowtie,
 62 | 	abstract = {Bowtie is an ultrafast, memory-efficient alignment program for aligning short DNA sequence reads to large genomes. For the human genome, Burrows-Wheeler indexing allows Bowtie to align more than 25 million reads per CPU hour with a memory footprint of approximately 1.3 gigabytes. Bowtie extends previous Burrows-Wheeler techniques with a novel quality-aware backtracking algorithm that permits mismatches. Multiple processor cores can be used simultaneously to achieve even greater alignment speeds. Bowtie is open source http://bowtie.cbcb.umd.edu.},
 63 | 	author = {Langmead, Ben and Trapnell, Cole and Pop, Mihai and Salzberg, Steven L.},
 64 | 	da = {2009/03/04},
 65 | 	doi = {10.1186/gb-2009-10-3-r25},
 66 | 	id = {Langmead2009},
 67 | 	isbn = {1474-760X},
 68 | 	journal = {Genome Biology},
 69 | 	number = {3},
 70 | 	pages = {R25},
 71 | 	title = {Ultrafast and memory-efficient alignment of short DNA sequences to the human genome},
 72 | 	ty = {JOUR},
 73 | 	url = {https://doi.org/10.1186/gb-2009-10-3-r25},
 74 | 	volume = {10},
 75 | 	year = {2009},
 76 | 	bdsk-url-1 = {https://doi.org/10.1186/gb-2009-10-3-r25}}
 77 | 
 78 | @article{lindel,
 79 | 	author = {Chen, Wei and McKenna, Aaron and Schreiber, Jacob and Haeussler, Maximilian and Yin, Yi and Agarwal, Vikram and Noble, William Stafford and Shendure, Jay},
 80 | 	journal = {Nucleic acids research},
 81 | 	number = {15},
 82 | 	pages = {7989--8003},
 83 | 	publisher = {Oxford University Press},
 84 | 	title = {Massively parallel profiling and predictive modeling of the outcomes of CRISPR/Cas9-mediated double-strand break repair},
 85 | 	volume = {47},
 86 | 	year = {2019}}
 87 | 
 88 | @article{azimuth,
 89 | 	author = {Doench, John G and Fusi, Nicolo and Sullender, Meagan and Hegde, Mudra and Vaimberg, Emma W and Donovan, Katherine F and Smith, Ian and Tothova, Zuzana and Wilen, Craig and Orchard, Robert and others},
 90 | 	journal = {Nature biotechnology},
 91 | 	number = {2},
 92 | 	pages = {184},
 93 | 	publisher = {Nature Publishing Group},
 94 | 	title = {Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9},
 95 | 	volume = {34},
 96 | 	year = {2016}}
 97 | 
 98 | @article{deepcas9,
 99 | 	author = {Wang, Daqi and Zhang, Chengdong and Wang, Bei and Li, Bin and Wang, Qiang and Liu, Dong and Wang, Hongyan and Zhou, Yan and Shi, Leming and Lan, Feng and others},
100 | 	journal = {Nature communications},
101 | 	number = {1},
102 | 	pages = {1--14},
103 | 	publisher = {Nature Publishing Group},
104 | 	title = {Optimized CRISPR guide RNA design for two high-fidelity Cas9 variants by deep learning},
105 | 	volume = {10},
106 | 	year = {2019}}
107 | 
108 | @article{deepcpf1,
109 | 	author = {Kim, Hui Kwon and Min, Seonwoo and Song, Myungjae and Jung, Soobin and Choi, Jae Woo and Kim, Younggwang and Lee, Sangeun and Yoon, Sungroh and Kim, Hyongbum Henry},
110 | 	journal = {Nature biotechnology},
111 | 	number = {3},
112 | 	pages = {239},
113 | 	publisher = {Nature Publishing Group},
114 | 	title = {Deep learning improves prediction of CRISPR--Cpf1 guide RNA activity},
115 | 	volume = {36},
116 | 	year = {2018}}
117 | 
118 | @article{perturbseq,
119 | 	author = {Adamson, Britt and Norman, Thomas M and Jost, Marco and Cho, Min Y and Nu{\~n}ez, James K and Chen, Yuwen and Villalta, Jacqueline E and Gilbert, Luke A and Horlbeck, Max A and Hein, Marco Y and others},
120 | 	journal = {Cell},
121 | 	number = {7},
122 | 	pages = {1867--1882},
123 | 	publisher = {Elsevier},
124 | 	title = {A multiplexed single-cell CRISPR screening platform enables systematic dissection of the unfolded protein response},
125 | 	volume = {167},
126 | 	year = {2016}}
127 | 
128 | @article{cropseq,
129 | 	author = {Datlinger, Paul and Rendeiro, Andr{\'e} F and Schmidl, Christian and Krausgruber, Thomas and Traxler, Peter and Klughammer, Johanna and Schuster, Linda C and Kuchler, Amelie and Alpar, Donat and Bock, Christoph},
130 | 	journal = {Nature methods},
131 | 	number = {3},
132 | 	pages = {297},
133 | 	publisher = {Nature Publishing Group},
134 | 	title = {Pooled CRISPR screening with single-cell transcriptome readout},
135 | 	volume = {14},
136 | 	year = {2017}}
137 | 
138 | @article{crispracrisprireview,
139 | 	author = {Kampmann, Martin},
140 | 	journal = {ACS chemical biology},
141 | 	number = {2},
142 | 	pages = {406--416},
143 | 	publisher = {ACS Publications},
144 | 	title = {CRISPRi and CRISPRa screens in mammalian cells for precision biology and medicine},
145 | 	volume = {13},
146 | 	year = {2018}}
147 | 
148 | @article{crispri,
149 | 	author = {Gilbert, Luke A and Larson, Matthew H and Morsut, Leonardo and Liu, Zairan and Brar, Gloria A and Torres, Sandra E and Stern-Ginossar, Noam and Brandman, Onn and Whitehead, Evan H and Doudna, Jennifer A and others},
150 | 	journal = {Cell},
151 | 	number = {2},
152 | 	pages = {442--451},
153 | 	publisher = {Elsevier},
154 | 	title = {CRISPR-mediated modular RNA-guided regulation of transcription in eukaryotes},
155 | 	volume = {154},
156 | 	year = {2013}}
157 | 
158 | @article{sam,
159 | 	author = {Konermann, Silvana and Brigham, Mark D and Trevino, Alexandro E and Joung, Julia and Abudayyeh, Omar O and Barcena, Clea and Hsu, Patrick D and Habib, Naomi and Gootenberg, Jonathan S and Nishimasu, Hiroshi and others},
160 | 	journal = {Nature},
161 | 	number = {7536},
162 | 	pages = {583},
163 | 	publisher = {Nature Publishing Group},
164 | 	title = {Genome-scale transcriptional activation by an engineered CRISPR-Cas9 complex},
165 | 	volume = {517},
166 | 	year = {2015}}
167 | 
168 | @article{fortin2019,
169 | 	author = {Fortin, Jean-Philippe and Tan, Jenille and Gascoigne, Karen E and Haverty, Peter M and Forrest, William F and Costa, Michael R and Martin, Scott E},
170 | 	journal = {Genome biology},
171 | 	number = {1},
172 | 	pages = {21},
173 | 	publisher = {Springer},
174 | 	title = {Multiple-gene targeting and mismatch tolerance can confound analysis of genome-wide pooled CRISPR screens},
175 | 	volume = {20},
176 | 	year = {2019}}
177 | 
178 | @article{sanson2018optimized,
179 | 	author = {Sanson, Kendall R and Hanna, Ruth E and Hegde, Mudra and Donovan, Katherine F and Strand, Christine and Sullender, Meagan E and Vaimberg, Emma W and Goodale, Amy and Root, David E and Piccioni, Federica and others},
180 | 	journal = {Nature communications},
181 | 	number = {1},
182 | 	pages = {1--15},
183 | 	publisher = {Nature Publishing Group},
184 | 	title = {Optimized libraries for CRISPR-Cas9 genetic screens with multiple modalities},
185 | 	volume = {9},
186 | 	year = {2018}}
187 | 
188 | @article{horlbeck2016compact,
189 | 	author = {Horlbeck, Max A and Gilbert, Luke A and Villalta, Jacqueline E and Adamson, Britt and Pak, Ryan A and Chen, Yuwen and Fields, Alexander P and Park, Chong Yon and Corn, Jacob E and Kampmann, Martin and others},
190 | 	journal = {eLife},
191 | 	pages = {e19760},
192 | 	publisher = {eLife Sciences Publications Limited},
193 | 	title = {Compact and highly active next-generation libraries for CRISPR-mediated gene repression and activation},
194 | 	volume = {5},
195 | 	year = {2016}}
196 | 
197 | @article{ceres,
198 | 	author = {Meyers, Robin M and Bryan, Jordan G and McFarland, James M and Weir, Barbara A and Sizemore, Ann E and Xu, Han and Dharia, Neekesh V and Montgomery, Phillip G and Cowley, Glenn S and Pantel, Sasha and others},
199 | 	journal = {Nature genetics},
200 | 	number = {12},
201 | 	pages = {1779--1784},
202 | 	publisher = {Nature Publishing Group},
203 | 	title = {Computational correction of copy number effect improves specificity of CRISPR--Cas9 essentiality screens in cancer cells},
204 | 	volume = {49},
205 | 	year = {2017}}
206 | 
207 | @article{score,
208 | 	author = {Behan, Fiona M and Iorio, Francesco and Picco, Gabriele and Gon{\c{c}}alves, Emanuel and Beaver, Charlotte M and Migliardi, Giorgia and Santos, Rita and Rao, Yanhua and Sassi, Francesco and Pinnelli, Marika and others},
209 | 	journal = {Nature},
210 | 	number = {7753},
211 | 	pages = {511},
212 | 	publisher = {Nature Publishing Group},
213 | 	title = {Prioritization of cancer therapeutic targets using CRISPR--Cas9 screens},
214 | 	volume = {568},
215 | 	year = {2019}}
216 | 
217 | @article{mit,
218 | 	author = {Hsu, Patrick D and Scott, David A and Weinstein, Joshua A and Ran, F Ann and Konermann, Silvana and Agarwala, Vineeta and Li, Yinqing and Fine, Eli J and Wu, Xuebing and Shalem, Ophir and others},
219 | 	journal = {Nature biotechnology},
220 | 	number = {9},
221 | 	pages = {827},
222 | 	publisher = {Nature Publishing Group},
223 | 	title = {DNA targeting specificity of RNA-guided Cas9 nucleases},
224 | 	volume = {31},
225 | 	year = {2013}}
226 | 


--------------------------------------------------------------------------------
/Design_CRISPRa/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "gRNA design for CRISPR activation (CRISPRa)"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options(knitr.graphics.auto_pdf=TRUE)  # knitr: prefer PDF figures if present
 12 | ```
 13 | 
 14 | 
 15 | # Introduction
 16 | 
 17 | This tutorial will demonstrate how to use `crisprDesign` to design gRNAs 
 18 | for CRISPR activation (CRISPRa). Specifically, we will target the 
 19 | human KRAS gene and use the SpCas9 nuclease.
 20 | 
 21 | 
 22 | # Installation
 23 | 
 24 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 25 | `crisprDesign`, `crisprDesignData`
 26 | 
 27 | 
 28 | # Terminology
 29 | 
 30 | See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar 
 31 | with the terminology used throughout this tutorial.
 32 | 
 33 | 
 34 | 
 35 | # CRISPRa design
 36 | 
 37 | For CRISPR activation (CRISPRa) and interference (CRISPRi) applications,
 38 | the CRISPR nuclease is engineered to lose its endonuclease activity, 
 39 | and should therefore not introduce double-stranded breaks (DSBs). 
 40 | We will use the dead SpCas9 (dSpCas9) nuclease as an example here. 
 41 | Note that users don't have to distinguish between dSpCas9 and SpCas9 
 42 | when specifying the nuclease in the crisprVerse as they 
 43 | do not differ in terms of the characteristics stored in 
 44 | the `CrisprNuclease` object.
 45 | 
 46 | In CRISPRa, dSpCas9 is used to activate gene expression by coupling 
 47 | the dead nuclease with activation factors. Several CRISPRa systems have 
 48 | been developed (see @crispracrisprireview for a review). For 
 49 | optimal activation, gRNAs are usually designed to target the 
 50 | region directly upstream of the gene transcription start site (TSS).
 51 | 
 52 | `crisprDesign` provides functionality to take into account 
 53 | design rules that are specific to CRISPRa applications. The `queryTss` function
 54 | allows for specifying genomic coordinates of promoter regions. 
 55 | The `addTssAnnotation` function annotates gRNAs for known TSSs, 
 56 | and includes a column `dist_to_tss` that gives the distance in
 57 | nucleotides between the TSS position and the PAM site of the gRNA. 
 58 | For CRISPRa, we recommend targeting the region 75-150bp upstream of 
 59 | the TSS for optimal activation; see @sanson2018optimized for 
 60 | more information. Finally, the function `addCrispraiScores` adds on-target 
 61 | activity scores based on the work of [@crisprai]. 
 62 | 
 63 | 
 64 | ## Creating the GuideSet
 65 | 
 66 | We first start by loading the required packages:
 67 | 
 68 | ```{r, message=FALSE, warning=FALSE, results='hide' }
 69 | library(crisprBase)
 70 | library(crisprDesign)
 71 | library(crisprDesignData)
 72 | library(BSgenome.Hsapiens.UCSC.hg38)
 73 | ```
 74 | 
 75 | 
 76 | To demonstrate CRISPRa design, we will design gRNAs to activate the human KRAS 
 77 | gene using the SpCas9 nuclease. To accomplish this, we want our gRNAs to
 78 | target the region upstream of the KRAS TSS; let's consider the window
 79 | containing 500bp immediately upstream of the TSS. We first need to retrieve 
 80 | the TSS coordinates for KRAS. These data are conveniently stored in the 
 81 | `crisprDesignData` package as the dataset `tss_human`. For more information 
 82 | on `tss_human` and how to create similar TSS annotation objects, 
 83 | see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) 
 84 | tutorial.
 85 | 
 86 | We load the TSS coordinates stored in the `tss_human` object
 87 | 
 88 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 89 | data(tss_human, package="crisprDesignData")  # TSS coordinates dataset
 90 | ```
 91 | 
 92 | and query for KRAS using the `queryTss` function from `crisprDesign`:
 93 | 
 94 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 95 | target_window <- c(-500, 0)  # 500bp immediately upstream of the TSS
 96 | target_region <- queryTss(tss_human,
 97 |                           queryColumn="gene_symbol",  # match on gene symbol
 98 |                           queryValue="KRAS",
 99 |                           tss_window=target_window)
100 | ```
101 | 
102 | ```{r, collapse=TRUE}
103 | target_region  # display the region(s) returned for KRAS
104 | ```
105 | 
106 | We load the `CrisprNuclease` object storing information about the
107 | SpCas9 nuclease from the `crisprBase` package:
108 | 
109 | ```{r}
110 | data(SpCas9, package="crisprBase")  # SpCas9 nuclease object from crisprBase
111 | ```
112 | 
113 | We then find all candidate protospacer sequences in our target region 
114 | with `findSpacers`:
115 | 
116 | ```{r, warning=FALSE, message=FALSE, results='hide'}
117 | gs <- findSpacers(target_region,  # search only the queried TSS window
118 |                   crisprNuclease=SpCas9,
119 |                   bsgenome=BSgenome.Hsapiens.UCSC.hg38)
120 | ```
121 | 
122 | ```{r, collapse=TRUE}
123 | gs  # candidate protospacers found in the target region
124 | ```
125 | 
126 | 
127 | ## Annotating the GuideSet
128 | 
129 | Next, we annotate our candidate gRNAs to assess quality. There are several 
130 | functions in `crisprDesign` that provide annotation for features that are 
131 | not specific to CRISPRa, for which we refer the reader to the
132 | [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial for more information. The sections below will 
133 | cover annotation functions that are of particular interest to 
134 | CRISPRa applications.
135 | 
136 | 
137 | ## Adding TSS annotation
138 | 
139 | As the name implies, the `addTssAnnotation` function annotates gRNAs with 
140 | TSS context such as the distance between the gRNA and the TSS, as well as 
141 | which TSS is targeted (many genes contain different TSSs corresponding to
142 | different isoforms). 
143 | 
144 | The function requires a `tssObject` object, and the `tss_window` values 
145 | that we used earlier to define the target region. We can then retrieve 
146 | the appended annotation with the accessor function `tssAnnotation`:
147 | 
148 | ```{r, collapse=TRUE}
149 | gs <- addTssAnnotation(gs,
150 |                        tssObject=tss_human,
151 |                        tss_window=target_window)  # same window as queryTss
152 | tssAnnotation(gs)  # accessor for the appended TSS annotation
153 | ```
154 | 
155 | 
156 | ## Adding spacer alignments with TSS annotation
157 | 
158 | As with all CRISPR applications, off-targeting is an important concern in 
159 | assessing gRNA quality. While this concern is somewhat moderated for 
160 | CRISPRa, since the dead CRISPR nuclease does not make DSBs, we should 
161 | be aware of off-targets occurring in the promoter regions of other genes. 
162 | This can be handled by passing our `tssObject` to the `addSpacerAlignments` 
163 | function. We will search  for up to 2 mismatches and increase the 
164 | size of our `tss_window` to err on the safe side. 
165 | 
166 | Similar to the CRISPRko design tutorial, we need to specify a Bowtie index
167 | of the human reference genome; see the [Building genome indices for short read aligners](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) tutorial to learn how to create such an index. 
168 | 
169 | Here we specify the index that was available to us when generating this
170 | tutorial:
171 | 
172 | ```{r}
173 | index_path <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"  # machine-specific; edit
174 | ```
175 | 
176 | (this needs to be changed by users). We are now ready to add on- and off-target 
177 | alignments:
178 | 
179 | ```{r, warning=FALSE, message=FALSE, results='hide'}
180 | gs <- addSpacerAlignments(gs,
181 |                           aligner="bowtie",
182 |                           aligner_index=index_path,
183 |                           bsgenome=BSgenome.Hsapiens.UCSC.hg38,
184 |                           n_mismatches=2,  # search up to 2 mismatches
185 |                           tssObject=tss_human,  # adds promoter tallies (*_p)
186 |                           tss_window=c(-2000, 500))  # wider, to be safe
187 | ```
188 | ```{r, collapse=TRUE}
189 | gs
190 | ```
191 | 
192 | Including a `tssObject` parameter in the `addSpacerAlignments` function 
193 | appends columns to the `GuideSet` that tally the alignments restricted 
194 | to the defined (via `tss_window`) promoter regions: 
195 | `n0_p`, `n1_p`, and `n2_p` (the `_p` suffix denotes "promoter").
196 | 
197 | 
198 | ## Adding CRISPRai scores
199 | 
200 | The CRISPRai algorithm was developed by the Weissman lab to score SpCas9 
201 | gRNAs for CRISPRa and CRISPRi applications for the 
202 | human genome [@crisprai]. The function `addCrispraiScores` 
203 | implements this algorithm to add scores to the `GuideSet`.
204 | Compared to other on-target scoring algorithms, it requires several additional
205 | inputs:
206 | 
207 | - The `gr` argument is the `GRanges` object derived from the `queryTss`
208 | function and used to create the `GuideSet` object. 
209 | In our example, this is the object named `target_region`.
210 | - The `tssObject` argument is a `GRanges` object that contains 
211 | TSS coordinates and annotation. It must also contain the following 
212 | columns: `ID`, `promoter`, `tx_id`, and `gene_symbol`.
213 | Our `tssObject` in this instance is `tss_human`.
214 | - `geneCol` indicates which column of `tssObject` should be used as the 
215 | unique gene identifier.
216 | - `modality` is the modality of the CRISPR application, in our case, `CRISPRa`.
217 | - `fastaFile` is the path of a FASTA file containing the sequence of the
218 | human reference genome in hg38 coordinates. This file is available [here](https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz).
219 | - `chromatinFiles` is a vector of length 3 specifying the path of files 
220 | containing the chromatin accessibility data needed for the algorithm 
221 | in hg38 coordinates. The chromatin files can be downloaded from Zenodo [here](https://zenodo.org/record/6716721#.YrzCfS-cY4d).
222 | 
223 | 
224 | We first prepare all needed inputs for `addCrispraiScores`.
225 | We start by specifying the location of the FASTA file on our local machine:
226 | 
227 | ```{r}
228 | fastaPath <- "/Users/fortinj2/crisprIndices/genomes/hg38/hg38.fa"
229 | ```
230 | 
231 | This corresponds to the path where the downloaded file from [here](https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz) is
232 | stored. Next, we specify the location of the chromatin files:
233 | 
234 | 
235 | 
236 | ```{r}
237 | mnasePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_mnase_human_K562_hg38.bigWig"
238 | dnasePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_dnase_human_K562_hg38.bigWig"
239 | fairePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_faire_human_K562_hg38.bigWig"
240 | chromatinFiles <- c(mnase=mnasePath,
241 |                     dnase=dnasePath,
242 |                     faire=fairePath)
243 | ```
244 | 
245 | This should correspond to the files that were downloaded from 
246 | [here](https://zenodo.org/record/6716721#.YrzCfS-cY4d).
247 | 
248 | We are now ready to add the scores:
249 | 
250 | ```{r, warning=FALSE, message=FALSE, results='hide', eval=TRUE}
251 | results <- addCrispraiScores(gs,
252 |                              gr=target_region,
253 |                              tssObject=tss_human,
254 |                              geneCol="gene_id",
255 |                              modality="CRISPRa",
256 |                              fastaFile=fastaPath,
257 |                              chromatinFiles=chromatinFiles)
258 | ```
259 | 
260 | Let's look at the results:
261 | 
262 | ```{r, collapse=TRUE, eval=TRUE}
263 | results
264 | ```
265 | 
266 | You can see that the column `score_crispra` was added to the `GuideSet`.
267 | Note that this function works identically for CRISPRi applications, with 
268 | the `modality` argument replaced by `CRISPRi`. 
269 | 
270 | 
271 | 
272 | # Session Info
273 | 
274 | ```{r}
275 | sessionInfo()
276 | ```
277 | 
278 | 
279 | # References
280 | 


--------------------------------------------------------------------------------
/Design_CRISPRi/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "gRNA design for CRISPR interference (CRISPRi)"
  3 | author: Jean-Philippe Fortin, Luke Hoberecht
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | bibliography: references.bib
  8 | ---
  9 | 
 10 | ```{r, echo=FALSE, results="hide"}
 11 | options("knitr.graphics.auto_pdf"=TRUE)
 12 | ```
 13 | 
 14 | 
 15 | 
 16 | # Introduction
 17 | 
 18 | This tutorial will demonstrate how to use `crisprDesign` to design gRNAs 
 19 | for CRISPR interference (CRISPRi). Specifically, we will target the 
 20 | human KRAS gene and use the SpCas9 nuclease.
 21 | 
 22 | # Installation
 23 | 
 24 | See the [Installation tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Installation) to learn how to install the packages necessary for this tutorial:
 25 | `crisprDesign`, `crisprDesignData`
 26 | 
 27 | 
 28 | # Terminology
 29 | 
 30 | See the [CRISPRko design tutorial](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) to get familiar 
 31 | with the terminology used throughout this tutorial.
 32 | 
 33 | 
 34 | # CRISPRi design
 35 | 
 36 | For CRISPR activation (CRISPRa) and interference (CRISPRi) applications,
 37 | the CRISPR nuclease is engineered to lose its endonuclease activity, 
 38 | and should therefore not introduce double-stranded breaks (DSBs). 
 39 | We will use the dead SpCas9 (dSpCas9) nuclease as an example here. 
 40 | Note that users don't have to distinguish between dSpCas9 and SpCas9 
 41 | when specifying the nuclease in the crisprVerse as they 
 42 | do not differ in terms of the characteristics stored in 
 43 | the `CrisprNuclease` object.
 44 | 
 45 | In CRISPRi, fusing dSpCas9 with a Krüppel-associated box (KRAB) domain has been
 46 | shown to be effective at repressing transcription in mammalian cells
 47 | [@crispri]. The dSpCas9-KRAB fused protein is a commonly-used construct to
 48 | conduct CRISPR interference (CRISPRi) experiments. To achieve optimal inhibition,
 49 | gRNAs are usually designed to target the region directly downstream of the gene
 50 | transcription start site (TSS).
 51 | 
 52 | `crisprDesign` provides functionalities to be able to take into account 
 53 | design rules that are specific to CRISPRi applications. The `queryTss` function
 54 | allows for specifying genomic coordinates of promoter regions. 
 55 | The `addTssAnnotation` function annotates gRNAs for known TSSs, 
 56 | and includes a column `dist_to_tss` that gives the distance in
 57 | nucleotides between the TSS position and the PAM site of the gRNA. 
 58 | For CRISPRi, we recommend targeting the 25-75bp region 
 59 | downstream of the TSS for optimal inhibition; see @sanson2018optimized for 
 60 | more information. Finally, the function `addCrispraiScores` adds 
 61 | CRISPRi-specific on-target activity scores based on the work of [@crisprai]. 
 62 | 
 63 | 
 64 | ## Creating the GuideSet
 65 | 
 66 | We first start by loading the required packages:
 67 | 
 68 | ```{r, message=FALSE, warning=FALSE, results='hide' }
 69 | library(crisprBase)
 70 | library(crisprDesign)
 71 | library(crisprDesignData)
 72 | library(BSgenome.Hsapiens.UCSC.hg38)
 73 | ```
 74 | 
 75 | 
 76 | To demonstrate CRISPRi design, we will design gRNAs to inhibit
 77 | expression of the human KRAS 
 78 | gene using the SpCas9 nuclease. To accomplish this, we want our gRNAs to
 79 | target the region downstream of the KRAS TSS; let's consider the window
 80 | containing 500bp immediately downstream of the TSS to explore candidate gRNAs.
 81 | 
 82 | We first need to retrieve the TSS coordinates for KRAS. 
 83 | These data are conveniently stored in the 
 84 | `crisprDesignData` package as the dataset `tss_human`. For more information 
 85 | on `tss_human` and how to create similar TSS annotation objects, 
 86 | see the [Building a gene annotation object](https://github.com/crisprVerse/Tutorials/tree/master/Building_Gene_Annotation) 
 87 | tutorial.
 88 | 
 89 | 
 90 | We load the TSS coordinates stored in the `tss_human` object
 91 | 
 92 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 93 | data("tss_human", package="crisprDesignData")
 94 | ```
 95 | 
 96 | and query for KRAS using the `queryTss` function from `crisprDesign`:
 97 | 
 98 | ```{r, warning=FALSE, message=FALSE, results='hide'}
 99 | target_window <- c(0, 500)
100 | target_region <- queryTss(tss_human,
101 |                           queryColumn="gene_symbol",
102 |                           queryValue="KRAS",
103 |                           tss_window=target_window)
104 | ```
105 | 
106 | ```{r, collapse=TRUE}
107 | target_region
108 | ```
109 | 
110 | We load the `CrisprNuclease` object storing information about the
111 | SpCas9 nuclease from the `crisprBase` package:
112 | 
113 | ```{r}
114 | data(SpCas9, package="crisprBase")
115 | ```
116 | 
117 | We then find all candidate protospacer sequences in our target region 
118 | with `findSpacers`:
119 | 
120 | ```{r, warning=FALSE, message=FALSE, results='hide'}
121 | gs <- findSpacers(target_region,
122 |                   crisprNuclease=SpCas9,
123 |                   bsgenome=BSgenome.Hsapiens.UCSC.hg38)
124 | ```
125 | 
126 | ```{r, collapse=TRUE}
127 | gs
128 | ```
129 | 
130 | 
131 | ## Annotating the GuideSet
132 | 
133 | Next, we annotate our candidate gRNAs to assess quality. There are several 
134 | functions in `crisprDesign` that provide annotation for features that are 
135 | not specific to CRISPRi, for which we refer the reader to the
136 | [CRISPRko design with Cas9](https://github.com/crisprVerse/Tutorials/tree/master/Design_CRISPRko_Cas9) tutorial for more information. The sections below will 
137 | cover annotation functions that are of particular interest to 
138 | CRISPRi applications.
139 | 
140 | 
141 | ## Adding TSS annotation
142 | 
143 | As the name implies, the `addTssAnnotation` function annotates gRNAs with 
144 | TSS context such as the distance between the gRNA and the TSS, as well as 
145 | which TSS is targeted (many genes contain different TSSs corresponding to
146 | different isoforms). 
147 | 
148 | The function requires a `tssObject` object, and the `tss_window` values 
149 | that we used earlier to define the target region. We can then retrieve 
150 | the appended annotation with the accessor function `tssAnnotation`:
151 | 
152 | ```{r, collapse=TRUE}
153 | gs <- addTssAnnotation(gs,
154 |                        tssObject=tss_human,
155 |                        tss_window=target_window)
156 | tssAnnotation(gs)
157 | ```
158 | 
159 | 
160 | ## Adding spacer alignments with TSS annotation
161 | 
162 | As with all CRISPR applications, potential off-target effects are an 
163 | important concern in assessing gRNA quality. While this concern is 
164 | somewhat moderated for CRISPRi, since the dead CRISPR nuclease does 
165 | not make DSBs, we should be aware of off-targets occurring in the promoter regions of other genes. This can be handled by passing our `tssObject` to the `addSpacerAlignments` function. We will search for up to 2 mismatches and increase the size of our `tss_window` (which defines the promoter region when searching for off-targets) to err 
166 | on the safe side. 
167 | 
168 | Similar to the CRISPRko design tutorial, we need to specify a Bowtie index
169 | of the human reference genome; see the [Building genome indices for short read aligners](https://github.com/crisprVerse/Tutorials/tree/master/Building_Genome_Indices) tutorial to learn how to create such an index. 
170 | 
171 | Here, we specify the index that was available to us when generating this
172 | tutorial:
173 | 
174 | ```{r}
175 | # Users need to specify the path of their bowtie index
176 | index_path <- "/Users/fortinj2/crisprIndices/bowtie/hg38/hg38"
177 | ```
178 | 
179 | We are ready to add on- and off-target alignments:
180 | 
181 | ```{r, warning=FALSE, message=FALSE, results='hide'}
182 | gs <- addSpacerAlignments(gs,
183 |                           aligner="bowtie",
184 |                           aligner_index=index_path,
185 |                           bsgenome=BSgenome.Hsapiens.UCSC.hg38,
186 |                           n_mismatches=2,
187 |                           tssObject=tss_human,
188 |                           tss_window=c(-500, 2000))
189 | ```
190 | ```{r, collapse=TRUE}
191 | gs
192 | ```
193 | 
194 | Including a `tssObject` parameter in the `addSpacerAlignments` function 
195 | appends columns to the `GuideSet` that tally the alignments restricted 
196 | to the defined (via `tss_window`) promoter regions: 
197 | `n0_p`, `n1_p`, and `n2_p` (the `_p` suffix denotes "promoter").
198 | 
199 | 
200 | ## Adding CRISPRai scores
201 | 
202 | The CRISPRai algorithm was developed by the Weissman lab to score SpCas9 
203 | gRNAs for CRISPRa and CRISPRi applications for the 
204 | human genome [@crisprai]. The function `addCrispraiScores` 
205 | implements this algorithm to add scores to the `GuideSet`.
206 | Compared to other on-target scoring algorithms, it requires several additional
207 | inputs:
208 | 
209 | - The `gr` argument is the `GRanges` object derived from the `queryTss`
210 | function and used to create the `GuideSet` object. 
211 | In our example, this is the object named `target_region`.
212 | - The `tssObject` argument is a `GRanges` object that contains 
213 | TSS coordinates and annotation. It must also contain the following 
214 | columns: `ID`, `promoter`, `tx_id`, and `gene_symbol`.
215 | Our `tssObject` in this instance is `tss_human`.
216 | - `geneCol` indicates which column of `tssObject` should be used as the 
217 | unique gene identifier.
218 | - `modality` is the modality of the CRISPR application, in our case, `CRISPRi`.
219 | - `fastaFile` is the path of a FASTA file containing the sequence of the
220 | human reference genome in hg38 coordinates. This file is available [here](https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz).
221 | - `chromatinFiles` is a vector of length 3 specifying the path of files 
222 | containing the chromatin accessibility data needed for the algorithm 
223 | in hg38 coordinates. The chromatin files can be downloaded from Zenodo [here](https://zenodo.org/record/6716721#.YrzCfS-cY4d).
224 | 
225 | 
226 | We first prepare all needed inputs for `addCrispraiScores`.
227 | We start by specifying the location of the FASTA file on our local machine:
228 | 
229 | ```{r}
230 | fastaPath <- "/Users/fortinj2/crisprIndices/genomes/hg38/hg38.fa"
231 | ```
232 | 
233 | This corresponds to the path where the downloaded file from [here](https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz) is
234 | stored. Next, we specify the location of the chromatin files:
235 | 
236 | ```{r}
237 | mnasePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_mnase_human_K562_hg38.bigWig"
238 | dnasePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_dnase_human_K562_hg38.bigWig"
239 | fairePath <- "/Users/fortinj2/crisprIndices/chromatin/hg38/crispria_faire_human_K562_hg38.bigWig"
240 | chromatinFiles <- c(mnase=mnasePath,
241 |                     dnase=dnasePath,
242 |                     faire=fairePath)
243 | ```
244 | 
245 | This should correspond to the files that were downloaded from 
246 | [here](https://zenodo.org/record/6716721#.YrzCfS-cY4d).
247 | 
248 | We are now ready to add the scores:
249 | 
250 | ```{r, warning=FALSE, message=FALSE, results='hide', eval=TRUE}
251 | results <- addCrispraiScores(gs,
252 |                              gr=target_region,
253 |                              tssObject=tss_human,
254 |                              geneCol="gene_id",
255 |                              modality="CRISPRi",
256 |                              fastaFile=fastaPath,
257 |                              chromatinFiles=chromatinFiles)
258 | ```
259 | 
260 | Let's look at the results:
261 | 
262 | ```{r, collapse=TRUE, eval=TRUE}
263 | results
264 | ```
265 | 
266 | You can see that the column `score_crispri` was added to the `GuideSet`.
267 | Note that this function works identically for CRISPRa applications, with 
268 | the `modality` argument replaced by `CRISPRa`. 
269 | 
270 | 
271 | 
272 | # Session Info
273 | 
274 | ```{r}
275 | sessionInfo()
276 | ```
277 | 
278 | 
279 | # References
280 | 


--------------------------------------------------------------------------------