├── .DS_Store
├── README.md
├── docs
├── .DS_Store
├── CONFIG
│ ├── README.md
│ ├── config_1.md
│ ├── config_2.md
│ └── config_3.md
├── MOFA
│ ├── README.md
│ ├── mofa_1.md
│ ├── mofa_2.md
│ ├── mofa_3.assets
│ │ ├── do3.png
│ │ ├── do4-20221019030752121.png
│ │ ├── do4-6120066.png
│ │ ├── do4.png
│ │ ├── do5-6120069.png
│ │ ├── do5.png
│ │ └── do6.png
│ ├── mofa_3.md
│ ├── mofa_4.assets
│ │ ├── do10.png
│ │ ├── do11-6120091.png
│ │ ├── do11.png
│ │ ├── do12-6120093.png
│ │ ├── do12.png
│ │ ├── do13.png
│ │ ├── do14.png
│ │ ├── do15.png
│ │ ├── do16-1636004475065.png
│ │ ├── do16.png
│ │ ├── do7.png
│ │ ├── do8.png
│ │ ├── do9-6120084.png
│ │ └── do9.png
│ ├── mofa_4.md
│ ├── mofa_5.assets
│ │ └── image-20211104134822671.png
│ ├── mofa_5.md
│ ├── mofa_6.md
│ ├── mofa_7.assets
│ │ ├── 下载 (3).png
│ │ ├── 下载 (4).png
│ │ ├── 下载 (5).png
│ │ ├── 下载 (6).png
│ │ ├── 下载 (7).png
│ │ └── 下载 (8).png
│ └── mofa_7.md
├── MultiOMIC-ipynb
│ ├── cellanno-1.ipynb
│ ├── cellanno-2.ipynb
│ ├── figure1.ipynb
│ ├── figure2.ipynb
│ ├── glue-0.ipynb
│ ├── glue-1.ipynb
│ └── glue-2.ipynb
├── MultiOMIC
│ ├── .DS_Store
│ ├── README.md
│ ├── book-1.md
│ ├── book-2.md
│ ├── book-3.assets
│ │ ├── image-20221019004245874-6119913.png
│ │ ├── image-20221019004245874-6119960.png
│ │ ├── image-20221019004245874.png
│ │ ├── image-20221019004442469-6119965.png
│ │ ├── image-20221019004442469.png
│ │ ├── image-20221019004556158-6119968.png
│ │ ├── image-20221019004556158.png
│ │ ├── image-20221019004807328-6119971.png
│ │ ├── image-20221019004807328.png
│ │ ├── image-20221019004930306-6119974.png
│ │ ├── image-20221019004930306.png
│ │ ├── image-20221019005115998-6119977.png
│ │ ├── image-20221019005115998.png
│ │ ├── image-20221019005254018-6119981.png
│ │ ├── image-20221019005254018.png
│ │ ├── image-20221019005328419-6119984.png
│ │ ├── image-20221019005328419.png
│ │ ├── image-20221019005554337-6119988.png
│ │ ├── image-20221019005554337.png
│ │ ├── image-20221019005740026-6119990.png
│ │ └── image-20221019005740026.png
│ ├── book-3.md
│ ├── book-4.assets
│ │ ├── image-20221019012328268-6120000.png
│ │ ├── image-20221019012328268.png
│ │ ├── image-20221019012404062-6120003.png
│ │ ├── image-20221019012404062.png
│ │ ├── image-20221019012503861-6120007.png
│ │ └── image-20221019012503861.png
│ ├── book-4.md
│ ├── book-5.assets
│ │ ├── image-20221019014049480-6120013.png
│ │ ├── image-20221019014049480.png
│ │ ├── image-20221019014313452-6120017.png
│ │ ├── image-20221019014313452.png
│ │ ├── image-20221019014538577-6120020.png
│ │ ├── image-20221019014538577.png
│ │ ├── image-20221019014718599-6120022.png
│ │ ├── image-20221019014718599.png
│ │ ├── image-20221019014912052-6120025.png
│ │ └── image-20221019014912052.png
│ ├── book-5.md
│ ├── book-6.assets
│ │ ├── image-20221019020116601-6120031.png
│ │ ├── image-20221019020116601.png
│ │ ├── image-20221019020147975-6120034.png
│ │ ├── image-20221019020147975.png
│ │ ├── image-20221019020412203-6120038.png
│ │ └── image-20221019020412203.png
│ ├── book-6.md
│ ├── book-7.assets
│ │ ├── image-20221019021043508-6120043.png
│ │ ├── image-20221019021043508.png
│ │ ├── image-20221019021239322-6120046.png
│ │ ├── image-20221019021239322.png
│ │ ├── image-20221019021513334-6120051.png
│ │ └── image-20221019021513334.png
│ └── book-7.md
├── PLOT
│ ├── .DS_Store
│ ├── README.md
│ ├── data
│ │ ├── .DS_Store
│ │ ├── data_exp.csv
│ │ ├── data_heatmap.csv
│ │ ├── data_kegg.txt
│ │ └── data_vol.csv
│ ├── plot_1.assets
│ │ ├── output_11_1.png
│ │ ├── output_14_1.png
│ │ ├── output_16_1.png
│ │ ├── output_18_0.png
│ │ ├── output_1_1.png
│ │ ├── output_20_0.png
│ │ ├── output_5_0.png
│ │ ├── output_7_1.png
│ │ └── output_9_1.png
│ ├── plot_1.md
│ ├── plot_2.assets
│ │ ├── output_11_0.png
│ │ ├── output_13_0.png
│ │ ├── output_15_0.png
│ │ ├── output_17_0.png
│ │ ├── output_19_1.png
│ │ ├── output_1_1.png
│ │ ├── output_21_1.png
│ │ └── output_23_0.png
│ ├── plot_2.md
│ ├── plot_3.assets
│ │ ├── output_11_0.png
│ │ ├── output_13_1.png
│ │ ├── output_15_2.png
│ │ ├── output_1_1.png
│ │ └── output_9_0.png
│ └── plot_3.md
├── README.md
├── RNASEQ
│ ├── README.md
│ ├── rnaseq_1.md
│ ├── rnaseq_2.assets
│ │ ├── NHDF_heatmap.png
│ │ ├── NHDF_volcano-1635819681870.png
│ │ ├── NHDF_volcano.png
│ │ ├── untitled.png
│ │ └── untitled1.png
│ ├── rnaseq_2.md
│ ├── rnaseq_3.assets
│ │ ├── NOD-like receptor signaling pathway.prerank.png
│ │ └── untitled1.png
│ └── rnaseq_3.md
├── RNASEQUP
│ ├── README.md
│ ├── rnasequp_1.md
│ └── rnasequp_2.md
├── SCGLUE
│ ├── README.md
│ ├── scglue_1.md
│ ├── scglue_2.md
│ ├── scglue_3.assets
│ │ ├── do.png
│ │ ├── do10.png
│ │ ├── do11.png
│ │ ├── do12.png
│ │ ├── do13.png
│ │ ├── do14.png
│ │ ├── do15.png
│ │ ├── do16.png
│ │ ├── do2.png
│ │ ├── do3.png
│ │ ├── do4.png
│ │ ├── do5.png
│ │ ├── do6.png
│ │ ├── do7.png
│ │ ├── do8.png
│ │ └── do9.png
│ ├── scglue_3.md
│ ├── scglue_4.md
│ └── scglue_5.md
├── SUMMARY.md
├── TMT
│ ├── .DS_Store
│ ├── README.md
│ ├── tmt_1.assets
│ │ ├── image-20220126012940259-6120196.png
│ │ └── image-20220126012940259.png
│ ├── tmt_1.md
│ ├── tmt_2.assets
│ │ ├── image-20220126013900416-6120205.png
│ │ ├── image-20220126013900416.png
│ │ ├── image-20220126014204530.png
│ │ ├── image-20220126014423239.png
│ │ ├── image-20220126014658979.png
│ │ ├── image-20220126014844199.png
│ │ ├── image-20220126014944222.png
│ │ ├── image-20220126015312481.png
│ │ ├── image-20220126015354523.png
│ │ └── image-20220126015849066.png
│ ├── tmt_2.md
│ ├── tmt_3.assets
│ │ ├── image-20220126021021714.png
│ │ ├── image-20220126022018085.png
│ │ ├── image-20220126022234665.png
│ │ ├── image-20220126022304695.png
│ │ └── image-20220126022354890.png
│ └── tmt_3.md
├── index.md
└── overrides
│ └── main.html
├── mkdocs.yml
└── site
├── .DS_Store
├── 404.html
├── CONFIG
├── config_1
│ └── index.html
├── config_2
│ └── index.html
├── config_3
│ └── index.html
└── index.html
├── MOFA
├── index.html
├── mofa_1
│ └── index.html
├── mofa_2
│ └── index.html
├── mofa_3.assets
│ ├── do3.png
│ ├── do4-20221019030752121.png
│ ├── do4-6120066.png
│ ├── do4.png
│ ├── do5-6120069.png
│ ├── do5.png
│ └── do6.png
├── mofa_3
│ └── index.html
├── mofa_4.assets
│ ├── do10.png
│ ├── do11-6120091.png
│ ├── do11.png
│ ├── do12-6120093.png
│ ├── do12.png
│ ├── do13.png
│ ├── do14.png
│ ├── do15.png
│ ├── do16-1636004475065.png
│ ├── do16.png
│ ├── do7.png
│ ├── do8.png
│ ├── do9-6120084.png
│ └── do9.png
├── mofa_4
│ └── index.html
├── mofa_5.assets
│ └── image-20211104134822671.png
├── mofa_5
│ └── index.html
├── mofa_6
│ └── index.html
├── mofa_7.assets
│ ├── 下载 (3).png
│ ├── 下载 (4).png
│ ├── 下载 (5).png
│ ├── 下载 (6).png
│ ├── 下载 (7).png
│ └── 下载 (8).png
└── mofa_7
│ └── index.html
├── MultiOMIC-ipynb
├── cellanno-1
│ └── index.html
├── cellanno-2
│ └── index.html
├── figure1
│ └── index.html
├── figure2
│ └── index.html
├── glue-0
│ └── index.html
├── glue-1
│ └── index.html
└── glue-2
│ └── index.html
├── MultiOMIC
├── book-1
│ └── index.html
├── book-2
│ └── index.html
├── book-3.assets
│ ├── image-20221019004245874-6119913.png
│ ├── image-20221019004245874-6119960.png
│ ├── image-20221019004245874.png
│ ├── image-20221019004442469-6119965.png
│ ├── image-20221019004442469.png
│ ├── image-20221019004556158-6119968.png
│ ├── image-20221019004556158.png
│ ├── image-20221019004807328-6119971.png
│ ├── image-20221019004807328.png
│ ├── image-20221019004930306-6119974.png
│ ├── image-20221019004930306.png
│ ├── image-20221019005115998-6119977.png
│ ├── image-20221019005115998.png
│ ├── image-20221019005254018-6119981.png
│ ├── image-20221019005254018.png
│ ├── image-20221019005328419-6119984.png
│ ├── image-20221019005328419.png
│ ├── image-20221019005554337-6119988.png
│ ├── image-20221019005554337.png
│ ├── image-20221019005740026-6119990.png
│ └── image-20221019005740026.png
├── book-3
│ └── index.html
├── book-4.assets
│ ├── image-20221019012328268-6120000.png
│ ├── image-20221019012328268.png
│ ├── image-20221019012404062-6120003.png
│ ├── image-20221019012404062.png
│ ├── image-20221019012503861-6120007.png
│ └── image-20221019012503861.png
├── book-4
│ └── index.html
├── book-5.assets
│ ├── image-20221019014049480-6120013.png
│ ├── image-20221019014049480.png
│ ├── image-20221019014313452-6120017.png
│ ├── image-20221019014313452.png
│ ├── image-20221019014538577-6120020.png
│ ├── image-20221019014538577.png
│ ├── image-20221019014718599-6120022.png
│ ├── image-20221019014718599.png
│ ├── image-20221019014912052-6120025.png
│ └── image-20221019014912052.png
├── book-5
│ └── index.html
├── book-6.assets
│ ├── image-20221019020116601-6120031.png
│ ├── image-20221019020116601.png
│ ├── image-20221019020147975-6120034.png
│ ├── image-20221019020147975.png
│ ├── image-20221019020412203-6120038.png
│ └── image-20221019020412203.png
├── book-6
│ └── index.html
├── book-7.assets
│ ├── image-20221019021043508-6120043.png
│ ├── image-20221019021043508.png
│ ├── image-20221019021239322-6120046.png
│ ├── image-20221019021239322.png
│ ├── image-20221019021513334-6120051.png
│ └── image-20221019021513334.png
├── book-7
│ └── index.html
└── index.html
├── PLOT
├── data
│ ├── data_exp.csv
│ ├── data_heatmap.csv
│ ├── data_kegg.txt
│ └── data_vol.csv
├── index.html
├── plot_1.assets
│ ├── output_11_1.png
│ ├── output_14_1.png
│ ├── output_16_1.png
│ ├── output_18_0.png
│ ├── output_1_1.png
│ ├── output_20_0.png
│ ├── output_5_0.png
│ ├── output_7_1.png
│ └── output_9_1.png
├── plot_1
│ └── index.html
├── plot_2.assets
│ ├── output_11_0.png
│ ├── output_13_0.png
│ ├── output_15_0.png
│ ├── output_17_0.png
│ ├── output_19_1.png
│ ├── output_1_1.png
│ ├── output_21_1.png
│ └── output_23_0.png
├── plot_2
│ └── index.html
├── plot_3.assets
│ ├── output_11_0.png
│ ├── output_13_1.png
│ ├── output_15_2.png
│ ├── output_1_1.png
│ └── output_9_0.png
└── plot_3
│ └── index.html
├── RNASEQ
├── index.html
├── rnaseq_1
│ └── index.html
├── rnaseq_2.assets
│ ├── NHDF_heatmap.png
│ ├── NHDF_volcano-1635819681870.png
│ ├── NHDF_volcano.png
│ ├── untitled.png
│ └── untitled1.png
├── rnaseq_2
│ └── index.html
├── rnaseq_3.assets
│ ├── NOD-like receptor signaling pathway.prerank.png
│ └── untitled1.png
└── rnaseq_3
│ └── index.html
├── RNASEQUP
├── index.html
├── rnasequp_1
│ └── index.html
└── rnasequp_2
│ └── index.html
├── SCGLUE
├── index.html
├── scglue_1
│ └── index.html
├── scglue_2
│ └── index.html
├── scglue_3.assets
│ ├── do.png
│ ├── do10.png
│ ├── do11.png
│ ├── do12.png
│ ├── do13.png
│ ├── do14.png
│ ├── do15.png
│ ├── do16.png
│ ├── do2.png
│ ├── do3.png
│ ├── do4.png
│ ├── do5.png
│ ├── do6.png
│ ├── do7.png
│ ├── do8.png
│ └── do9.png
├── scglue_3
│ └── index.html
├── scglue_4
│ └── index.html
└── scglue_5
│ └── index.html
├── SUMMARY
└── index.html
├── TMT
├── index.html
├── tmt_1.assets
│ ├── image-20220126012940259-6120196.png
│ └── image-20220126012940259.png
├── tmt_1
│ └── index.html
├── tmt_2.assets
│ ├── image-20220126013900416-6120205.png
│ ├── image-20220126013900416.png
│ ├── image-20220126014204530.png
│ ├── image-20220126014423239.png
│ ├── image-20220126014658979.png
│ ├── image-20220126014844199.png
│ ├── image-20220126014944222.png
│ ├── image-20220126015312481.png
│ ├── image-20220126015354523.png
│ └── image-20220126015849066.png
├── tmt_2
│ └── index.html
├── tmt_3.assets
│ ├── image-20220126021021714.png
│ ├── image-20220126022018085.png
│ ├── image-20220126022234665.png
│ ├── image-20220126022304695.png
│ └── image-20220126022354890.png
└── tmt_3
│ └── index.html
├── assets
├── images
│ └── favicon.png
├── javascripts
│ ├── bundle.fc8c2696.min.js
│ ├── bundle.fc8c2696.min.js.map
│ ├── lunr
│ │ ├── min
│ │ │ ├── lunr.ar.min.js
│ │ │ ├── lunr.da.min.js
│ │ │ ├── lunr.de.min.js
│ │ │ ├── lunr.du.min.js
│ │ │ ├── lunr.es.min.js
│ │ │ ├── lunr.fi.min.js
│ │ │ ├── lunr.fr.min.js
│ │ │ ├── lunr.hi.min.js
│ │ │ ├── lunr.hu.min.js
│ │ │ ├── lunr.it.min.js
│ │ │ ├── lunr.ja.min.js
│ │ │ ├── lunr.jp.min.js
│ │ │ ├── lunr.ko.min.js
│ │ │ ├── lunr.multi.min.js
│ │ │ ├── lunr.nl.min.js
│ │ │ ├── lunr.no.min.js
│ │ │ ├── lunr.pt.min.js
│ │ │ ├── lunr.ro.min.js
│ │ │ ├── lunr.ru.min.js
│ │ │ ├── lunr.stemmer.support.min.js
│ │ │ ├── lunr.sv.min.js
│ │ │ ├── lunr.ta.min.js
│ │ │ ├── lunr.th.min.js
│ │ │ ├── lunr.tr.min.js
│ │ │ ├── lunr.vi.min.js
│ │ │ └── lunr.zh.min.js
│ │ ├── tinyseg.js
│ │ └── wordcut.js
│ └── workers
│ │ ├── search.208ed371.min.js
│ │ └── search.208ed371.min.js.map
└── stylesheets
│ ├── main.7bf56d0a.min.css
│ ├── main.7bf56d0a.min.css.map
│ ├── palette.a0c5b2b5.min.css
│ └── palette.a0c5b2b5.min.css.map
├── index.html
├── overrides
└── main.html
├── sitemap.xml
└── sitemap.xml.gz
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/.DS_Store
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bioinformatics_tutorial
2 |
3 | - 作者:starlitnightly
4 | - 日期:2021.11.02
5 |
6 | ## 楔子
7 |
8 | 撰写这个笔记,目的在于提供一个生物信息学相关的教程吧
9 |
10 | ——2021.11.02,星夜
11 |
12 | ## 目录
13 |
14 | - 【配置】:https://starlitnightly.github.io/bioinformatic_tutorial/CONFIG/
15 | - 【分析1: RNA-seq上游】:https://starlitnightly.github.io/bioinformatic_tutorial/RNASEQUP/
16 | - 【分析2: RNA-seq下游】:https://starlitnightly.github.io/bioinformatic_tutorial/RNASEQ/
17 | - 【分析3: 单细胞样本对齐】:https://starlitnightly.github.io/bioinformatic_tutorial/SCGLUE/
18 | - 【分析4: MOFA单细胞多组学因子分析】:https://starlitnightly.github.io/bioinformatic_tutorial/MOFA/
19 | - 【绘图:Python数据可视化-生物信息学专栏】:https://starlitnightly.github.io/bioinformatic_tutorial/PLOT/
20 |
21 | ## 数据
22 |
23 | 【绘图:Python数据可视化-生物信息学专栏】:https://github.com/Starlitnightly/bioinformatic_tutorial/tree/main/PLOT/data
24 |
25 | ## License
26 |
27 |
28 |
29 | 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/.DS_Store
--------------------------------------------------------------------------------
/docs/CONFIG/README.md:
--------------------------------------------------------------------------------
1 | # 配置:基础环境配置
2 |
3 |
--------------------------------------------------------------------------------
/docs/CONFIG/config_1.md:
--------------------------------------------------------------------------------
1 | # 配置1:Python环境
2 |
3 | 对于生物信息学相关的分析,这里分为Python,R语言与linux三部分
4 |
5 | ## 1. 前言
6 |
7 | 在python部分,我们主要使用到numpy,pandas,seaborn,matplotlib,scipy,statsmodels这几个包,值得庆幸的是,只需要通过Anaconda的安装,这几个包就可以一并被安装上了,避免各种各样报错的烦恼
8 |
9 | ## 2. Anaconda下载
10 |
11 | 如果你没有科学上网的话,建议从清华大学镜像网下载Anaconda3的安装包进行安装,省事儿
12 |
13 | 下载地址:https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-5.3.1-Windows-x86_64.exe
14 |
15 | ## 3. Anaconda安装(转自知乎BG大龍)
16 |
17 | ### 3.1 安装过程
18 |
19 | 
20 |
21 | 
22 |
23 | 接着就是路径,提醒小白,安装到C盘真的可以避免后续的很多小问题,但是尽管这样我也没有尝试过把它装入C盘。
24 |
25 | > 我选择了E盘,单独创建一个文件夹命名为“Anaconda”.
26 | > 注意路径要简单,我的是 E:\Anaconda\
27 | >
28 | > ——不要有空格!!!
29 | >
30 | > ——不要有中文字符!!!
31 |
32 | 
33 |
34 | 大家注意到这一幅图跟上一幅图不一样,那是因为,我建议两个都打勾,第一个不打勾对初学者其实挺不友好的,打勾了能省很多事儿
35 |
36 | 
37 |
38 | 点击install,等待不太漫长的进度条……
39 |
40 | 
41 |
42 | 提示安装成功……
43 |
44 | 
45 |
46 | 提示安装VScode,选择点击“skip”
47 |
48 | 
49 |
50 | 两个“learn”,都取消打勾
51 |
52 | 
53 |
54 | ### 3.2 检验安装
55 |
56 | 我们在cmd中输入python,检查是否有Python环境
57 |
58 | > Q:cmd怎么打开
59 | >
60 | > A:“cmd命令的打开方法:1、在电脑桌面中使用“WIN+R”组合键,打开的“运行”窗口,输入“cmd”命令并回车即可打开;2、打开“开始”菜单,在搜索框中输入“cmd”,点击“cmd.exe”即可打开。”
61 |
62 | 
63 |
64 | 在cmd中输入:conda info,查看是否有信息输出(这是检验安装成功的标志)
65 |
66 | 
67 |
68 | ### 3.3 检验其他是否安装成功。尤其是 Anaconda Navigator
69 |
70 | > 点击,看是否能够进入界面,若成功,大功告成。
71 |
72 | 
73 |
74 | 
75 |
76 |
77 |
78 | 到这里,你已经完成了Python环境的基础配置,至于更高阶的配置,那么我们等到需要进行对应的分析的时候,再进行安装。
--------------------------------------------------------------------------------
/docs/CONFIG/config_2.md:
--------------------------------------------------------------------------------
1 | # 配置2:Linux环境
2 |
3 | Linux环境对于生物信息学而言,是一个必备的环境,所以我们得确保自己的电脑有一个可以用的linux环境。在这里,我推荐wsl,这是微软官方发布的一个专门用于在Windows上运行linux的环境,可以说,这避免了大量的vmware的bug出现,对于小白也是相当友好的。
4 |
5 | 为了简单起见,我这里放上微软的官方教程:https://docs.microsoft.com/zh-cn/windows/wsl/install
6 |
7 |
--------------------------------------------------------------------------------
/docs/MOFA/README.md:
--------------------------------------------------------------------------------
1 | # 分析4:MOFA单细胞多组学因子分析
2 |
3 |
--------------------------------------------------------------------------------
/docs/MOFA/mofa_1.md:
--------------------------------------------------------------------------------
1 | # MOFA分析: 环境配置
2 |
3 | mofa的整个分析,也是分为Python与R两部分,可以说,复杂的生物信息学分析,都与Python跟R密切相关,而对于Mofa分析而言,或许更为复杂。
4 |
5 | ## 1. Python部分
6 |
7 | 在Python部分,主要有以下几个包需要被安装:scglue,scanpy,mofapy2以及episcanpy
8 |
9 | ### 1.1 conda环境
10 |
11 | 在Python部分需要安装的包,可能会与此前已安装的包发生冲突,所以我们新建一个conda环境
12 |
13 | ```shell
14 | conda create -n rna python=3.6
15 | conda activate rna
16 | ```
17 |
18 | 通过上述两行代码,我们现在进入了一个叫rna的python虚拟环境,这个环境是非常干净的,没有什么多余的包,所以我们在下一步中将依次安装需要的依赖
19 |
20 | ### 1.2 Jupyterlab安装
21 |
22 | 由于这是一个新的python环境,所以我们需要重新装一下jupyter
23 |
24 | ```
25 | conda install -c conda-forge jupyterlab
26 | ```
27 |
28 | ### 1.3 scanpy安装
29 |
30 | 接下来进入正题,我们安装单细胞处理所必需的包-scanpy
31 |
32 | ```shell
33 | conda install seaborn scikit-learn statsmodels numba pytables
34 | conda install -c conda-forge python-igraph leidenalg
35 | pip install scanpy
36 | ```
37 |
38 | ### 1.4 scglue安装
39 |
40 | 安装完scanpy后,我们安装一下用于单细胞配对的包scglue
41 |
42 | ```shell
43 | conda install -c defaults -c pytorch -c bioconda -c conda-forge -c scglue scglue --yes
44 | ```
45 |
46 | ### 1.5 mofapy2安装
47 |
48 | 我们接下来安装用于多组学因子训练的模型mofa
49 |
50 | ```
51 | pip install mofapy2
52 | ```
53 |
54 | ### 1.6 episcanpy安装
55 |
56 | 最后,是表观基因组的单细胞处理的包episcanpy的安装
57 |
58 | ```shell
59 | pip install git+https://github.com/colomemaria/epiScanpy
60 | ```
61 |
62 | 以上,就是Python环境所需要的全部依赖了
63 |
64 | ## 2. R环境
65 |
66 | ### 2.1 安装MOFA2
67 |
68 | ```R
69 | if (!requireNamespace("BiocManager", quietly = TRUE))
70 | install.packages("BiocManager")
71 |
72 | BiocManager::install("MOFA2")
73 | ```
74 |
75 | ### 2.2 安装data.table
76 |
77 | ```R
78 | install.packages('data.table',type='source')#源码的形式安装
79 | ```
80 |
81 | ### 2.3 安装scater
82 |
83 | ```R
84 | BiocManager::install("scater",type='source')
85 | ```
86 |
87 | ### 2.4 安装其他
88 |
89 | 上述三个包的安装比较特别,就单独列了出来,其他的就用下面的命令安装就好
90 |
91 | ```R
92 | install.packages(c('purrr', 'ggplot2', 'reticulate', 'argparse','RColorBrewer'))
93 | ```
94 |
95 | 到这里,我们复现Nature所需要的R环境就基本配置好了
--------------------------------------------------------------------------------
/docs/MOFA/mofa_2.md:
--------------------------------------------------------------------------------
1 | # MOFA分析: 单细胞样本对齐
2 |
3 | 前面提到,我们在单细胞多组学中,会得到大量的细胞,每个细胞都有不同的状态。对于mofa而言,2018年的nature使用了scnmt技术,该技术可以同时测定一个细胞的ATAC与RNA情况,但在大部分情况下,我们只能从同一个样本中分别进行ATAC与RNA的测定,而不是对同一个细胞进行测定,但是从理论上来说,应该是有一些细胞是相像的,于是,如何把这些细胞对齐,成为了一个需要解决的问题。
4 |
5 | 本小节参照**分析3:单细胞样本对齐**
6 |
7 |
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do3.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do4-20221019030752121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do4-20221019030752121.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do4-6120066.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do4-6120066.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do4.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do5-6120069.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do5-6120069.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do5.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.assets/do6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_3.assets/do6.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_3.md:
--------------------------------------------------------------------------------
1 | # MOFA分析: scRNA-seq数据处理
2 |
3 | 由于单细胞原始矩阵较大,我们进行mofa分析时往往需要对scRNA-seq数据进行预处理
4 |
5 | ## 1. 数据准备
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | import anndata
11 | import networkx as nx
12 | import scanpy as sc
13 | from matplotlib import rcParams
14 | import pandas as pd
15 | import numpy as np
16 | ```
17 |
18 | ### 1.2 导入数据
19 |
20 | ```python
21 | rna_pair = anndata.read_h5ad("rna_pair.h5ad")
22 | #atac_pair = anndata.read_h5ad("atac_pair.h5ad")
23 | new_pair=pd.read_csv('mofa_pre_pair.csv')
24 | ```
25 |
26 | ### 1.3 配对细胞重命名
27 |
28 | ```python
29 | #new_cell=[]
30 | #for i in rna_pair.obs.index:
31 | # new_cell.append(new_pair[new_pair['scRNA']==i]['sample'].iloc[0])
32 | #rna_pair.obs.index=new_cell
33 | #new_cell[:5]
34 |
35 | r1=rna_pair[new_pair['scRNA']]
36 | r1.obs.index=new_pair.index.values
37 | r1.write_h5ad('rna_mofa_pre1.h5ad',compression="gzip")#必须先保存
38 | rna_pair=anndata.read_h5ad("rna_mofa_pre1.h5ad")
39 |
40 | ```
41 |
42 | ## 2. 单细胞数据质控
43 |
44 | ### 2.1 过滤细胞与基因
45 |
46 | ```python
47 | sc.pp.filter_cells(rna_pair, min_genes=2000)
48 | sc.pp.filter_genes(rna_pair, min_cells=150)
49 | ```
50 |
51 | ### 2.2 过滤线粒体基因
52 |
53 | ```python
54 | rna_pair.var['mt'] = rna_pair.var_names.str.startswith('MT-') # annotate the group of mitochondrial genes as 'mt'
55 | sc.pp.calculate_qc_metrics(rna_pair, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
56 | sc.pl.violin(rna_pair, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
57 | jitter=0.4, multi_panel=True)
58 | ```
59 |
60 | 
61 |
62 | ### 2.3 观察分布
63 |
64 | ```python
65 | sc.pl.scatter(rna_pair, x='total_counts', y='pct_counts_mt')
66 | sc.pl.scatter(rna_pair, x='total_counts', y='n_genes_by_counts')
67 | ```
68 |
69 | 
70 |
71 | ### 2.4 根据分布过滤细胞
72 |
73 | ```python
74 | rna_pair = rna_pair[rna_pair.obs.n_genes_by_counts < 3500, :]
75 | rna_pair = rna_pair[rna_pair.obs.pct_counts_mt < 1, :]
76 | ```
77 |
78 | ### 2.5 高变基因计算
79 |
80 | ```python
81 | sc.pp.normalize_total(rna_pair, target_sum=1e4)
82 | sc.pp.log1p(rna_pair)
83 | sc.pp.highly_variable_genes(rna_pair, min_mean=0.0125, max_mean=3, min_disp=0.5)
84 | sc.pl.highly_variable_genes(rna_pair)
85 | rna_pair = rna_pair[:, rna_pair.var.highly_variable]
86 | ```
87 |
88 | 
89 |
90 | ### 2.6 回归
91 |
92 | ```python
93 | sc.pp.regress_out(rna_pair, ['total_counts', 'pct_counts_mt'])
94 | sc.pp.scale(rna_pair, max_value=10)
95 | rna_pair.write_h5ad('rna_mofa.h5ad',compression='gzip')
96 | ```
97 |
98 |
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do10.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do11-6120091.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do11-6120091.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do11.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do12-6120093.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do12-6120093.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do12.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do13.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do14.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do15.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do16-1636004475065.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do16-1636004475065.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do16.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do7.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do8.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do9-6120084.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do9-6120084.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.assets/do9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_4.assets/do9.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_4.md:
--------------------------------------------------------------------------------
1 | # MOFA分析:scATAC-seq数据处理
2 |
3 | 由于单细胞ATAC-seq数据依然较大,我们需要对其进行过滤
4 |
5 | ## 1. 数据准备
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | import anndata
11 | import networkx as nx
12 | import scanpy as sc
13 | from matplotlib import rcParams
14 | import pandas as pd
15 | import numpy as np
16 | import episcanpy.api as epi
17 | ```
18 |
19 | ### 1.2 导入数据
20 |
21 | ```python
22 | atac_pair = anndata.read_h5ad("atac_pair.h5ad")
23 | new_pair=pd.read_csv('mofa_pre_pair.csv')
24 | ```
25 |
26 | ### 1.3 配对细胞重命名
27 |
28 | ```python
29 | #new_cell=[]
30 | #for i in atac_pair.obs.index:
31 | # new_cell.append(new_pair[new_pair['scATAC']==i]['sample'].iloc[0])
32 | #atac_pair.obs.index=new_cell
33 | #new_cell[:5]
34 |
35 | r1=atac_pair[new_pair['scATAC']]
36 | r1.obs.index=new_pair.index.values
37 | r1.write_h5ad('atac_mofa_pre1.h5ad',compression="gzip")#必须先保存
38 | atac_pair=anndata.read_h5ad("atac_mofa_pre1.h5ad")
39 | ```
40 |
41 | ### 1.4 染色体位置重命名
42 |
43 | ```python
44 | lo=[]
45 | for i in atac_pair.var_names:
46 | lo.append(i.replace(':','_').replace('-','_'))
47 | atac_pair.var.index=lo
48 | ```
49 |
50 | ### 1.5 染色体位置相关基因标注
51 |
52 | #### 1.5.1 Annotation下载
53 |
54 | ```shell
55 | wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz -O gencode.v19.annotation.gtf.gz
56 | gunzip gencode.v19.annotation.gtf
57 | ```
58 |
59 | #### 1.5.2 Annotation标注
60 |
61 | ```python
62 | epi.tl.find_genes(atac_pair,
63 | gtf_file='gencode.v19.annotation.gtf',
64 | key_added='transcript_annotation',
65 | upstream=2000,
66 | feature_type='transcript',
67 | annotation='HAVANA',
68 | raw=False)
69 | ```
70 |
71 | ## 2. 数据处理
72 |
73 | ### 2.1 过滤空feature跟barcode
74 |
75 | ```python
76 | # remove any potential empty features or barcodes
77 | epi.pp.filter_cells(atac_pair, min_features=1)
78 | epi.pp.filter_features(atac_pair, min_cells=1)
79 | atac_pair
80 | ```
81 |
82 | > AnnData object with n_obs × n_vars = 23770 × 214125
83 |
84 | ### 2.2 对数化
85 |
86 | ```python
87 | atac_pair.obs['log_nb_features'] = [np.log10(x) for x in atac_pair.obs['nb_features']]
88 | epi.pl.violin(atac_pair, ['nb_features'])
89 | epi.pl.violin(atac_pair, ['log_nb_features'])
90 | ```
91 |
92 | 
93 |
94 | ### 2.3 设置每个细胞最小具有的feature数
95 |
96 | ```python
97 | # set a minimum number of cells to keep
98 | min_features = 400
99 |
100 | epi.pp.coverage_cells(atac_pair, binary=True, log=False, bins=50,
101 | threshold=min_features, save='Buenrostro_bulk_peaks_coverage_cells.png')
102 | epi.pp.coverage_cells(atac_pair, binary=True, log=10, bins=50,
103 | threshold=min_features, save='Buenrostro_bulk_peaks_coverage_cells_log10.png')
104 | ```
105 |
106 | 
107 |
108 | 
109 |
110 | ### 2.4 设置每个feature存在于最小的细胞数
111 |
112 | ```python
113 | # minimum number of cells sharing a feature
114 | min_cells = 80
115 | epi.pp.coverage_features(atac_pair, binary=True, log=False,
116 | threshold=min_cells, save='Buenrostro_bulk_peaks_coverage_peaks.png')
117 | epi.pp.coverage_features(atac_pair, binary=True, log=True,
118 | threshold=min_cells, save='Buenrostro_bulk_peaks_coverage_peaks_log10.png')
119 | ```
120 |
121 | 
122 |
123 | 
124 |
125 | ### 2.5 过滤细胞与feature
126 |
127 | ```python
128 | min_features = 400
129 | epi.pp.filter_cells(atac_pair, min_features=min_features)
130 | min_cells = 80
131 | epi.pp.filter_features(atac_pair, min_cells=min_cells)
132 | ```
133 |
134 | ### 2.6 计算高变feature
135 |
136 | ```python
137 | min_score_value = 0.515
138 | nb_feature_selected = 20000
139 | epi.pl.variability_features(atac_pair,log=None,
140 | min_score=min_score_value, nb_features=nb_feature_selected,
141 | save='variability_features_plot_bonemarrow_peakmatrix.png')
142 |
143 | epi.pl.variability_features(atac_pair,log='log10',
144 | min_score=min_score_value, nb_features=nb_feature_selected,
145 | save='variability_features_plot_bonemarrow_peakmatrix_log10.png')
146 | ```
147 |
148 | 
149 |
150 | 
151 |
152 | ## 3. 过滤高变feature的cell
153 |
154 | ```python
155 | atac_pair.raw=atac_pair
156 | # create a new AnnData containing only the most variable features
157 | atac_pair = epi.pp.select_var_feature(atac_pair,nb_features=nb_feature_selected,show=False,copy=True)
158 | epi.pl.violin(atac_pair, ['nb_features'])
159 | epi.pl.violin(atac_pair, ['log_nb_features'])
160 | ```
161 | 
162 |
163 | 
164 |
165 | ```python
166 | epi.pp.filter_cells(atac_pair, min_features=1000)
167 | epi.pp.filter_cells(atac_pair, max_features=4000)
168 | atac_pair.write_h5ad('atac_mofa.h5ad',compression="gzip")
169 | ```
170 |
171 |
--------------------------------------------------------------------------------
/docs/MOFA/mofa_5.assets/image-20211104134822671.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_5.assets/image-20211104134822671.png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_5.md:
--------------------------------------------------------------------------------
1 | # MOFA分析:MOFA模型构建
2 |
3 | 到本小节,就正式构建MOFA模型了
4 |
5 | ## 1. 数据准备
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | from mofapy2.run.entry_point import entry_point
11 | import anndata
12 | import networkx as nx
13 | import scanpy as sc
14 | from matplotlib import rcParams
15 | import pandas as pd
16 | import numpy as np
17 | ```
18 |
19 | ### 1.2 导入数据
20 |
21 | ```python
22 | rna=anndata.read_h5ad("rna_mofa.h5ad")
23 | atac=anndata.read_h5ad("atac_mofa.h5ad")
24 | ```
25 |
26 | ### 1.3 计算多组学公共list
27 |
28 | ```python
29 | ret3= list(set(rna.obs.index).intersection(atac.obs.index))
30 | ```
31 |
32 | ### 1.4 观察细胞类型
33 |
34 | ```python
35 | for i in list(set(rna[ret3].obs['cell_type'])):
36 | print(i,len(rna[ret3].obs.loc[rna[ret3].obs['cell_type']==i])/len(rna[ret3].obs))
37 | ```
38 |
39 | > OPC 0.046986390149060274
40 | >
41 | > PER.END 0.0031756318859364873
42 | >
43 | > ASC 0.07465975372650681
44 | >
45 | > INH 0.08165910563836681
46 | >
47 | > MG 0.0390149060272197
48 | >
49 | > ODC 0.6180816591056384
50 | >
51 | > EX 0.13642255346727156
52 |
53 | ## 2. MOFA模型
54 |
55 | ### 2.1 MOFA参数设置
56 |
57 | ```python
58 | # initialise the entry point
59 |
60 | ent1 = entry_point()
61 | ent1.set_data_options(
62 | scale_groups = False,
63 | scale_views = False,
64 | center_groups=True,
65 | )
66 | ```
67 |
68 | ### 2.2 构建组学层
69 |
70 | ```python
71 | data_mat=[[None for g in range(1)] for m in range(2)]
72 | data_mat[0][0]=rna[ret3].X
73 | data_mat[1][0]=np.array(atac[ret3].X.todense())
74 | ```
75 |
76 | ### 2.3 MOFA数据导入
77 |
78 | ```python
79 | ent1.set_data_matrix(data_mat, likelihoods = ["gaussian","gaussian"],
80 | views_names=['rna','atac'],
81 | samples_names=[ret3],
82 | features_names=[rna[ret3].var_names,atac[ret3].var_names])
83 | ```
84 |
85 | ### 2.4 模型参数设置
86 |
87 | ```python
88 | ent1.set_model_options(
89 | factors = 20,
90 | spikeslab_weights = True,
91 | ard_factors = True,
92 | ard_weights = True
93 | )
94 | ent1.set_train_options(
95 | iter = 3000,
96 | convergence_mode = "slow",
97 | startELBO = 1,
98 | freqELBO = 1,
99 | dropR2 = 0.001,
100 | gpu_mode = True,
101 | verbose = False,
102 | seed = 1
103 | )
104 | ```
105 |
106 | ### 2.5 模型运行并保存
107 |
108 | ```python
109 |
110 | ent1.build()
111 |
112 | ent1.run()
113 |
114 | # Save the output
115 | ent1.save(outfile='mofa_factor.hdf5')
116 | ```
117 |
118 | 
119 |
120 | ### 2.6 meta数据导出
121 |
122 | ```python
123 | rna[ret3].obs.to_csv('mofa_meta.csv',index_label='sample')  # name the cell-ID column "sample", as the R step that reads this file requires
124 | ```
125 |
126 |
--------------------------------------------------------------------------------
/docs/MOFA/mofa_6.md:
--------------------------------------------------------------------------------
1 | # MOFA下游分析:R语言环节
2 |
3 | ## 1. MOFA数据处理
4 |
5 | ### 1.1 数据导入
6 |
7 | ```R
8 | #load library
9 | library(MOFA2)
10 | library(data.table)
11 | library(ggplot2)
12 | library(tidyverse)
13 | #install.packages('psych')
14 | #install.packages('ggpubr')
15 |
16 | #load data
17 | sample_metadata <- fread('mofa_meta.csv')#注意第一列细胞的标头要写为sample
18 | model <- load_model("mofa_factor.hdf5")
19 | samples_metadata(model) <- sample_metadata
20 | ```
21 |
22 | ### 1.2 Var数据导出
23 |
24 | ```R
25 | #Variance decomposition by Factor
26 | plot_variance_explained(model, max_r2=15)
27 | variance_explained=model@cache[["variance_explained"]][["r2_per_factor"]][["group0"]]
28 | write.table(variance_explained,file='variance_explained.csv',sep=',',row.names =FALSE)
29 |
30 | ```
31 |
32 | ### 1.3 Cor数据导出
33 |
34 | ```R
35 | #Association analysis
36 | p=correlate_factors_with_covariates(model, covariates = c("cell_type","Age","Sex"), plot="log_pval",return_data=TRUE)
37 | write.table(p,file='correlate_factors.csv',sep=',')
38 | ```
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (3).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (3).png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (4).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (4).png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (5).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (5).png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (6).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (6).png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (7).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (7).png
--------------------------------------------------------------------------------
/docs/MOFA/mofa_7.assets/下载 (8).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MOFA/mofa_7.assets/下载 (8).png
--------------------------------------------------------------------------------
/docs/MultiOMIC/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/.DS_Store
--------------------------------------------------------------------------------
/docs/MultiOMIC/README.md:
--------------------------------------------------------------------------------
1 | # 多组学分析白皮书
2 |
3 | 在本章中,我们将根据Cell的单细胞转录组与单细胞染色质开放组的数据,构建出一套完整的pipeline用于下游的分析,目的在于使得一些测得很好的数据能被充分地挖掘出来
4 |
5 | - 第一章-数据预处理
6 | - 第二章-GLUE多组学整合
7 | - 第三章-整体细胞分析
8 | - 第四章-细胞亚群分析
9 |
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-1.md:
--------------------------------------------------------------------------------
1 | ## 1. 数据预处理
2 |
3 | 在本章中,我们将介绍如何对数据进行初步的处理,数据来源分别为`Cellranger`,`Cellranger-atac`,`velocyto`三个上游分析结果的数据,格式分别为:
4 |
5 | - cellranger:`filtered_feature_bc_matrix.h5`或`filtered_feature_bc_matrix`文件夹
6 | - cellranger-atac: `filtered_peak_bc_matrix`文件夹下的`matrix.mtx`,`barcodes.tsv`,`peaks.bed`三个文件以及`filtered_peak_bc_matrix.h5`文件(如果有的话最好)
7 | - velocyto: `possorted_genome_bam.loom`文件
8 |
9 | ### 1.1 RNA数据预处理
10 |
11 | 我们将`filtered_feature_bc_matrix.h5`与`possorted_genome_bam.loom`文件放在同一个目录下,然后使用scvelo的教程读取这两个文件,然后保存成h5ad格式
12 |
13 | ```python
14 | import scanpy as sc
15 | import scvelo as scv
16 |
17 | #读取矩阵文件
18 | adata=sc.read_10x_h5('filtered_feature_bc_matrix.h5')
19 | #使得obs跟var名唯一
20 | adata.var_names_make_unique()
21 | adata.obs_names_make_unique()
22 | #读取velocyto文件
23 | ldata = scv.read('possorted_genome_bam.loom', cache=True)
24 | #合并两文件
25 | adata = scv.utils.merge(adata, ldata)
26 | #保存文件
27 | adata.write_h5ad('rna_raw.h5ad',compression='gzip')
28 | ```
29 |
30 | ### 1.2 ATAC数据预处理
31 |
32 | 我们找到`filtered_peak_bc_matrix`文件夹下的`matrix.mtx`,`barcodes.tsv`,`peaks.bed`三个文件,使用episcanpy读取后保存
33 |
34 | ```python
35 | import episcanpy
36 | adata=episcanpy.pp.read_ATAC_10x('matrix.mtx', \
37 | cell_names='barcodes.tsv', \
38 | var_names='peaks.bed')
39 | adata.write_h5ad('atac_raw.h5ad',compression="gzip")
40 | ```
41 |
42 | ### 1.3 ATAC数据生成Gene-Activity矩阵
43 |
44 | 这里提供两种不同的方法达到此目的,一个较简单,一个较繁琐。区别在于:简单的方法需要`filtered_peak_bc_matrix.h5`文件,而你不一定有这个文件;繁琐的方法只依赖前面生成的`atac_raw.h5ad`文件,因此一定能实现
45 | #### 方法1: 使用`filtered_peak_bc_matrix.h5`文件
46 |
47 | 直接运行下面的代码,生成一个`gene_activity_gene_score.h5`文件
48 | ```shell
49 | MAESTRO scatac-genescore \
50 | --format h5 \
51 | --peakcount filtered_peak_bc_matrix.h5 \
52 | --genedistance 10000 \
53 | --species GRCh38 \
54 | --model Enhanced \
55 | -d /data/result \
56 | --outprefix gene_activity
57 | ```
58 |
59 | #### 方法2:使用前面生成的`atac_raw.h5ad`文件
60 |
61 | ```python
62 | import scanpy as sc
63 | atac=sc.read('../data/raw_data/atac.h5ad')
64 | atac.to_df().T.to_csv('../data/raw_data/brca_atac.tsv', sep='\t')
65 | ```
66 |
67 | 然后再运行下面的代码生成一个`gene_activity_gene_score.h5`文件
68 | ```shell
69 | MAESTRO scatac-genescore \
70 | --format plain \
71 | --peakcount brca_atac.tsv \
72 | --genedistance 10000 \
73 | --species GRCh38 \
74 | --model Enhanced \
75 | -d /data/result \
76 | --outprefix gene_activity
77 | ```
78 |
79 | ### 1.4 Gene-Activity矩阵保存为h5ad
80 |
81 | 这一步骤稍微有一些繁琐,不过代码照着运行就好了
82 |
83 | ```python
84 | import os
85 | import collections
86 | import tables
87 | import h5py
88 | import scipy.io
89 | import csv
90 | import gzip
91 | import scipy.sparse as sp_sparse
92 | import argparse as ap
93 | import pandas as pd
94 | FeatureBCMatrix = collections.namedtuple('FeatureBCMatrix', ['ids', 'names', 'barcodes', 'matrix'])
95 | def read_10X_h5(filename):
96 | """Read 10X HDF5 files, support both gene expression and peaks."""
97 | with tables.open_file(filename, 'r') as f:
98 | try:
99 | group = f.get_node(f.root, 'matrix')
100 | except tables.NoSuchNodeError:
101 | print("Matrix group does not exist in this file.")
102 | return None
103 | feature_group = getattr(group, 'features')
104 | ids = getattr(feature_group, 'id').read()
105 | names = getattr(feature_group, 'name').read()
106 | barcodes = getattr(group, 'barcodes').read()
107 | data = getattr(group, 'data').read()
108 | indices = getattr(group, 'indices').read()
109 | indptr = getattr(group, 'indptr').read()
110 | shape = getattr(group, 'shape').read()
111 | matrix = sp_sparse.csc_matrix((data, indices, indptr), shape=shape)
112 | return FeatureBCMatrix(ids, names, barcodes, matrix)
113 |
114 | ```
115 |
116 | ```python
117 | import re
118 | import anndata  # needed for anndata.AnnData below; it was never imported in this snippet
119 | scatac_count=read_10X_h5('gene_activity_gene_score.h5')
120 | peakmatrix = scatac_count.matrix
121 | features = scatac_count.names.tolist()
122 | features = [re.sub(r"\W", "_", feature.decode()).encode() for feature in features]  # raw string: "\W" is an invalid escape in a plain literal
123 | barcodes = scatac_count.barcodes.tolist()
124 | adata=anndata.AnnData(peakmatrix.T,obs=barcodes,var=features)
125 | adata.obs.index=[i.decode('utf-8') for i in adata.obs[0]]  # names are bytes here; decode them to str for the index
126 | adata.var.index=[i.decode('utf-8') for i in adata.var[0]]
127 | del adata.obs[0]  # drop the temporary column now that it has become the index
128 | del adata.var[0]
129 | adata.write_h5ad('gene_activity_gene_score.h5ad',compression='gzip')
130 | ```
131 |
132 | 到此,我们数据分析所需要的所有基本文件就准备完成了
133 |
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-2.md:
--------------------------------------------------------------------------------
1 | ## 2. GLUE多组学整合
2 |
3 | 文字版教程解释参照GLUE官方文档:https://scglue.readthedocs.io/zh_CN/latest/tutorials.html
4 |
5 | 但具体的代码参照我的文件,我对教程以及我们的数据进行了配套处理,分别是`glue-0.ipynb`,`glue-1.ipynb`,`glue-2.ipynb`三个文件。
6 |
7 | - `glue-0.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/glue-0.ipynb
8 | - `glue-1.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/glue-1.ipynb
9 | - `glue-2.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/glue-2.ipynb
10 |
11 | 但值得注意的是,我们这里得到的细胞只是完成了整合,还没有进行配对。所谓配对,就是使得一个细胞同时具有两个组学层。配对的方法如下,我们需要使用具有`X_glue`层的rna与atac文件
12 |
13 | ```python
14 | #读取数据
15 | rna=sc.read('../cellanno/rna_anno.h5ad')
16 | atac=sc.read('../cellanno/atac_anno.h5ad')
17 |
18 | #提取GLUE层结果
19 | rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
20 | atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
21 |
22 | #对GLUE层进行Pearson系数分析
23 | import numpy as np
24 | import gc
25 | len1=(len(rna_loc)//5000)+1  # number of 5000-cell chunks needed to cover the RNA cells
26 | p_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])  # top-50 correlation values per ATAC cell
27 | n_pd=pd.DataFrame(columns=['rank_'+str(i) for i in range(50)])  # names of the matching top-50 RNA cells
28 | for j in range((len(atac_loc)//5000)+1):  # ATAC chunks: sized from the ATAC cell count so no ATAC cell is skipped
29 |     c=pd.DataFrame()
30 |     for i in range(len1):
31 |         t1=rna_loc.iloc[5000*(i):5000*(i+1)]
32 |         t2=atac_loc.iloc[5000*(j):5000*(j+1)]
33 |         a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]  # keep only the (ATAC rows x RNA columns) block of the joint matrix
34 |         b=pd.DataFrame(a,index=t2.index,columns=t1.index)
35 |
36 |         c=pd.concat([c,b],axis=1)
37 |         del t1
38 |         del t2
39 |         del a
40 |         del b
41 |         gc.collect()
42 |     for i in range(len(c)):
43 |         t_c=c.iloc[i]
44 |         p_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].values
45 |         n_pd.loc[t_c.name]=c.iloc[i].sort_values(ascending=False)[:50].index.tolist()
46 |     print('Now epoch is {}, {}/{}'.format(j,j*5000+len(c),len(atac_loc)))
47 |     del c
48 |     gc.collect()
49 |
50 | #寻找最近的细胞,其中depth的灵活调整可以使得配对成功的细胞数变大,同时精度有所下降
51 | def find_neighbor_cell(p_pd,n_pd,depth=10):
52 |     """Greedily pair each row of p_pd with a not-yet-used partner from n_pd over ranks 0..depth-1 (score > 0.9); returns a DataFrame with columns omic_1/omic_2."""
53 |     rubish_c,finish_c=[],[]  # partners already taken (values of n_pd) / row labels already paired
54 |     taken=set()  # same content as rubish_c, kept as a set for O(1) membership tests
55 |     for d in range(depth):
56 |         p_pd=p_pd.loc[p_pd['rank_{}'.format(d)]>0.9]
57 |         p_pd=p_pd.sort_values('rank_{}'.format(d),ascending=False)  # strongest candidates claim their partner first
58 |         for i in p_pd.index:
59 |             name=n_pd.loc[i,'rank_{}'.format(d)]
60 |             if name not in taken:
61 |                 taken.add(name)
62 |                 finish_c.append(i)
63 |                 rubish_c.append(name)
64 |         p_pd=p_pd.loc[~p_pd.index.isin(finish_c)]  # paired rows do not compete again at the next rank
65 |         n_pd=n_pd.loc[~n_pd.index.isin(finish_c)]
66 |     result=pd.DataFrame()
67 |     result['omic_1']=rubish_c
68 |     result['omic_2']=finish_c
69 |     result.index=['cell_{}'.format(i) for i in range(len(result))]
70 |     return result
71 |
72 | res_pair=find_neighbor_cell(p_pd,n_pd,depth=20)
73 | res_pair.head()
74 | ```
75 |
76 | | | omic_1 | omic_2 |
77 | | -----: | -----------------: | ---------------------- |
78 | | cell_0 | AACTCAGCATGATCCA | neg-GCACGGTGTGCAAGAC-1 |
79 | | cell_1 | AAACGGGTCGGCGCAT | neg-TCTATTGAGAGGCAGG-1 |
80 | | cell_2 | GACGCGTAGACAAGCC | neg-GTGTCCTGTCATTGCA-1 |
81 | | cell_3 | AAACCTGTCCCAACGG-1 | neg-AGTGTACCATGTGGGA-1 |
82 | | cell_4 | CATATTCCAAACCCAT | neg-TCAGCTCCAACGGGTA-1 |
83 |
84 | ## 3. 整体细胞分析
85 |
86 | 文字版教程解释参照Pyomic官方文档:https://pyomic.readthedocs.io/en/latest/Tutorials/t_cellanno.html
87 |
88 | 但具体的代码依旧参照我的文件,我对相关配置进行了修改处理,分别是`cellanno-1.ipynb`,`cellanno-2.ipynb`
89 |
90 | - `cellanno-1.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/cellanno-1.ipynb
91 | - `cellanno-2.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/cellanno-2.ipynb
92 |
93 | 绘图代码见`figure1.ipynb`与`figure2.ipynb`文件
94 |
95 | - `figure1.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/figure1.ipynb
96 | - `figure2.ipynb`: https://github.com/Starlitnightly/bioinformatic_tutorial/blob/main/MultiOMIC-ipynb/figure2.ipynb
97 |
98 |
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004245874-6119913.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004245874-6119913.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004245874-6119960.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004245874-6119960.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004245874.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004245874.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004442469-6119965.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004442469-6119965.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004442469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004442469.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004556158-6119968.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004556158-6119968.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004556158.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004556158.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004807328-6119971.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004807328-6119971.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004807328.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004807328.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004930306-6119974.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004930306-6119974.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019004930306.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019004930306.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005115998-6119977.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005115998-6119977.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005115998.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005115998.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005254018-6119981.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005254018-6119981.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005254018.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005254018.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005328419-6119984.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005328419-6119984.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005328419.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005328419.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005554337-6119988.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005554337-6119988.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005554337.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005554337.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005740026-6119990.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005740026-6119990.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-3.assets/image-20221019005740026.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-3.assets/image-20221019005740026.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012328268-6120000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012328268-6120000.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012328268.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012328268.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012404062-6120003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012404062-6120003.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012404062.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012404062.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012503861-6120007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012503861-6120007.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-4.assets/image-20221019012503861.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-4.assets/image-20221019012503861.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014049480-6120013.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014049480-6120013.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014049480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014049480.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014313452-6120017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014313452-6120017.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014313452.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014313452.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014538577-6120020.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014538577-6120020.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014538577.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014538577.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014718599-6120022.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014718599-6120022.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014718599.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014718599.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014912052-6120025.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014912052-6120025.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-5.assets/image-20221019014912052.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-5.assets/image-20221019014912052.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020116601-6120031.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020116601-6120031.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020116601.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020116601.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020147975-6120034.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020147975-6120034.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020147975.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020147975.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020412203-6120038.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020412203-6120038.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-6.assets/image-20221019020412203.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-6.assets/image-20221019020412203.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021043508-6120043.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021043508-6120043.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021043508.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021043508.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021239322-6120046.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021239322-6120046.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021239322.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021239322.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021513334-6120051.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021513334-6120051.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.assets/image-20221019021513334.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/MultiOMIC/book-7.assets/image-20221019021513334.png
--------------------------------------------------------------------------------
/docs/MultiOMIC/book-7.md:
--------------------------------------------------------------------------------
1 | ### 4.5 转录因子网络
2 |
3 | 恭喜你,来到了B细胞亚群的最后一小节分析,我们在前面的研究中已经比较过淋巴结转移的差异表达、细胞通讯以及动态调控的差异,更进一步的,我们需要挖掘背后调控的分子机制,到底是什么因素在控制着淋巴结转移,于是很直观的,我们将采取转录因子分析。
4 |
5 | 我们在GLUE的第三部分,已经得到了每个细胞的AUCell文件,该文件代表了每一个细胞所含有的转录因子类型以及活性,下面,我们就利用这个文件,进一步挖掘阳性淋巴结与阴性淋巴结背后的调控关系。
6 |
7 | 首先依然是导入包
8 |
9 | ```python
10 | #导入包
11 | import anndata
12 | print('anndata(Ver): ',anndata.__version__)
13 | import scanpy as sc
14 | print('scanpy(Ver): ',sc.__version__)
15 | import scltnn #非必需
16 | print('scltnn(Ver): ',scltnn.__version__)
17 | import matplotlib.pyplot as plt
18 | import matplotlib
19 | print('matplotlib(Ver): ',matplotlib.__version__)
20 | import seaborn as sns
21 | print('seaborn(Ver): ',sns.__version__)
22 | import numpy as np
23 | print('numpy(Ver): ',np.__version__)
24 | import pandas as pd
25 | print('pandas(Ver): ',pd.__version__)
26 | import scvelo as scv
27 | print('scvelo(Ver): ',scv.__version__)
28 | import Pyomic
29 | print('Pyomic(Ver): ',Pyomic.__version__)
30 |
31 | #绘图参数设置
32 | sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
33 | sc.settings.set_figure_params(dpi=80, facecolor='white')
34 |
35 | sc_color=['#7CBB5F','#368650','#A499CC','#5E4D9A','#78C2ED','#866017','#9F987F', '#E0DFED', '#EF7B77', '#279AD7',
36 | '#F0EEF0', '#1F577B', '#A56BA7', '#E0A7C8', '#E069A6', '#941456', '#FCBC10', '#EAEFC5', '#01A0A7', '#75C8CC',
37 | '#F0D7BC', '#D5B26C', '#D5DA48', '#B6B812','#9DC3C3', '#A89C92', '#FEE00C','#FEF2A1']
38 |
39 | ```
40 |
41 | 接着,我们导入了一些数据
42 |
43 | ```python
44 | #rna表达数据
45 | rna=sc.read('B_cell_anno_new.h5ad')
46 | #aucell数据
47 | rna_aucell=pd.read_csv('../glue_true/rna_aucell.tsv',sep ='\t')
48 | rna_aucell.set_index(rna_aucell.columns[0],inplace=True)
49 | #生成aucell的h5ad(B细胞)
50 | rna_auc_adata=anndata.AnnData(rna_aucell)
51 | rna_auc_adata=rna_auc_adata[rna.obs.index]
52 | rna_auc_adata.obs=rna.obs
53 | rna_auc_adata.uns=rna.uns
54 |
55 | ```
56 |
57 | 我们绘制出阳性淋巴结与阴性淋巴结的平均转录因子水平
58 |
59 | ```python
60 | regulons=pd.DataFrame(columns=rna_auc_adata.var.index)
61 | for i in list(set(rna_auc_adata.obs['Type'])):
62 | regulons.loc[i]=rna_auc_adata[rna_auc_adata.obs['Type']==i].X.mean(axis=0)
63 | regulons.head()
64 | ```
65 |
66 |
67 |
28 |
29 | 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。
--------------------------------------------------------------------------------
/docs/RNASEQ/README.md:
--------------------------------------------------------------------------------
1 | # 分析1:RNA-seq下游分析
2 |
3 | 写在前面,当今时代是多组学的时代,而其中,研究最多的应该是RNA-seq相关的分析,所以,作为一个生物信息学专业的学生,最应掌握的基础分析技能,应该是RNA-seq相关的分析,本教程将从RNA-seq分析入手,详细讲解RNA-seq分析相关的技术
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_1.md:
--------------------------------------------------------------------------------
1 | # RNA-seq分析: 环境配置
2 |
3 | 对于RNA-seq相关的分析,一共分为python跟R两部分。
4 |
5 | 我们这里处理的是counts矩阵。counts矩阵简单来说,可以理解成每一行为一个基因、每一列为一个样本的矩阵,如下表所示
6 |
7 | | geneid | sample1 | sample2 |
8 | | ------------------- | ------- | ------- |
9 | | **ENSG00000223972** | 0 | 0 |
10 | | **ENSG00000227232** | 82 | 63 |
11 | | **ENSG00000278267** | 11 | 2 |
12 |
13 | 我们的目的,就是挖掘这个矩阵中可能包含的信息,各种生物学意义的东西。
14 |
15 | ## 1. Python部分
16 |
17 | 对于python部分,在前面分析时所安装的Anaconda,理论上就足够完成全部分析了,但是我们仍有一些包需要安装,比如基因id转换,看到ENSG可能你也不知道这是一个什么东西吧。
18 |
19 | ```shell
20 | #安装mygene
21 | pip install mygene
22 | ```
23 |
24 |
25 |
26 | ## 2. R语言部分
27 |
28 | 对于R语言部分,实际上仅DESeq2这个包是需要的,所以我们就安装这个包就好了
29 |
30 | ```R
31 | if (!requireNamespace("BiocManager", quietly = TRUE))
32 | install.packages("BiocManager")
33 |
34 | BiocManager::install("DESeq2")
35 | ```
36 |
37 |
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_2.assets/NHDF_heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_2.assets/NHDF_heatmap.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_2.assets/NHDF_volcano-1635819681870.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_2.assets/NHDF_volcano-1635819681870.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_2.assets/NHDF_volcano.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_2.assets/NHDF_volcano.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_2.assets/untitled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_2.assets/untitled.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_2.assets/untitled1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_2.assets/untitled1.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_3.assets/NOD-like receptor signaling pathway.prerank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_3.assets/NOD-like receptor signaling pathway.prerank.png
--------------------------------------------------------------------------------
/docs/RNASEQ/rnaseq_3.assets/untitled1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/RNASEQ/rnaseq_3.assets/untitled1.png
--------------------------------------------------------------------------------
/docs/RNASEQUP/README.md:
--------------------------------------------------------------------------------
1 | # 分析1:RNA-seq上游分析
2 |
3 |
--------------------------------------------------------------------------------
/docs/RNASEQUP/rnasequp_1.md:
--------------------------------------------------------------------------------
1 | # RNA-seq. 环境配置
2 |
3 | 我们在前面的教程已经完成了Linux的安装,目前我们的linux可以理解成一个空壳,里面大概什么都没有,于是我们需要安装一些RNA-seq上游分析所必要的包。
4 |
5 | ## 1. 环境要求
6 |
7 | ### 1.1 Linux环境
8 |
9 | 对于Linux环境,我们安装的系统为Ubuntu18.04,满足此系统即可。
10 |
11 | ### 1.2 Windows环境
12 |
13 | 对于Windows操作系统,我们首先安装WSL即可
14 |
15 | ## 2. 软件安装
16 |
17 | ### 2.1 miniconda安装
18 |
19 | conda是一个开源的软件包管理系统和环境管理系统,可用于安装多个版本的软件包及其依赖关系,并且可以任意切换(安装conda的目的是为了防止软件版本与包之间互相干扰。)(e.g. 有些软件只能在Python3.6上运行,再新的版本会出现bug,这时就需要用conda来解决这个问题)
20 |
21 | 我们在这里选择安装miniconda,这是一个轻量级的conda框架,相较于Anaconda的臃肿而言,更加轻便
22 |
23 | #### 2.1.1 下载miniconda
24 |
25 | ```python
26 | # 在linux中使用以下命令下载miniconda
27 | wget -c https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-x86_64.sh
28 | ```
29 |
30 | #### 2.1.2 安装miniconda
31 |
32 | ```python
33 | # 安装刚刚下载的Miniconda,bash就是运行.sh文件的意思
34 | bash Miniconda3-latest-Linux-x86_64.sh
35 | ```
36 |
37 | #### 2.1.3 激活conda
38 |
39 | ```python
40 | #将conda命令添加到环境变量中
41 | source .bashrc
42 | ```
43 |
44 | #### *2.1.4 国内用户选择清华镜像
45 |
46 | ```python
47 | # 添加镜像
48 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
49 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge
50 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda && conda config --set show_channel_urls yes
51 | ```
52 |
53 | ### 2.2 RNA-seq上游依赖包安装
54 |
55 | 在这一步中,我们将安装转录组上游分析所需要用到的各个包,我将在最后的一小节中简单介绍一下每个包的用途
56 |
57 | #### 2.2.1 创建虚拟环境
58 |
59 | 为了不干扰Linux系统下其他包的运行,我们将创建一个全新的虚拟环境用来管理RNA-seq分析将用到的包,我们将此虚拟环境命名为rna
60 |
61 | ```python
62 | #创建名为rna的软件安装环境
63 | conda create -n rna python=3
64 | #查看当前conda环境
65 | conda info --envs
66 | #激活conda的rna环境
67 | source activate rna
68 | ```
69 |
70 | 在每一次退出Linux重进后,都不要忘记了输入source activate rna激活环境
71 |
72 | #### 2.2.2 安装RNA-seq上游依赖包
73 |
74 | ```python
75 | #以下一行命令即可安装完成
76 | conda install -y fastp fastqc multiqc subread bedtools cutadapt trim-galore sra-tools
77 | ```
78 |
79 | #### *2.2.3 RNA-seq上游依赖包简介
80 |
81 | | Package | Description |
82 | | ----------- | ------------------------------------------------------------ |
83 | | fastp | fastq文件质控软件,极其智能 |
84 | | fastqc | 高通量测序数据的高级质控工具 |
85 | | multiqc | 对测序数据进行质量评估(将fastqc生成的多个报告整合成一个文件) |
86 | | subread | 将reads比对到参考基因组上(速度极快) |
87 | | bedtools | 涵盖各种基因组计算所需要的工具 |
88 | | cutadapt | 从高通量测序数据中发现并去除衔接子序列,引物,poly-A尾巴和其他类型的不需要的序列 |
89 | | trim-galore | 是对FastQC和Cutadapt的包装。适用于所有高通量测序,包括RRBS(Reduced Representation Bisulfite-Seq ), Illumina、Nextera 和smallRNA测序平台的双端和单端数据 |
90 | | sra-tools | 来自NCBI的SRA工具包和SDK是工具和库的集合,这些工具和库用于使用INSDC序列读取档案中的数据 |
91 |
92 |
--------------------------------------------------------------------------------
/docs/RNASEQUP/rnasequp_2.md:
--------------------------------------------------------------------------------
1 | # RNA-seq. 上游分析全教程
2 |
3 | ## 1. 测序数据下载
4 |
5 | 在本节中,我们将介绍如何从ncbi上下载原始测序数据SRA文件进行分析
6 |
7 | ### 1.1 SRA文件下载
8 |
9 | #### 1.1.1 单SRA文件下载
10 |
11 | 如果你只想分析一个SRA文件,并且已经知道了SRA号,那么我们可以在linux交互环境输入以下命令进行下载
12 |
13 | ```python
14 | #单SRA下载
15 | prefetch SRR10502962
16 | ```
17 |
18 | #### 1.1.2 多SRA文件下载
19 |
20 | 如果你需要批量下载SRA文件,那么你需要得到一个带有多个SRA号的txt文件
21 |
22 | e.g. SRR_Acc_List.txt文件内容
23 |
24 | ```python
25 | SRR5113012
26 | SRR5113013
27 | SRR5113014
28 | ```
29 |
30 | ```python
31 | #多SRA下载
32 | prefetch --option-file SRR_Acc_List.txt
33 | ```
34 |
35 | ### 1.2 sra格式转fastq格式
36 |
37 | sra格式的文件一般是经过压缩的测序文件,我们需要转换成原始测序数据fastq格式。
38 |
39 | #### 1.2.1 单端测序文件转换
40 |
41 | ```python
42 | #转换当前目录下全部以.sra结尾的文件
43 | fastq-dump *.sra
44 | ```
45 |
46 | #### 1.2.2 双端测序文件转换
47 |
48 | ```python
49 | #--split-files参数可以将其分解为两个fastq文件。
50 | fastq-dump --split-files *.sra
51 | ```
52 |
53 | ## 2. 测序数据质控
54 |
55 | 为了检测我们测序数据的质量,我们常常需要生成一份质控报告进行直观的观察
56 |
57 | ### 2.1 检测数据质量
58 |
59 | #### 2.1.1 单份报告生成
60 |
61 | 在本小节,我们使用fastqc生成质控报告,在fastq文件目录下输入
62 |
63 | ```python
64 | #批量生成所有fastq文件的质控报告
65 | fastqc *.fastq
66 | ```
67 |
68 | 等待运行结束后,在同目录下会生成 `*_fastqc.html` 和 `*_fastqc.zip` 两个文件,我们可以打开对应的html文件查看该fastq数据的质量。报告的解读见文章
69 |
70 | #### 2.1.2 多份报告整合
71 |
72 | 通过上面的步骤,我们得到了每个fastq文件的质控报告,为了整体进行评估,我们使用multiqc整合报告结果
73 |
74 | ```python
75 | #整合质控报告结果
76 | multiqc *.zip
77 | ```
78 |
79 | ### 2.2 测序数据过滤
80 |
81 | 由于我们得到的数据可能包括接头序列,引物,poly-A尾巴和其他类型的不需要的序列,为避免影响下面的分析,我们需要去除这些无关的测序数据。
82 |
83 | 在这里,相较于其他过滤工具而言,我们选择trim-galore
84 |
85 | #### 2.2.1 单端测序
86 |
87 | ```shell
88 | #新建clean文件夹存放测序结果
89 | mkdir clean
90 | #单端测序数据质量过滤
91 | #参数说明:-q 设定Phred quality score阈值;--phred33 选择--phred33或者--phred64,表示测序平台使用的Phred quality score编码;
92 | #--stringency 设定可以忍受的前后adapter重叠的碱基数,默认为1(非常苛刻)。可以适度放宽,因为后一个adapter几乎不可能被测序仪读到;
93 | #--length 设定输出reads长度阈值,小于设定值会被抛弃;-e 容错率;-o 输出结果目录;最后一个参数为待处理文件
94 | trim_galore -q 20 --phred33 \
95 | --stringency 3 --length 20 -e 0.1 \
96 | -o /home/seq/clean \
97 | /home/seq/SRR10502962_1.fastq
98 | ```
99 |
100 | #### 2.2.2 双端测序
101 |
102 | ```shell
103 | #新建clean文件夹存放测序结果
104 | mkdir clean
105 | #双端测序数据质量过滤(--paired表示双端测序)
106 | trim_galore -q 25 \
107 | --phred33 \
108 | --stringency 3 \
109 | --length 36 \
110 | -e 0.1 \
111 | --paired \
112 | -o /home/seq/clean \
113 | /home/seq/SRR10502962_1.fastq /home/seq/SRR10502962_2.fastq
114 | ```
115 |
116 | #### 2.2.3 多组测序数据同时过滤
117 |
118 | 为了同时过滤多组测序数据,我们在这里编写.sh脚本来在Linux系统下批量运行
119 |
120 | **config**
121 |
122 | ```shell
123 | mkdir clean
124 | cd clean
125 | ls /home/seq/*_1.fastq >1
126 | ls /home/seq/*_2.fastq >2
127 | paste 1 2 > config
128 | ```
129 |
130 | **qc.sh**
131 |
132 | ```shell
133 | bin_trim_galore=trim_galore
134 | dir='/home/seq/clean'
135 | cat $1 |while read id
136 | do
137 | arr=(${id})
138 | fq1=${arr[0]}
139 | fq2=${arr[1]}
140 | $bin_trim_galore -q 25 --phred33 --length 36 --stringency 3 --paired -o $dir $fq1 $fq2
141 | done
142 | ```
143 |
144 | 运行qc.sh
145 |
146 | ```shell
147 | #config是传递进去的参数
148 | bash qc.sh config
149 | ```
150 |
151 | ## 3. 比对到参考基因组
152 |
153 | 由于测序仪机器读长的限制,在构建文库的过程中首先需要将DNA片段化,测序得到的序列只是基因组上的部分序列。为了确定测序reads在基因组上的位置,需要将reads比对回参考基因组上,这个步骤叫做mapping
154 |
155 | ### 3.1 参考基因组文件下载
156 |
157 | ```shell
158 | #参考基因组下载(hg38)(subread)
159 | wget -O hg38.fa.gz http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz
160 | #解压
161 | gunzip hg38.fa.gz
162 | ```
163 |
164 | ### 3.2 subread比对
165 |
166 | #### 3.2.1 构建索引
167 |
168 | subread有着极其快速的比对效率,这与其构建索引的预处理是不可分开的,所以我们用以下代码来构建索引
169 |
170 | ```shell
171 | cd /home/seq/index/hg38
172 | subread-buildindex -o hg38 hg38.fa
173 | ```
174 |
175 | #### 3.2.2 比对
176 |
177 | ```shell
178 | #参数说明:-t 0代表RNA-seq,1代表DNA-seq;-T 线程数;-i 指定参考基因组的basename;-r/-R 双端测序的两个fastq文件;-o 输出文件
179 | subread-align -t 0 -T 5 \
180 | -i hg38 \
181 | -r /home/seq/SRR10502962_1.fastq \
182 | -R /home/seq/SRR10502962_2.fastq \
183 | -o SRR10502962.bam
184 | ```
185 |
186 | ## 4. 统计基因counts数
187 |
188 | 在这里,我们仅介绍一个工具featureCounts。
189 |
190 | featureCounts软件用于统计基因/转录本上mapping的reads数,也就是用于raw count定量。该软件不仅支持基因/转录本的定量,也支持exon,gene bodies,genomic bins,chromosomal locations等区间的定量
191 |
192 | ### 4.1 下载gtf基因组注释文件
193 |
194 | 撰写本教程时的最新版本为103,如有更新可以去官网看看再来下载
195 |
196 | ```shell
197 | #我们将基因组注释文件存放到refer文件夹中
198 | mkdir /home/seq/refer
199 | cd /home/seq/refer
200 | #下载
201 | wget http://ftp.ensembl.org/pub/release-103/gtf/homo_sapiens/Homo_sapiens.GRCh38.103.gtf.gz
202 | ```
203 |
204 | ### 4.2 使用featureCounts进行定量分析
205 |
206 | ```shell
207 | #选择gtf路径
208 | gtf="/home/seq/refer/Homo_sapiens.GRCh38.103.gtf.gz"
209 | #featureCounts 定量分析
210 | #参数说明:-T 线程数;-p 针对paired-end数据;-t 指定feature类型(与-g配合使用),默认将exon作为一个feature;
211 | #-g 从注释文件中提取Meta-features信息用于read count,默认是gene_id;
212 | #-a 输入GTF/GFF基因组注释文件;-o 输出文件;最后的*.bam为待处理数据
213 | featureCounts -T 5 -p \
214 | -t exon -g gene_id \
215 | -a $gtf \
216 | -o all.id.txt *.bam
217 |
218 | #去除多余信息,矩阵保存为counts.txt
219 | cat all.id.txt | cut -f1,7- > counts.txt
220 |
221 | ```
222 |
223 | 到这里,我们就已经得到一个Counts矩阵了,后续的分析被称为转录组下游分析。将在后面的教程中继续介绍
224 |
225 | | Geneid | SRR10502962.bam |
226 | | --------------- | --------------- |
227 | | ENSG00000223972 | 1 |
228 | | ENSG00000227232 | 132 |
229 | | ENSG00000278267 | 3 |
230 | | ENSG00000243485 | 0 |
231 |
232 |
--------------------------------------------------------------------------------
/docs/SCGLUE/README.md:
--------------------------------------------------------------------------------
1 | # 分析3:单细胞样本对齐
2 |
3 | 前面提到,我们在单细胞多组学中,会得到大量的细胞,每个细胞都有不同的状态。对于mofa而言,2018年的nature使用了scnmt技术,该技术可以同时测定一个细胞的ATAC与RNA情况,但在大部分情况下,我们只能从同一个样本中分别进行ATAC与RNA的测定,而不是对同一个细胞进行测定,但是从理论上来说,应该是有一些细胞是相像的,于是,如何把这些细胞对齐,成为了一个需要解决的问题。
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_1.md:
--------------------------------------------------------------------------------
1 | # 单细胞样本对齐: 环境配置
2 |
3 | 单细胞样本对齐,只需要在Python环境即可完成
4 |
5 | ## 1. Python部分
6 |
7 | 在Python部分,主要有以下几个包需要被安装:scglue,scanpy,mofapy2以及episcanpy
8 |
9 | ### 1.1 conda环境
10 |
11 | 在Python部分需要安装的包,可能与之前已安装的包发生冲突,所以我们新建一个conda环境
12 |
13 | ```python
14 | conda create -n rna python=3.6
15 | conda activate rna
16 | ```
17 |
18 | 通过上述两行代码,我们现在进入了一个叫rna的python虚拟环境,这个环境是非常干净的,没有什么多余的包,所以我们在下一步中将依次安装需要的依赖
19 |
20 | ### 1.2 Jupyterlab安装
21 |
22 | 由于这是一个新的python环境,所以我们需要重新装一下jupyter
23 |
24 | ```
25 | conda install -c conda-forge jupyterlab
26 | ```
27 |
28 | ### 1.3 scanpy安装
29 |
30 | 接下来进入正题,我们安装单细胞处理所必需的包-scanpy
31 |
32 | ```python
33 | conda install seaborn scikit-learn statsmodels numba pytables
34 | conda install -c conda-forge python-igraph leidenalg
35 | pip install scanpy
36 | ```
37 |
38 | ### 1.4 scglue安装
39 |
40 | 安装完scanpy后,我们安装一下用于单细胞配对的包scglue
41 |
42 | ```shell
43 | conda install -c defaults -c pytorch -c bioconda -c conda-forge -c scglue scglue --yes
44 | ```
45 |
46 | ###
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do10.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do11.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do12.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do13.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do14.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do15.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do16.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do2.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do3.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do4.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do5.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do6.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do7.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do8.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.assets/do9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/SCGLUE/scglue_3.assets/do9.png
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_3.md:
--------------------------------------------------------------------------------
1 | # 单细胞样本对齐: 模型准备
2 |
3 | 我们在准备好需要对齐的两个单细胞数据后,便需要开始构建模型了
4 |
5 | ## 1. 数据清洗
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | import anndata
11 | import networkx as nx
12 | import scanpy as sc
13 | import scglue
14 | import pandas as pd
15 | import numpy as np
16 | from matplotlib import rcParams
17 | ```
18 |
19 | ### 1.2 导入数据
20 |
21 | ```
22 | rna = anndata.read_h5ad("/content/GSE174367rna_61472.h5ad")
23 | rna
24 | ```
25 |
26 | > AnnData object with n_obs × n_vars = 61472 × 58721 obs: 'cell_type', 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN' var: 'gene_ids', 'feature_types', 'genome'
27 |
28 | ```
29 | atac = anndata.read_h5ad("/content/GSE174367atac_61472.h5ad")
30 | atac
31 | ```
32 |
33 | > AnnData object with n_obs × n_vars = 61472 × 217707 obs: 'cell_type', 'ran', 'Sample.ID', 'Batch', 'Age', 'Sex', 'PMI', 'Tangle.Stage', 'Plaque.Stage', 'Diagnosis', 'RIN' var: 'feature_types', 'genome', 'chrom', 'chromStart', 'chromEnd', 'n_counts'
34 |
35 | ### 1.3 rna数据处理
36 |
37 | ```python
38 | rna.layers["raw"] = rna.X.copy()
39 | rna.layers["raw"] = rna.X.copy()
40 | rna.var_names_make_unique()
41 | sc.pp.filter_genes(rna, min_cells=3)
42 | sc.pp.normalize_total(rna, target_sum=1e4)
43 | sc.pp.log1p(rna)
44 | sc.pp.highly_variable_genes(rna, min_mean=0.0125, max_mean=3, min_disp=0.5)
45 | ```
46 |
47 | ### 1.4 rna数据可视化
48 |
49 | ```python
50 | sc.tl.pca(rna, n_comps=100, svd_solver="auto")
51 | sc.pp.neighbors(rna, metric="cosine")
52 | sc.tl.umap(rna)
53 | sc.pl.umap(rna, color="cell_type")
54 | rna.write_h5ad('GSE174367rna_61472_process.h5ad',compression="gzip")
55 | ```
56 |
57 | 
58 |
59 | ### 1.5 atac数据处理
60 |
61 | ```python
62 | scglue.data.lsi(atac, n_components=100, n_iter=15)
63 | sc.pp.neighbors(atac, use_rep="X_lsi", metric="cosine")
64 | sc.tl.umap(atac)
65 | sc.pl.umap(atac, color="cell_type")
66 | atac.write_h5ad('GSE174367atac_61472_process.h5ad',compression="gzip")
67 | ```
68 |
69 | 
70 |
71 | ### 1.6 Construct prior regulatory graph
72 |
73 | 因为我们得到的rna只有基因的名字,我们还需要知道他们在染色体上的位置,所以在这里,我们用scglue中标注基因的方法
74 |
75 | #### 1.6.1 下载gtf标注文件
76 |
77 | ```shell
78 | #人源
79 | wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.chr_patch_hapl_scaff.annotation.gtf.gz
80 | #鼠源
81 | wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.chr_patch_hapl_scaff.annotation.gtf.gz
82 | ```
83 |
84 | #### 1.6.2 标注
85 |
86 | ```python
87 | scglue.data.get_gene_annotation(
88 | rna, gtf="gencode.v31.chr_patch_hapl_scaff.annotation.gtf.gz",
89 | gtf_by="gene_name"
90 | )
91 | rna.var.loc[:, ["chrom", "chromStart", "chromEnd"]].head()
92 | ```
93 |
94 | #### 1.6.3 去除空标注基因
95 |
96 | 有一些基因并没有在gtf中找到对应的染色体位置,我们需要把这一部分基因去掉
97 |
98 | ```python
99 | rna.var['dell']=np.zeros(len(rna.var))
100 | a=rna.var[~rna.var['chromStart'].isnull()].index
101 | rna.var.loc[a,'dell']=1
102 | rna1=rna[:,rna.var.dell==1]
103 | rna1.var = rna1.var.astype({"chromStart": int, "chromEnd": int})
104 | rna1
105 | ```
106 |
107 | 到此,rna与atac数据的预处理已经完成。
108 |
109 | ## 2. 多组学网络构建
110 |
111 | 根据组学层的先验关系,我们用scglue构建分子层之间的关系
112 |
113 | ### 2.1 构建图网络
114 |
115 | ```python
116 | graph = scglue.genomics.rna_anchored_prior_graph(rna1, atac)
117 | graph.number_of_nodes(), graph.number_of_edges()
118 | ```
119 |
120 | > 100%|██████████| 35363/35363 [00:04<00:00, 7923.69it/s]
121 | >
122 | > (253070, 613256)
123 |
124 | ### 2.2 检查节点
125 |
126 | ```python
127 | # Graph node covers all omic features
128 | all(graph.has_node(gene) for gene in rna1.var_names), \
129 | all(graph.has_node(peak) for peak in atac.var_names)
130 | ```
131 |
132 | > (True, True)
133 |
134 | ```python
135 | # Edge attributes contain weights and signs
136 | for _, e in zip(range(5), graph.edges):
137 | print(f"{e}: {graph.edges[e]}")
138 | ```
139 |
140 | > ('AL669831.3', 'chr1:629708-630559', 0): {'dist': 0, 'weight': 1.0, 'sign': 1} ('AL669831.3', 'chr1:631640-631948', 0): {'dist': 0, 'weight': 1.0, 'sign': 1} ('AL669831.3', 'chr1:632511-633105', 0): {'dist': 0, 'weight': 1.0, 'sign': 1} ('AL669831.3', 'chr1:633740-634682', 0): {'dist': 0, 'weight': 1.0, 'sign': 1} ('AL669831.3', 'chr1:778234-779324', 0): {'dist': 0, 'weight': 1.0, 'sign': 1}
141 |
142 | ### 2.3 保存图网络
143 |
144 | ```python
145 | rna1.write("rna_preprocessed.h5ad", compression="gzip")
146 | atac.write("atac_preprocessed.h5ad", compression="gzip")
147 | nx.write_graphml(graph, "prior.graphml.gz")
148 | ```
149 |
150 | 到这里,模型的预处理就算是完成了,更多的可以参照scglue的官方说明文档。
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_4.md:
--------------------------------------------------------------------------------
1 | # 单细胞样本对齐: 模型训练
2 |
3 | 在本小节,我们就要开始对齐单细胞了
4 |
5 | ## 1. 数据准备
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | import anndata
11 | import itertools
12 | import networkx as nx
13 | import numpy as np
14 | import pandas as pd
15 | import scanpy as sc
16 | import scglue
17 | from matplotlib import rcParams
18 | ```
19 |
20 | ### 1.2 导入数据
21 |
22 | ```python
23 | rna = anndata.read_h5ad("rna_preprocessed.h5ad")
24 | atac = anndata.read_h5ad("atac_preprocessed.h5ad")
25 | graph = nx.read_graphml("prior.graphml.gz")
26 | ```
27 |
28 | ## 2 模型训练
29 |
30 | ### 2.1 设置模型参数
31 |
32 | ```python
33 | scglue.plot.set_publication_params()
34 | rcParams["figure.figsize"] = (4, 4)
35 |
36 | scglue.models.configure_dataset(
37 | rna, "NB", use_highly_variable=True,
38 | use_layer="raw", use_rep="X_pca"
39 | )
40 |
41 | scglue.models.configure_dataset(
42 | atac, "NB", use_highly_variable=True,
43 | use_rep="X_lsi"
44 | )
45 |
46 | graph = graph.subgraph(itertools.chain(
47 | rna.var.query("highly_variable").index,
48 | atac.var.query("highly_variable").index
49 | ))
50 | ```
51 |
52 | ### 2.2 模型训练
53 |
54 | ```python
55 |
56 | glue = scglue.models.SCGLUEModel(
57 | {"rna": rna, "atac": atac}, sorted(graph.nodes),
58 | random_seed=0
59 | )
60 |
61 | glue.compile()
62 |
63 | glue.fit(
64 | {"rna": rna, "atac": atac},
65 | graph, edge_weight="weight", edge_sign="sign",
66 | directory="glue"
67 | )
68 | ```
69 |
70 | > ```
71 | > [INFO] SCGLUEModel: Setting `graph_batch_size` = 28027
72 | > [INFO] SCGLUEModel: Setting `align_burnin` = 10
73 | > [INFO] SCGLUEModel: Setting `max_epochs` = 38
74 | > [INFO] SCGLUEModel: Setting `patience` = 5
75 | > [INFO] SCGLUEModel: Setting `reduce_lr_patience` = 3
76 | > [INFO] SCGLUETrainer: Using training directory: "glue"
77 | > [INFO] SCGLUETrainer: [Epoch 10] train={'g_nll': 0.383, 'g_kl': 0.004, 'g_elbo': 0.387, 'x_rna_nll': 0.246, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.254, 'x_atac_nll': 0.029, 'x_atac_kl': 0.0, 'x_atac_elbo': 0.03, 'dsc_loss': 0.684, 'gen_loss': 0.286}, val={'g_nll': 0.425, 'g_kl': 0.004, 'g_elbo': 0.429, 'x_rna_nll': 0.246, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.254, 'x_atac_nll': 0.029, 'x_atac_kl': 0.0, 'x_atac_elbo': 0.029, 'dsc_loss': 0.692, 'gen_loss': 0.287}, 24.2s elapsed
78 | > [INFO] SCGLUETrainer: [Epoch 20] train={'g_nll': 0.338, 'g_kl': 0.004, 'g_elbo': 0.342, 'x_rna_nll': 0.244, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.253, 'x_atac_nll': 0.029, 'x_atac_kl': 0.0, 'x_atac_elbo': 0.029, 'dsc_loss': 0.688, 'gen_loss': 0.282}, val={'g_nll': 0.442, 'g_kl': 0.004, 'g_elbo': 0.447, 'x_rna_nll': 0.245, 'x_rna_kl': 0.008, 'x_rna_elbo': 0.253, 'x_atac_nll': 0.029, 'x_atac_kl': 0.0, 'x_atac_elbo': 0.029, 'dsc_loss': 0.691, 'gen_loss': 0.286}, 24.3s elapsed
79 | > Epoch 21: reducing learning rate of group 0 to 2.0000e-04.
80 | > Epoch 21: reducing learning rate of group 0 to 2.0000e-04.
81 | > 2021-10-22 16:24:42,283 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training
82 | > [INFO] EarlyStopping: Retoring checkpoint "22"...
83 | > ```
84 |
85 | ### 2.3 保存模型
86 |
87 | ```python
88 | glue.save("glue/final.dill")
89 | ```
--------------------------------------------------------------------------------
/docs/SCGLUE/scglue_5.md:
--------------------------------------------------------------------------------
1 | # 单细胞样本对齐: 对齐细胞
2 |
3 | 在上一小节,我们已经获得了训练好的对齐模型,在本小节中,我们将根据模型参数配对细胞。
4 |
5 | ## 1. 数据准备
6 |
7 | ### 1.1 导入包
8 |
9 | ```python
10 | import anndata
11 | import networkx as nx
12 | import scanpy as sc
13 | import scglue
14 | import numpy as np
15 | import pandas as pd
16 | from matplotlib import rcParams
17 | ```
18 |
19 | ### 1.2 导入数据
20 |
21 | ```python
22 | rna = anndata.read_h5ad("rna_preprocessed.h5ad")
23 | atac = anndata.read_h5ad("atac_preprocessed.h5ad")
24 | glue = scglue.models.load_model("final.dill")
25 | ```
26 |
27 | ## 2. 整合模型
28 |
29 | ### 2.1 数据预处理
30 |
31 | ```python
32 | rna.obs['domain']='scRNA-seq'
33 | atac.obs['domain']='scATAC-seq'
34 | rna.obsm["X_glue"] = glue.encode_data("rna", rna)
35 | atac.obsm["X_glue"] = glue.encode_data("atac", atac)
36 | ```
37 |
38 | ### 2.2 导出细胞参数(特征向量)
39 |
40 | ```python
41 | rna_loc=pd.DataFrame(rna.obsm['X_glue'], index=rna.obs.index)
42 | atac_loc=pd.DataFrame(atac.obsm['X_glue'], index=atac.obs.index)
43 | ```
44 |
45 | ### 2.3 配对细胞
46 |
47 | ```python
48 | len1=(len(rna_loc)//5000)+1
49 | xl=[]
50 | for j in range(len1):
51 |
52 | c=pd.DataFrame()
53 | for i in range(len1):
54 | t1=rna_loc.iloc[5000*(i):5000*(i+1)]
55 | t2=atac_loc.iloc[5000*(j):5000*(j+1)]
56 | a=np.corrcoef(t1,t2)[len(t1):,0:len(t1)]
57 | b=pd.DataFrame(a,index=t2.index,columns=t1.index)
58 |
59 | c=pd.concat([c,b],axis=1)
60 | del t1
61 | del t2
62 | del a
63 | del b
64 | for i in range(len(c)):
65 | xl.append(c.columns[np.where(c.iloc[i]==c.iloc[i].max())[0]].values[0])
66 | del c
67 | print('Now epoch is {}'.format(j))
68 | res=pd.DataFrame(index=atac_loc.index)
69 | res['pair']=xl
70 | res.to_csv('pair_res.csv')
71 | ```
72 |
73 | ## 3. 过滤重复细胞
74 |
75 | ### 3.1 导入配对数据
76 |
77 | ```python
78 | pair=pd.read_csv('pair_res.csv')
79 | pair.columns=['scATAC','scRNA']
80 | ```
81 |
82 | ### 3.2 过滤重复细胞
83 |
84 | ```python
85 | rna_only_pair=list(set(pair['scRNA']))
86 | atac_only_pair=[]
87 | for i in rna_only_pair:
88 | atac_only_pair.append(pair[pair['scRNA']==i]['scATAC'].iloc[0])
89 | new_pair=pd.DataFrame()
90 | new_pair['scRNA']=rna_only_pair
91 | new_pair['scATAC']=atac_only_pair
92 | new_name=[]
93 | for i in range(len(new_pair)):
94 | k='cell_{0}'.format(i)
95 | new_name.append(k)
96 | new_pair['sample']=new_name
97 | new_pair.to_csv('new_pair.csv')
98 | ```
99 |
100 | ### 3.3 提取scRNA-seq的配对细胞
101 |
102 | ```python
103 | delli=[]
104 | for i in rna.obs.index:
105 | if i in rna_only_pair:
106 | delli.append('tr')
107 | else:
108 | delli.append('fa')
109 | rna.obs['delli']=delli
110 | rna_pair=rna[rna.obs['delli']=='tr']
111 | rna_pair.write_h5ad('rna_pair.h5ad',compression="gzip")
112 | ```
113 |
114 | > View of AnnData object with n_obs × n_vars = 23770 × 35363
115 |
116 | ### 3.4 提取scATAC-seq的配对细胞
117 |
118 | ```python
119 | delli=[]
120 | for i in atac.obs.index:
121 | if i in atac_only_pair:
122 | delli.append('tr')
123 | else:
124 | delli.append('fa')
125 | atac.obs['delli']=delli
126 | atac_pair=atac[atac.obs['delli']=='tr']
127 | atac_pair.write_h5ad('atac_pair.h5ad',compression="gzip")
128 | ```
129 |
130 | ### 3.5 scRNA-seq与scATAC-seq的细胞类型配对
131 |
132 | 由于我们配对的细胞仅仅是特征向量相似,而不是真正意义上的相似,可能存在scRNA-seq的男性细胞与scATAC-seq的女性细胞比较相似的情况,为了避免性别与诊断出错,我们对配对后的细胞进行进一步配对
133 |
134 | #### 3.5.1 男女性别平衡
135 |
136 | 由于女性样本比男性样本要多两个,我们随机去除两个女性样本使得男:女=1:1
137 |
138 | ```python
139 | rna_sex=rna_pair[(rna_pair.obs['Sample.ID']!='Sample-22') & (rna_pair.obs['Sample.ID']!='Sample-27')]
140 | rna_sex.obs.index=rna_pair.obs[(rna_pair.obs['Sample.ID']!='Sample-22') & (rna_pair.obs['Sample.ID']!='Sample-27')].index.values
141 | for i in list(set(rna_sex.obs['cell_type'])):
142 | print(i,len(rna_sex.obs.loc[rna_sex.obs['cell_type']==i])/len(rna_sex.obs))
143 | ```
144 |
145 | > ODC 0.622879391860573
146 | >
147 | > MG 0.0530731435987763
148 | >
149 | > INH 0.0965977565588208
150 | >
151 | > EX 0.11754890145545564
152 | >
153 | > OPC 0.03680355984054881
154 | >
155 | > ASC 0.06711782701399834
156 | >
157 | > PER.END 0.0059794196718271995
158 |
159 | ```python
160 | atac_sex=atac_pair[(atac_pair.obs['Sample.ID']!='Sample-22') & (atac_pair.obs['Sample.ID']!='Sample-27')]
161 | atac_sex.obs.index=atac_pair.obs[(atac_pair.obs['Sample.ID']!='Sample-22') & (atac_pair.obs['Sample.ID']!='Sample-27')].index
162 | for i in list(set(atac_sex.obs['cell_type'])):
163 | print(i,len(atac_sex.obs.loc[atac_sex.obs['cell_type']==i])/len(atac_sex.obs))
164 | ```
165 |
166 | > ODC 0.6223660050712599
167 | >
168 | > MG 0.05993704642825916
169 | >
170 | > INH 0.10037597272011892
171 | >
172 | > EX 0.10457287750284165
173 | >
174 | > OPC 0.03733496546297106
175 | >
176 | > ASC 0.06933636443123196
177 | >
178 | > PER.END 0.006076768383317304
179 |
180 | ```
181 | ret_sex= list(set(rna_sex.obs.index).intersection(atac_sex.obs.index))
182 | len(ret_sex)
183 | ```
184 |
185 | #### 3.5.2 细胞类型配对-性别
186 |
187 | ```
188 | rna_sex_F=rna_sex[rna_sex.obs['Sex']=='F']
189 | rna_sex_F.obs.index=rna_sex.obs[rna_sex.obs['Sex']=='F'].index
190 | atac_sex_F=atac_sex[atac_sex.obs['Sex']=='F']
191 | atac_sex_F.obs.index=atac_sex.obs[atac_sex.obs['Sex']=='F'].index
192 | ret_F=list(set(rna_sex_F.obs.index).intersection(atac_sex_F.obs.index))
193 |
194 | rna_sex_M=rna_sex[rna_sex.obs['Sex']=='M']
195 | rna_sex_M.obs.index=rna_sex.obs[rna_sex.obs['Sex']=='M'].index
196 | atac_sex_M=atac_sex[atac_sex.obs['Sex']=='M']
197 | atac_sex_M.obs.index=atac_sex.obs[atac_sex.obs['Sex']=='M'].index
198 | ret_M=list(set(rna_sex_M.obs.index).intersection(atac_sex_M.obs.index))
199 |
200 | ```
201 |
202 | #### 3.5.3 细胞类型配对-诊断
203 |
204 | ```
205 | rna_sex_ad=rna_sex[ret_M+ret_F][rna_sex[ret_M+ret_F].obs['Diagnosis']=='AD']
206 | atac_sex_ad=atac_sex[ret_M+ret_F][atac_sex[ret_M+ret_F].obs['Diagnosis']=='AD']
207 | ret_ad=list(set(rna_sex_ad.obs.index).intersection(atac_sex_ad.obs.index))
208 |
209 | rna_sex_ctrl=rna_sex[ret_M+ret_F][rna_sex[ret_M+ret_F].obs['Diagnosis']=='Control']
210 | atac_sex_ctrl=atac_sex[ret_M+ret_F][atac_sex[ret_M+ret_F].obs['Diagnosis']=='Control']
211 | ret_ctrl=list(set(rna_sex_ctrl.obs.index).intersection(atac_sex_ctrl.obs.index))
212 | ```
213 |
214 | ### 3.6 保存新配对列表
215 |
216 | ```python
217 | pair=pd.read_csv('new_pair.csv')
218 | pair.set_index(pair.columns[0],inplace=True)
219 | pair.set_index(pair.columns[2],inplace=True)
220 | #pair.columns=['scATAC','scRNA']
221 | pair.loc[ret_ad+ret_ctrl].to_csv('mofa_pre_pair.csv')
222 | !cp /content/mofa_pre_pair.csv /content/drive/MyDrive/mofa_gse174367
223 | ```
224 |
225 |
--------------------------------------------------------------------------------
/docs/SUMMARY.md:
--------------------------------------------------------------------------------
1 | # Summary
2 |
3 | * [Bioinformatics Tutorial](README.md)
4 | * [目录](README.md)
5 | * [配置:基础环境配置](CONFIG/README.md)
6 | * [配置1:Python环境](CONFIG/config_1.md)
7 | * [配置2:Linux环境](CONFIG/config_2.md)
8 | * [配置3:R语言环境](CONFIG/config_3.md)
9 | * [分析1:RNA-seq上游分析](RNASEQUP/README.md)
10 | * [1-1. 环境配置](RNASEQUP/rnasequp_1.md)
11 | * [1-2. 上游分析全教程](RNASEQUP/rnasequp_2.md)
12 | * [分析2:RNA-seq下游分析](RNASEQ/README.md)
13 | * [2-1. 环境配置](RNASEQ/rnaseq_1.md)
14 | * [2-2. 差异表达基因(DEG)分析](RNASEQ/rnaseq_2.md)
15 | * [2-3. 通路富集分析(GSEA)](RNASEQ/rnaseq_3.md)
16 | * [分析3:单细胞样本对齐](SCGLUE/README.md)
17 | * [3-1. 环境配置](SCGLUE/scglue_1.md)
18 | * [3-2. 数据预处理](SCGLUE/scglue_2.md)
19 | * [3-3. 模型准备](SCGLUE/scglue_3.md)
20 | * [3-4. 模型训练](SCGLUE/scglue_4.md)
21 | * [3-5. 对齐细胞](SCGLUE/scglue_5.md)
22 | * [分析4:MOFA单细胞多组学因子分析](MOFA/README.md)
23 | * [4-1. 环境配置](MOFA/mofa_1.md)
24 | * [4-2. 单细胞样本对齐](MOFA/mofa_2.md)
25 | * [4-3. scRNA-seq数据处理](MOFA/mofa_3.md)
26 | * [4-4. scATAC-seq数据处理](MOFA/mofa_4.md)
27 | * [4-5. MOFA模型构建](MOFA/mofa_5.md)
28 | * [4-6. MOFA下游分析-R语言环节](MOFA/mofa_6.md)
29 | * [4-7. MOFA下游分析-Python环节](MOFA/mofa_7.md)
30 | * [分析5:TMT蛋白组学分析](TMT/README.md)
31 | * [5-1. 环境配置](TMT/tmt_1.md)
32 | * [5-2. TMT蛋白组学上游分析](TMT/tmt_2.md)
33 | * [5-3. TMT蛋白组学下游分析](TMT/tmt_3.md)
34 | * [分析6:多组学分析](MultiOMIC/README.md)
35 | * [6-1. 数据准备](MultiOMIC/book-1.md)
36 | * [6-2. GLUE多组学整合](MultiOMIC/book-2.md)
37 | * [6-3. 细胞亚群分析](MultiOMIC/book-3.md)
38 | * [6-4. 差异表达分析](MultiOMIC/book-4.md)
39 | * [6-5. 细胞通讯分析](MultiOMIC/book-5.md)
40 | * [6-6. 动态调控分析](MultiOMIC/book-6.md)
41 | * [6-7. 转录因子网络](MultiOMIC/book-7.md)
42 | * [绘图:Python可视化](PLOT/README.md)
43 | * [1-1. 火山图](PLOT/plot_1.md)
44 | * [1-2. 箱线图](PLOT/plot_2.md)
45 | * [1-3. 热图(聚类)](PLOT/plot_3.md)
46 |
47 |
--------------------------------------------------------------------------------
/docs/TMT/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/.DS_Store
--------------------------------------------------------------------------------
/docs/TMT/README.md:
--------------------------------------------------------------------------------
1 | # 分析5:TMT蛋白组学分析
2 |
3 |
--------------------------------------------------------------------------------
/docs/TMT/tmt_1.assets/image-20220126012940259-6120196.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_1.assets/image-20220126012940259-6120196.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_1.assets/image-20220126012940259.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_1.assets/image-20220126012940259.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_1.md:
--------------------------------------------------------------------------------
1 | # TMT蛋白组学:环境配置
2 |
3 | 对于TMT串联质谱分析,我们这里使用MaxQuant完成对raw文件的处理,同时,我们可以考虑使用服务器来完成处理。这意味着,环境我们既可以选择windows,也可以选择linux,我们首先介绍一下windows环境如何配置
4 |
5 | ## 1. Windows环境配置
6 |
7 | 在Windows环境下,我们环境配置分为以下几个步骤:
8 |
9 | ### 1.1 .Net环境安装
10 |
11 | 在本教程中,我们使用的MaxQuant对应的版本为1.6.7.10,这个版本要求的.NET Core框架版本为2.1。于是,我们从微软官网下载:https://dotnet.microsoft.com/en-us/download/dotnet/2.1
12 |
13 | 
14 |
15 | ### 1.2 MaxQuant安装
16 |
17 | 在windows下,我们只需要将MaxQuant下载下来即可直接使用(装完.net框架后)
18 |
19 | 下载地址:https://maxquant.net/
20 |
21 | ## 2. Linux环境下的安装
22 |
23 | 由于我们有时候会不想用自己的电脑跑,更希望在服务器上运行。在这里,我们使用的是Ubuntu18.04来对蛋白组学数据进行处理。
24 |
25 | ### 2.1 .Net环境安装
26 |
27 | 使用 APT 进行安装可通过几个命令来完成。 安装 .NET 之前,请运行以下命令,将 Microsoft 包签名密钥添加到受信任密钥列表,并添加包存储库。
28 |
29 | 打开终端并运行以下命令:
30 |
31 | Bash
32 |
33 | ```bash
34 | wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb
35 | sudo dpkg -i packages-microsoft-prod.deb
36 | rm packages-microsoft-prod.deb
37 | ```
38 |
39 | ### 2.2 安装 SDK
40 |
41 | .NET Core SDK 使你可以通过 .NET Core 开发应用。 如果安装 .NET Core SDK,则无需安装相应的运行时。 若要安装 .NET Core SDK,请运行以下命令:
42 |
43 | Bash
44 |
45 | ```bash
46 | sudo apt-get update; \
47 | sudo apt-get install -y apt-transport-https && \
48 | sudo apt-get update && \
49 | sudo apt-get install -y dotnet-sdk-2.1
50 | ```
51 |
52 | ### 2.3 验证安装
53 |
54 | 我们在终端输入
55 |
56 | ```bash
57 | sudo dotnet --version
58 | ```
59 |
60 | 如果显示正确的版本号,那么就意味着安装成功了。
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126013900416-6120205.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126013900416-6120205.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126013900416.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126013900416.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126014204530.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126014204530.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126014423239.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126014423239.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126014658979.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126014658979.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126014844199.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126014844199.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126014944222.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126014944222.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126015312481.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126015312481.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126015354523.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126015354523.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.assets/image-20220126015849066.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_2.assets/image-20220126015849066.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_2.md:
--------------------------------------------------------------------------------
1 | # TMT蛋白组学:上游分析
2 |
3 | 顾名思义,上游分析即从质谱raw文件中得到蛋白定量的矩阵,无论是windows还是linux,都需要在windows的软件内设置以下的参数
4 |
5 | ## 1. 参数设置
6 |
7 | 对于我没提到的参数,都保持默认即可
8 |
9 | ### 1.1 原始数据窗格
10 |
11 | - Set experiment:对每一个样本设定实验内容,每个实验作为一个单独的输出
12 | - Set reference channels:这个是TMT串联质谱必须要设置的参数,使得标记可以被分离
13 | - Set parameter group:如果你的实验有多个组别可以设置,但一般多组别会分开跑
14 |
15 | 
16 |
17 | ### 1.2 组特定参数窗格
18 |
19 | #### 1.2.1 Type
20 |
21 | - 对于TMT6plex,我们选择**Reporter ion MS2**作为类型
22 | - Isobaric:我们选择**6plex TMT**
23 |
24 | 
25 |
26 | #### 1.2.2 Modification
27 |
28 | - Variable modifications: 选择Oxidation(M),Acetyl(Protein N-term),Deamidation(NQ)
29 | - Fixed modification:选择Carbamidomethyl(C)
30 |
31 | 
32 |
33 | #### 1.2.3 Digestion
34 |
35 | - Digestion mode:Specific
36 | - Enzyme:Trypsin/P(实验时用什么酶消化就选什么)
37 | - Max. missed:设置成2即可
38 |
39 | 
40 |
41 | #### 1.2.4 Label-free quantification
42 |
43 | 设置成None
44 |
45 | 
46 |
47 | ### 1.3 全局参数窗格
48 |
49 | #### 1.3.1 Sequence
50 |
51 | - Fasta files:选择你要搜的蛋白质库,在这里,我们选择从UniProt下载的Mus(小鼠)全蛋白库
52 | - Min. peptide length: 设置成7,太低会出现误差
53 | - Max. peptide mass [Da]: 设置成6000
54 |
55 | 
56 |
57 | #### 1.3.2 Protein quantification
58 |
59 | Select `Use only unmodified peptides and`, then provide the list of modifications, such as Oxidation (M), Acetyl (Protein N-term) and Deamidation (NQ).
60 |
61 | 
62 |
63 | #### 1.3.3 MS/MS analyzer
64 |
65 | - a. FTMS MS/MS match tolerance: 0.05 Da
66 | - b. ITMS MS/MS match tolerance: 0.6 Da
67 |
68 | 
69 |
70 | ## 2. 搜库(获取表达矩阵)
71 |
72 | 在上述环节,我们已经设置好了所有参数,那么我们首先点击**文件->保存参数**,得到一个mqpar.xml文件
73 |
74 | ### 2.1 Windows
75 |
76 | 我们在左下角设置好处理器的数量,一般设置的比CPU数少1会比较稳定,不会闪退,效率也会最高,然后直接开始即可
77 |
78 | 开始后性能窗格会显示当前进度,可以点击Details按钮左边的勾查看,也可以点击左上角show all activity查看
79 |
80 | 
81 |
82 | ### 2.2 Linux
83 |
84 | 我们将MaxQuant拷贝到Linux上,首先进入mqpar.xml所在的文件夹,在终端输入
85 |
86 | ```bash
87 | sudo nohup dotnet /home/MaxQuant/bin/MaxQuantCmd.exe mqpar.xml
88 | ```
89 |
90 | 即可开始运行,其中`/home/MaxQuant/bin/MaxQuantCmd.exe`为我们拷贝的路径,等待时间24-48小时不等
91 |
92 |
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.assets/image-20220126021021714.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_3.assets/image-20220126021021714.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.assets/image-20220126022018085.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_3.assets/image-20220126022018085.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.assets/image-20220126022234665.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_3.assets/image-20220126022234665.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.assets/image-20220126022304695.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_3.assets/image-20220126022304695.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.assets/image-20220126022354890.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Starlitnightly/bioinformatic_tutorial/58c572931aadf75e479eca3ef3506cb0585c9337/docs/TMT/tmt_3.assets/image-20220126022354890.png
--------------------------------------------------------------------------------
/docs/TMT/tmt_3.md:
--------------------------------------------------------------------------------
1 | # TMT蛋白组学:下游分析
2 |
3 | 在前面的分析,我们已经得到了蛋白组学的搜库结果,那么我们怎么才能将其分析出具有生物学意义的东西呢?
4 |
5 | 在结果的路径下,有一个Combined文件夹,里面的txt文件夹存放了我们的搜库结果,其中的evidence.txt跟proteinGroups.txt为最重要的两个,在这里,我们使用R包proteusTMT进行下游分析。
6 |
7 | ## 1. 安装依赖
8 |
9 | Proteus can be installed directly from GitHub. First, you need to install BioConductor and limma:
10 |
11 | ```
12 | install.packages("BiocManager")
13 | BiocManager::install()
14 | BiocManager::install("limma")
15 | ```
16 |
17 | You also need devtools:
18 |
19 | ```
20 | install.packages("devtools")
21 | ```
22 |
23 | In order to run examples or vignette code, additional packages with example data need to be installed:
24 |
25 | ```
26 | devtools::install_github("bartongroup/proteusLabelFree")
27 | devtools::install_github("bartongroup/proteusTMT")
28 | devtools::install_github("bartongroup/proteusSILAC")
29 | ```
30 |
31 | Finally, you can install proteus:
32 |
33 | ```
34 | devtools::install_github("bartongroup/Proteus", build_opts= c("--no-resave-data", "--no-manual"), build_vignettes=TRUE)
35 | ```
36 |
37 | Note: use `build_vignettes = FALSE` if you run into problems with vignettes installation.
38 |
39 | ## 2. meta文件准备
40 |
41 | 对于Proteus,其中最为重要的是我们需要对TMT6plex的每一个标记进行标注清楚,在这里,我们准备一个模板作为参考
42 |
43 | - experiment:这个是你在上游分析搜库的时候,对每一个raw设置的experiment情况,比如Ctrl1.raw文件里面,包含了6个TMT标记,其中每一个标记代表不同的样本,比如Ctrl1包括了WT1-1,WT2-1,WT3-1,SHED1-1,SHED2-1,SHED3-1,WT1-1指的是野生型的第一个重复
44 | - measure:TMT6plex将返回6个Reporter intensity,每一个对应上述的一个sample
45 | - sample:表示TMT6plex里都有哪六个样本
46 | - condition:样本分组情况
47 | - replicate:重复次数
48 |
49 | 
50 |
51 | ## 3. Proteus分析
52 |
53 | ### 3.1 加载依赖
54 |
55 | ```
56 | library(proteusTMT)
57 | library(proteus)
58 | ```
59 |
60 | ### 3.2 导入数据
61 |
62 | #### 3.2.1 列名设置
63 |
64 | The default `measure.cols` object is designed for label-free data. For TMT data we need to specify all reporter intensity columns. In our example we have 6 reporter columns, numbered from 1 to 6:
65 |
66 | ```
67 | measCols <- paste0("Reporter intensity ", 1:6)
68 | names(measCols) <- paste0("reporter_", 1:6)
69 | ```
70 |
71 | 意味着我们将Reporter intensity 1重命名为reporter_1
72 |
73 | #### 3.2.2 导入evidence和meta
74 |
75 | ```
76 | evi <- readEvidenceFile('txt/evidence.txt', measure.cols=measCols)
77 | meta <- read.delim('meta.txt', header=TRUE, sep="\t")
78 | ```
79 |
80 | #### 3.2.3 创建peptide数据集
81 |
82 | ```
83 | pepdat <- makePeptideTable(evi, meta, measure.cols=measCols, aggregate.fun=aggregateMedian, experiment.type="TMT")
84 | plotCount(pepdat)
85 | ```
86 |
87 | 
88 |
89 | ### 3.3 蛋白结果数据
90 |
91 | #### 3.3.1 创建蛋白结果数据
92 |
93 | We create protein data using the high-flyer method.
94 |
95 | ```
96 | prodat <- makeProteinTable(pepdat, aggregate.fun=aggregateHifly, hifly=3)
97 | ```
98 |
99 | #### 3.3.2 Normalization
100 |
101 | For TMT data we recommend using CONSTANd normalization [Maes et al. 2016](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4974351/pdf/zjw2779.pdf).
102 |
103 | ```
104 | prodat.norm <- normalizeTMT(prodat)
105 | write.csv(prodat.norm[["tab"]],file='prodat.csv')
106 | ```
107 |
108 | These two figures show reporter intensity distributions before and after normalization.
109 |
110 | ```
111 | plotSampleDistributions(prodat, fill="replicate")
112 | ```
113 |
114 | 
115 |
116 | ```
117 | plotSampleDistributions(prodat.norm, log.scale=FALSE, fill="replicate")
118 | ```
119 |
120 | 
121 |
122 | #### 3.3.3 Clustering
123 |
124 | We can use the same function `plotClustering()` to see the dendrogram for the proteins.
125 |
126 | ```
127 | plotClustering(prodat.norm)
128 | ```
129 |
130 | 
131 |
132 | ### 3.4 差异表达分析
133 |
134 | 我们想比较SHED组跟WT组的差异情况,于是
135 |
136 | ```
137 | res <- limmaDE(prodat.norm, conditions=c("WT", "SHED"))
138 | write.csv(res,file='WT_SHED_result.csv')
139 | ```
140 |
141 | 得到的csv文件与前面的prodat.csv文件,即可参考RNA-seq的下游分析进行,这里不做过多的阐述
142 |
143 | RNA-seq下游分析教程:https://starlitnightly.github.io/bioinformatic_tutorial/RNASEQ/rnaseq_2.html
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # Bioinformatics_tutorial
2 |
3 | - 作者:starlitnightly
4 | - 日期:2021.11.02
5 |
6 | !!! note "楔子"
7 | 撰写这个笔记,目的在于提供一个生物信息学相关的教程吧
8 |
9 | ——2021.11.02,星夜
10 |
11 | ## 目录
12 |
13 | - 【配置】:[CONFIG](https://starlitnightly.github.io/bioinformatic_tutorial/CONFIG/)
14 | - 【分析1: RNA-seq上游】:[RNASEQUP](https://starlitnightly.github.io/bioinformatic_tutorial/RNASEQUP/)
15 | - 【分析2: RNA-seq下游】:[RNASEQDOWN](https://starlitnightly.github.io/bioinformatic_tutorial/RNASEQ/)
16 | - 【分析3: 单细胞样本对齐】:[GLUEPAIR](https://starlitnightly.github.io/bioinformatic_tutorial/SCGLUE/)
17 | - 【分析4: MOFA单细胞多组学因子分析】:[MOFA](https://starlitnightly.github.io/bioinformatic_tutorial/MOFA/)
18 | - 【绘图:Python数据可视化-生物信息学专栏】:[VISABLE](https://starlitnightly.github.io/bioinformatic_tutorial/PLOT/)
19 |
20 | ## 数据
21 |
22 | 【绘图:Python数据可视化-生物信息学专栏】:[DATA](https://github.com/Starlitnightly/bioinformatic_tutorial/tree/main/PLOT/data)
23 |
24 | ## License
25 |
26 |
27 |
28 | 本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 | {{ super() }}
5 |
6 | {% if git_page_authors %}
7 |