├── .github └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── cover.png ├── doc_style.md └── latex ├── apalike_fullname.bst ├── bib.bib ├── figures ├── XPixelMetaverse_small.jpg ├── basicsr_logo.png ├── code_structure_log_example.png ├── code_structure_overview.png ├── code_structure_tensorboard.png ├── getting_start_build_dataset.png ├── getting_start_build_loss.png ├── getting_start_build_model.png ├── getting_start_build_network.png ├── getting_start_calculate_metric.png ├── getting_start_exp_folder.png ├── getting_start_init_data_model.png ├── getting_start_init_dataloader.png ├── getting_start_loss_init.png ├── getting_start_network_init.png ├── getting_start_optimize.png ├── getting_start_parse_options.png ├── getting_start_srmodel.png ├── getting_start_train_entracne.png ├── getting_start_train_pipeline.png ├── getting_start_training_loop.png ├── getting_start_validation_metric.png ├── good_luck.png ├── installation_clone_install_location.jpg ├── installation_correct_install.jpg ├── installation_pip_install_location.jpg ├── installation_version.jpg ├── rocket_logo.png └── xpixel-logo.jpg ├── main.tex ├── sections ├── authors.tex ├── code_structure.tex ├── data_preparation.tex ├── deploy.tex ├── experience.tex ├── getting_start.tex ├── howto.tex ├── installation.tex ├── metrics.tex ├── overview.tex ├── scripts.tex ├── template.tex └── xpixel_metaverse.tex └── style.sty /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | push: 4 | tags: 5 | - '*' 6 | 7 | env: 8 | APPLICATION_NAME: BasicSR-Tutorial 9 | 10 | jobs: 11 | setup: 12 | runs-on: ubuntu-latest 13 | outputs: 14 | APPNAME: ${{ steps.get_appname.outputs.APPNAME }} 15 | VERSION: ${{ steps.get_version.outputs.VERSION }} 16 | steps: 17 | - name: get-appname 18 | id: get_appname 19 | run: echo ::set-output name=APPNAME::${APPLICATION_NAME} 20 | - name: get-version 21 | id: get_version 22 | run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} 23 | 24 | build_latex: 25 | needs: [setup] 26 | runs-on: ubuntu-latest 27 | env: 28 | PACKAGENAME: ${{ needs.setup.outputs.APPNAME }}-${{ needs.setup.outputs.VERSION }} 29 | steps: 30 | - uses: actions/checkout@v2 31 | with: 32 | submodules: 'recursive' 33 | - name: Compile LaTeX 34 | uses: xu-cheng/latex-action@master 35 | with: 36 | working_directory: latex 37 | root_file: main.tex 38 | latexmk_shell_escape: true 39 | - name: package 40 | run: | 41 | cd latex 42 | ls 43 | mv main.pdf ../${{ env.PACKAGENAME }}.pdf 44 | - name: upload 45 | uses: actions/upload-artifact@v2 46 | with: 47 | name: ${{ env.PACKAGENAME }} 48 | path: ${{ env.PACKAGENAME }}.pdf 49 | 50 | release: 51 | permissions: write-all 52 | needs: [setup, build_latex] 53 | runs-on: ubuntu-latest 54 | steps: 55 | - name: download 56 | uses: actions/download-artifact@v2 57 | with: 58 | path: artifacts 59 | 60 | - name: create-release 61 | id: create_release 62 | uses: actions/create-release@v1 63 | env: 64 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 65 | with: 66 | tag_name: ${{ github.ref }} 67 | release_name: BasicSR 中文文档 ${{ github.ref }} 68 | body: | 69 | 🚀 See you again 😸 70 | 🚀Have a nice day 😸 and happy everyday 😃 71 | 🚀 Long time no see ☄️ 72 | 73 | ✨ **Highlights** 74 | ✅ [Features] Support ... 75 | 76 | 🐛 **Bug Fixes** 77 | 78 | 🌴 **Improvements** 79 | 80 | 📢📢📢 81 | 82 |

83 | 84 |
85 | draft: true 86 | prerelease: false 87 | 88 | - name: upload-pdf 89 | uses: actions/upload-release-asset@v1 90 | env: 91 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 92 | PACKAGENAME: ${{ needs.setup.outputs.APPNAME }}-${{ needs.setup.outputs.VERSION }} 93 | with: 94 | upload_url: ${{ steps.create_release.outputs.upload_url }} 95 | asset_path: artifacts/${{ env.PACKAGENAME }}/${{ env.PACKAGENAME }}.pdf 96 | asset_name: ${{ env.PACKAGENAME }}.pdf 97 | asset_content_type: application/pdf 98 | 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | main.pdf 2 | _minted-main 3 | resources 4 | 5 | *.emf 6 | 7 | *.pptx 8 | 9 | *.synctex.gz 10 | *.brf 11 | *.fdb_latexmk 12 | *.fls 13 | *.log 14 | *.aux 15 | *.cfg 16 | *.glo 17 | *.idx 18 | *.toc 19 | *.ilg 20 | *.ind 21 | *.out 22 | *.lof 23 | *.lot 24 | *.bbl 25 | *.blg 26 | *.gls 27 | *.cut 28 | *.hd 29 | *.dvi 30 | *.ps 31 | *.thm 32 | *.tgz 33 | *.zip 34 | *.rpi 35 | *~ 36 | *.bcf 37 | *.run.xml 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 BasicSR-docs Authors 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [BasicSR](https://github.com/XPixelGroup/BasicSR) Chinese Documentation (中文解读文档) 2 | 3 | [![download](https://img.shields.io/github/downloads/XPixelGroup/BasicSR-docs/total)](https://github.com/XPixelGroup/BasicSR-docs/releases) 4 | ![visitors](https://visitor-badge.glitch.me/badge?page_id=XPixelGroup/BasicSR-docs) 5 | [![license](https://img.shields.io/github/license/xinntao/BasicSR-docs)](https://github.com/XPixelGroup/BasicSR-docs/blob/master/LICENSE) 6 | [![release](https://github.com/XPixelGroup/BasicSR-docs/actions/workflows/release.yml/badge.svg)](https://github.com/XPixelGroup/BasicSR-docs/blob/master/.github/workflows/release.yml) 7 | 8 | This repository holds the LaTeX source files of the Chinese documentation for [BasicSR](https://github.com/XPixelGroup/BasicSR). 9 | 10 | ## [PDF Download](https://github.com/XPixelGroup/BasicSR-docs/releases) 11 | 12 | ⏬ The latest version of this PDF can be downloaded from the [releases page](https://github.com/XPixelGroup/BasicSR-docs/releases). 13 | 14 | (The compiled, up-to-date PDF is available for download under **Assets** on the release page.) 15 | 16 |
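As a command-line alternative, here is a minimal sketch (assuming the GitHub CLI `gh` is installed and authenticated; `--repo` and `--pattern` select the repository and the PDF asset):

```sh
# Fetch the compiled PDF asset from the latest release of this repository
gh release download --repo XPixelGroup/BasicSR-docs --pattern '*.pdf'
```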

17 | <p align="center"> 18 | <img src="cover.png" alt="Cover"> 19 | </p> 20 |
21 | 22 | ## Contributions Welcome 23 | 24 | 😊 Everyone is welcome to help find errors in the documentation and improve it. 25 | 26 | - Documentation style conventions: [doc_style.md](doc_style.md) 27 | 28 | ## Release 29 | 30 | - When a tag is set and pushed, the [release workflow](https://github.com/XPixelGroup/BasicSR-docs/blob/master/.github/workflows/release.yml) is triggered (see the sketch below) 31 | - Once the GitHub workflow is triggered, the LaTeX sources are compiled automatically and a release draft is built 32 | - Edit the release draft, then publish the official release 33 |
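For reference, a minimal sketch of triggering a release from the command line (the tag name `v1.2.3` is only a placeholder; the workflow's `'*'` filter matches any tag):

```sh
# Pushing any tag triggers .github/workflows/release.yml
git tag v1.2.3
git push origin v1.2.3
```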
34 | ## 📜 License and Acknowledgement 35 | 36 | The LaTeX template is modified from 37 |
-------------------------------------------------------------------------------- /cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/cover.png --------------------------------------------------------------------------------
/doc_style.md: -------------------------------------------------------------------------------- 1 | Writing conventions for Chinese documentation (especially LaTeX). 2 | 3 | These conventions: 4 | 5 | 1. Were summarized while writing the [《BasicSR 入门教程》](https://github.com/XPixelGroup/BasicSR-docs) 6 | 1. Reference [Ruan Yifeng's document style guide](https://github.com/ruanyf/document-style-guide) 7 | 1. Principles: 8 | 1. Easy to read 9 | 1. Consistent style 10 | 11 | ## Plugins 12 | 13 | Edit with VSCode and make good use of plugins for formatting (mainly indentation). 14 | 15 | Plugin: [latex-formatter](https://marketplace.visualstudio.com/items?itemName=nickfode.latex-formatter) 16 | 17 | Note: latex-formatter indents with Tab. For consistency, replace tabs with 4 spaces. 18 | 19 | ## Headings 20 | 21 | 1. Apart from chapter (Chapter) titles, section (Section) headings have 3 levels: \section{}, \subsection{}, \subsubsection{} 22 | 1. Do not skip levels, i.e. a \subsubsection{} must be preceded by a \subsection{}. The following is not allowed 23 | 24 | ```latex 25 | \section{} 26 | \subsubsection{} 27 | ``` 28 | 29 | 1. Always write a label right after each section so other chapters can reference it 30 | 31 | ```latex 32 | \section{数据 (Data Loader 和 Dataset)}\label{code_structure:data} 33 | ``` 34 | 35 | 1. To make them easier to spot in the editor, add a comment rule above each section and subsection 36 | 37 | ```latex 38 | % ------------------------------------------------------------------------------ 39 | \section{配置(Options)}\label{code_structure:config} 40 | 41 | % ---------------------------------- 42 | \subsection{实验命名}\label{code_structure:name_convention} 43 | ``` 44 | 45 | 1. Leave a blank line after each heading (\section{}, \subsection{}, \subsubsection{}) to make editing easier 46 | 47 | ```latex 48 | 49 | % ------------------------------------------------------------------------------ 50 | \section{配置(Options)}\label{code_structure:config} 51 | 52 | 在这个章节,我们先简单介绍一下... 53 | ``` 54 | 55 | ## Punctuation and spacing 56 | 57 | 1. The body of our writing is Chinese, so use Chinese (full-width) punctuation, with a few exceptions: 58 | 1. For consistency, use English (half-width) parentheses even when the content inside is Chinese, because full-width parentheses can "swallow" characters in some editors. English parentheses need a space before and after, except when the parenthesis is followed by punctuation 59 | 60 | ```latex 61 | 常见数据 (dataset) 的定义在... 62 | 有整体框架 (参见第4小节),网络结构 (参见第5小节) 等。 63 | ``` 64 | 65 | 1. If the whole sentence is English, use English punctuation. English punctuation may also be used between a few English words (this is flexible and mainly depends on the proportion of English). If the sentence merely ends with an English word, use Chinese punctuation 66 | 67 | ```latex 68 | 小节标题使用 \section{}, \subsection{}, \subsubsection{}。 69 | ``` 70 | 71 | 1. Put one space before and after an English word, but no space when the word is adjacent to punctuation 72 | 73 | ```latex 74 | 这个部分主要定义了 Dataset 和 Data Loader 文件 75 | 包括图像读取、归一化 (normalization)、数据增强 (augmentation) 以及封装为 PyTorch Tensor。 76 | ``` 77 | 78 | 1. No spaces around numbers. When a number is followed by an English unit, decide case by case whether a space is needed (usually not) 79 | 80 | ```latex 81 | 训练了1000K iterations。 82 | 占用显存16GB。 83 | ``` 84 | 85 | 1. Ellipsis: either ... or the Chinese ellipsis is fine 86 | 1. More punctuation examples (adapted from [Ruan Yifeng's document style guide](https://github.com/ruanyf/document-style-guide)) 87 | 88 | ```latex 89 | 句号和括号关系:关于文件的输出,请参照第1.3节 (见第26页)。 90 | 并列词用顿号 (即使是英语单词):科技公司有 Google、Facebook、腾讯、阿里和百度等。 91 | ``` 92 | 93 | 1. Hyphens and underscores in LaTeX 94 | 95 | ```latex 96 | {-}opt 97 | {-}{-}net 98 | \_arch.py 99 | ``` 100 | 101 | 1. Tildes in LaTeX 102 | 103 | ```latex 104 | $\sim$ 105 | ``` 106 | 107 | ## Lists 108 | 109 | 1. Do not end list items with a period. In LaTeX, lists are mainly enumerate and itemize 110 | 111 | ```latex 112 | \begin{enumerate} 113 | \item 训练和 validation 的 data loader 的创建 114 | \item model 的创建 115 | \end{enumerate} 116 | ``` 117 | 118 | ## References 119 | 120 | 1. Always write a label right after each section so other chapters can reference it. Write section labels as {chapter_name:section_name}. 121 | a. No spaces 122 | b. Either underscore _ or hyphen - works; hyphens are recommended. The section name may use underscores to stay consistent with the file name 123 | 124 | ```latex 125 | \section{数据 (Data Loader 和 Dataset)}\label{code_structure:data} 126 | ``` 127 | 128 | 1. Give every section its own label so other chapters can reference it. 129 | For example: \section{目录解读}\label{getting_start:content-overview} 130 | 131 | 132 | 133 | 1. Referencing a section together with its name. After compilation the format is: 章节4.10:日志系统(Logger) 134 | 135 | ```latex 136 | 章节\ref{code_structure:logger}:\nameref{code_structure:logger} 137 | ``` 138 | 139 | 1. Referencing a subsection by number. After compilation the format is: 第4.1小节 140 | 141 | ```latex 142 | 第\ref{code_structure:overview}小节 143 | ``` 144 | 145 | 1. Whenever the text mentions another section or its content, always use a reference so readers can click and jump to it 146 | 147 | ## Figures and tables 148 |
-------------------------------------------------------------------------------- /latex/apalike_fullname.bst: -------------------------------------------------------------------------------- 1 | % BibTeX `apalike' bibliography style (version 0.99a, 8-Dec-10), adapted from 2 | % the `alpha' style, version 0.99a; for BibTeX version 0.99a. 3 | % 4 | % Copyright (C) 1988, 2010 Oren Patashnik. 5 | % Unlimited copying and redistribution of this file are permitted as long as 6 | % it is unmodified. Modifications (and redistribution of modified versions) 7 | % are also permitted, but only if the resulting file is renamed. 8 | % 9 | % Differences between this style and `alpha' are generally heralded by a `%'. 10 | % The file btxbst.doc has the documentation for alpha.bst. 11 | % 12 | % This style should be used with the `apalike' LaTeX style (apalike.sty). 13 | % \cite's come out like "(Jones, 1986)" in the text but there are no labels 14 | % in the bibliography, and something like "(1986)" comes out immediately 15 | % after the author. Author (and editor) names appear as last name, comma, 16 | % initials. A `year' field is required for every entry, and so is either 17 | % an author (or in some cases, an editor) field or a key field. 18 | % 19 | % Editorial note: 20 | % Many journals require a style like `apalike', but I strongly, strongly, 21 | % strongly recommend that you not use it if you have a choice---use something 22 | % like `plain' instead. Mary-Claire van Leunen (A Handbook for Scholars, 23 | % Knopf, 1979) argues convincingly that a style like `plain' encourages better 24 | % writing than one like `apalike'. Furthermore the strongest arguments for 25 | % using an author-date style like `apalike'---that it's "the most practical" 26 | % (The Chicago Manual of Style, University of Chicago Press, thirteenth 27 | % edition, 1982, pages 400--401)---fall flat on their face with the new 28 | % computer-typesetting technology. For instance page 401 anachronistically 29 | % states "The chief disadvantage of [a style like `plain'] is that additions 30 | % or deletions cannot be made after the manuscript is typed without changing 31 | % numbers in both text references and list." LaTeX sidesteps the disadvantage. 32 | % 33 | % History: 34 | % 15-sep-86 (OP) Original version by Oren Patashnik, ideas from Susan King. 35 | % 10-nov-86 (OP) Truncated the sort.key$ string to the correct length 36 | % in bib.sort.order to eliminate error message.
37 | % 24-jan-88 (OP) Updated for BibTeX version 0.99a, from alpha.bst 0.99a; 38 | % apalike now sorts by author, then year, then title; 39 | % THIS `apalike' VERSION DOES NOT WORK WITH BIBTEX 0.98i. 40 | % 8-dec-10 (OP) Still version 0.99a, as the code itself was unchanged; 41 | % this release clarified the license. 42 | 43 | ENTRY 44 | { address 45 | author 46 | booktitle 47 | chapter 48 | edition 49 | editor 50 | howpublished 51 | institution 52 | journal 53 | key 54 | % month not used in apalike 55 | note 56 | number 57 | organization 58 | pages 59 | publisher 60 | school 61 | series 62 | title 63 | type 64 | volume 65 | year 66 | } 67 | {} 68 | { label extra.label sort.label } 69 | 70 | INTEGERS { output.state before.all mid.sentence after.sentence after.block } 71 | 72 | FUNCTION {init.state.consts} 73 | { #0 'before.all := 74 | #1 'mid.sentence := 75 | #2 'after.sentence := 76 | #3 'after.block := 77 | } 78 | 79 | STRINGS { s t } 80 | 81 | FUNCTION {output.nonnull} 82 | { 's := 83 | output.state mid.sentence = 84 | { ", " * write$ } 85 | { output.state after.block = 86 | { add.period$ write$ 87 | newline$ 88 | "\newblock " write$ 89 | } 90 | { output.state before.all = 91 | 'write$ 92 | { add.period$ " " * write$ } 93 | if$ 94 | } 95 | if$ 96 | mid.sentence 'output.state := 97 | } 98 | if$ 99 | s 100 | } 101 | 102 | FUNCTION {output} 103 | { duplicate$ empty$ 104 | 'pop$ 105 | 'output.nonnull 106 | if$ 107 | } 108 | 109 | FUNCTION {output.check} 110 | { 't := 111 | duplicate$ empty$ 112 | { pop$ "empty " t * " in " * cite$ * warning$ } 113 | 'output.nonnull 114 | if$ 115 | } 116 | 117 | % apalike needs this function because 118 | % the year has special punctuation; 119 | % apalike ignores the month 120 | FUNCTION {output.year.check} 121 | { year empty$ 122 | { "empty year in " cite$ * warning$ } 123 | { write$ 124 | " (" year * extra.label * ")" * 125 | mid.sentence 'output.state := 126 | } 127 | if$ 128 | } 129 | 130 | FUNCTION {output.bibitem} 131 | { newline$ 132 | "\bibitem[" write$ 133 | label write$ 134 | "]{" write$ 135 | cite$ write$ 136 | "}" write$ 137 | newline$ 138 | "" 139 | before.all 'output.state := 140 | } 141 | 142 | FUNCTION {fin.entry} 143 | { add.period$ 144 | write$ 145 | newline$ 146 | } 147 | 148 | FUNCTION {new.block} 149 | { output.state before.all = 150 | 'skip$ 151 | { after.block 'output.state := } 152 | if$ 153 | } 154 | 155 | FUNCTION {new.sentence} 156 | { output.state after.block = 157 | 'skip$ 158 | { output.state before.all = 159 | 'skip$ 160 | { after.sentence 'output.state := } 161 | if$ 162 | } 163 | if$ 164 | } 165 | 166 | FUNCTION {not} 167 | { { #0 } 168 | { #1 } 169 | if$ 170 | } 171 | 172 | FUNCTION {and} 173 | { 'skip$ 174 | { pop$ #0 } 175 | if$ 176 | } 177 | 178 | FUNCTION {or} 179 | { { pop$ #1 } 180 | 'skip$ 181 | if$ 182 | } 183 | 184 | FUNCTION {new.block.checkb} 185 | { empty$ 186 | swap$ empty$ 187 | and 188 | 'skip$ 189 | 'new.block 190 | if$ 191 | } 192 | 193 | FUNCTION {field.or.null} 194 | { duplicate$ empty$ 195 | { pop$ "" } 196 | 'skip$ 197 | if$ 198 | } 199 | 200 | FUNCTION {emphasize} 201 | { duplicate$ empty$ 202 | { pop$ "" } 203 | { "{\em " swap$ * "}" * } 204 | if$ 205 | } 206 | 207 | INTEGERS { nameptr namesleft numnames } 208 | 209 | FUNCTION {format.names} 210 | { 's := 211 | #1 'nameptr := 212 | s num.names$ 'numnames := 213 | numnames 'namesleft := 214 | { namesleft #0 > } 215 | { %s nameptr "{vv~}{ll}{, jj}{, ff}" format.name$ 't := % last name first 216 | s nameptr "{ff }{vv }{ll}{, jj}" format.name$ 't := 217 | 
nameptr #1 > 218 | { namesleft #1 > 219 | { ", " * t * } 220 | { numnames #2 > 221 | { "," * } 222 | 'skip$ 223 | if$ 224 | t "others" = 225 | { " et~al." * } 226 | { " and " * t * } 227 | if$ 228 | } 229 | if$ 230 | } 231 | 't 232 | if$ 233 | nameptr #1 + 'nameptr := 234 | namesleft #1 - 'namesleft := 235 | } 236 | while$ 237 | } 238 | 239 | FUNCTION {format.authors} 240 | { author empty$ 241 | { "" } 242 | { author format.names } 243 | if$ 244 | } 245 | 246 | FUNCTION {format.key} % this function is just for apalike 247 | { empty$ 248 | { key field.or.null } 249 | { "" } 250 | if$ 251 | } 252 | 253 | FUNCTION {format.editors} 254 | { editor empty$ 255 | { "" } 256 | { editor format.names 257 | editor num.names$ #1 > 258 | { ", editors" * } 259 | { ", editor" * } 260 | if$ 261 | } 262 | if$ 263 | } 264 | 265 | FUNCTION {format.title} 266 | { title empty$ 267 | { "" } 268 | { title "t" change.case$ } 269 | if$ 270 | } 271 | 272 | FUNCTION {n.dashify} 273 | { 't := 274 | "" 275 | { t empty$ not } 276 | { t #1 #1 substring$ "-" = 277 | { t #1 #2 substring$ "--" = not 278 | { "--" * 279 | t #2 global.max$ substring$ 't := 280 | } 281 | { { t #1 #1 substring$ "-" = } 282 | { "-" * 283 | t #2 global.max$ substring$ 't := 284 | } 285 | while$ 286 | } 287 | if$ 288 | } 289 | { t #1 #1 substring$ * 290 | t #2 global.max$ substring$ 't := 291 | } 292 | if$ 293 | } 294 | while$ 295 | } 296 | 297 | FUNCTION {format.btitle} 298 | { title emphasize 299 | } 300 | 301 | FUNCTION {tie.or.space.connect} 302 | { duplicate$ text.length$ #3 < 303 | { "~" } 304 | { " " } 305 | if$ 306 | swap$ * * 307 | } 308 | 309 | FUNCTION {either.or.check} 310 | { empty$ 311 | 'pop$ 312 | { "can't use both " swap$ * " fields in " * cite$ * warning$ } 313 | if$ 314 | } 315 | 316 | FUNCTION {format.bvolume} 317 | { volume empty$ 318 | { "" } 319 | { "volume" volume tie.or.space.connect 320 | series empty$ 321 | 'skip$ 322 | { " of " * series emphasize * } 323 | if$ 324 | "volume and number" number either.or.check 325 | } 326 | if$ 327 | } 328 | 329 | FUNCTION {format.number.series} 330 | { volume empty$ 331 | { number empty$ 332 | { series field.or.null } 333 | { output.state mid.sentence = 334 | { "number" } 335 | { "Number" } 336 | if$ 337 | number tie.or.space.connect 338 | series empty$ 339 | { "there's a number but no series in " cite$ * warning$ } 340 | { " in " * series * } 341 | if$ 342 | } 343 | if$ 344 | } 345 | { "" } 346 | if$ 347 | } 348 | 349 | FUNCTION {format.edition} 350 | { edition empty$ 351 | { "" } 352 | { output.state mid.sentence = 353 | { edition "l" change.case$ " edition" * } 354 | { edition "t" change.case$ " edition" * } 355 | if$ 356 | } 357 | if$ 358 | } 359 | 360 | INTEGERS { multiresult } 361 | 362 | FUNCTION {multi.page.check} 363 | { 't := 364 | #0 'multiresult := 365 | { multiresult not 366 | t empty$ not 367 | and 368 | } 369 | { t #1 #1 substring$ 370 | duplicate$ "-" = 371 | swap$ duplicate$ "," = 372 | swap$ "+" = 373 | or or 374 | { #1 'multiresult := } 375 | { t #2 global.max$ substring$ 't := } 376 | if$ 377 | } 378 | while$ 379 | multiresult 380 | } 381 | 382 | FUNCTION {format.pages} 383 | { pages empty$ 384 | { "" } 385 | { pages multi.page.check 386 | { "pages" pages n.dashify tie.or.space.connect } 387 | { "page" pages tie.or.space.connect } 388 | if$ 389 | } 390 | if$ 391 | } 392 | 393 | FUNCTION {format.vol.num.pages} 394 | { volume field.or.null 395 | number empty$ 396 | 'skip$ 397 | { "(" number * ")" * * 398 | volume empty$ 399 | { "there's a number but no volume in " cite$ 
* warning$ } 400 | 'skip$ 401 | if$ 402 | } 403 | if$ 404 | pages empty$ 405 | 'skip$ 406 | { duplicate$ empty$ 407 | { pop$ format.pages } 408 | { ":" * pages n.dashify * } 409 | if$ 410 | } 411 | if$ 412 | } 413 | 414 | FUNCTION {format.chapter.pages} 415 | { chapter empty$ 416 | 'format.pages 417 | { type empty$ 418 | { "chapter" } 419 | { type "l" change.case$ } 420 | if$ 421 | chapter tie.or.space.connect 422 | pages empty$ 423 | 'skip$ 424 | { ", " * format.pages * } 425 | if$ 426 | } 427 | if$ 428 | } 429 | 430 | FUNCTION {format.in.ed.booktitle} 431 | { booktitle empty$ 432 | { "" } 433 | { editor empty$ 434 | { "In " booktitle emphasize * } 435 | { "In " format.editors * ", " * booktitle emphasize * } 436 | if$ 437 | } 438 | if$ 439 | } 440 | 441 | FUNCTION {format.thesis.type} 442 | { type empty$ 443 | 'skip$ 444 | { pop$ 445 | type "t" change.case$ 446 | } 447 | if$ 448 | } 449 | 450 | FUNCTION {format.tr.number} 451 | { type empty$ 452 | { "Technical Report" } 453 | 'type 454 | if$ 455 | number empty$ 456 | { "t" change.case$ } 457 | { number tie.or.space.connect } 458 | if$ 459 | } 460 | 461 | FUNCTION {format.article.crossref} 462 | { "In" % this is for apalike 463 | " \cite{" * crossref * "}" * 464 | } 465 | 466 | FUNCTION {format.book.crossref} 467 | { volume empty$ 468 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ 469 | "In " 470 | } 471 | { "Volume" volume tie.or.space.connect 472 | " of " * 473 | } 474 | if$ 475 | "\cite{" * crossref * "}" * % this is for apalike 476 | } 477 | 478 | FUNCTION {format.incoll.inproc.crossref} 479 | { "In" % this is for apalike 480 | " \cite{" * crossref * "}" * 481 | } 482 | 483 | FUNCTION {article} 484 | { output.bibitem 485 | format.authors "author" output.check 486 | author format.key output % special for 487 | output.year.check % apalike 488 | new.block 489 | format.title "title" output.check 490 | new.block 491 | crossref missing$ 492 | { journal emphasize "journal" output.check 493 | format.vol.num.pages output 494 | } 495 | { format.article.crossref output.nonnull 496 | format.pages output 497 | } 498 | if$ 499 | new.block 500 | note output 501 | fin.entry 502 | } 503 | 504 | FUNCTION {book} 505 | { output.bibitem 506 | author empty$ 507 | { format.editors "author and editor" output.check 508 | editor format.key output 509 | } 510 | { format.authors output.nonnull 511 | crossref missing$ 512 | { "author and editor" editor either.or.check } 513 | 'skip$ 514 | if$ 515 | } 516 | if$ 517 | output.year.check % special for apalike 518 | new.block 519 | format.btitle "title" output.check 520 | crossref missing$ 521 | { format.bvolume output 522 | new.block 523 | format.number.series output 524 | new.sentence 525 | publisher "publisher" output.check 526 | address output 527 | } 528 | { new.block 529 | format.book.crossref output.nonnull 530 | } 531 | if$ 532 | format.edition output 533 | new.block 534 | note output 535 | fin.entry 536 | } 537 | 538 | FUNCTION {booklet} 539 | { output.bibitem 540 | format.authors output 541 | author format.key output % special for 542 | output.year.check % apalike 543 | new.block 544 | format.title "title" output.check 545 | new.block 546 | howpublished output 547 | address output 548 | new.block 549 | note output 550 | fin.entry 551 | } 552 | 553 | FUNCTION {inbook} 554 | { output.bibitem 555 | author empty$ 556 | { format.editors "author and editor" output.check 557 | editor format.key output 558 | } 559 | { format.authors output.nonnull 560 | crossref missing$ 561 | { "author and 
editor" editor either.or.check } 562 | 'skip$ 563 | if$ 564 | } 565 | if$ 566 | output.year.check % special for apalike 567 | new.block 568 | format.btitle "title" output.check 569 | crossref missing$ 570 | { format.bvolume output 571 | format.chapter.pages "chapter and pages" output.check 572 | new.block 573 | format.number.series output 574 | new.sentence 575 | publisher "publisher" output.check 576 | address output 577 | } 578 | { format.chapter.pages "chapter and pages" output.check 579 | new.block 580 | format.book.crossref output.nonnull 581 | } 582 | if$ 583 | format.edition output 584 | new.block 585 | note output 586 | fin.entry 587 | } 588 | 589 | FUNCTION {incollection} 590 | { output.bibitem 591 | format.authors "author" output.check 592 | author format.key output % special for 593 | output.year.check % apalike 594 | new.block 595 | format.title "title" output.check 596 | new.block 597 | crossref missing$ 598 | { format.in.ed.booktitle "booktitle" output.check 599 | format.bvolume output 600 | format.number.series output 601 | format.chapter.pages output 602 | new.sentence 603 | publisher "publisher" output.check 604 | address output 605 | format.edition output 606 | } 607 | { format.incoll.inproc.crossref output.nonnull 608 | format.chapter.pages output 609 | } 610 | if$ 611 | new.block 612 | note output 613 | fin.entry 614 | } 615 | 616 | FUNCTION {inproceedings} 617 | { output.bibitem 618 | format.authors "author" output.check 619 | author format.key output % special for 620 | output.year.check % apalike 621 | new.block 622 | format.title "title" output.check 623 | new.block 624 | crossref missing$ 625 | { format.in.ed.booktitle "booktitle" output.check 626 | format.bvolume output 627 | format.number.series output 628 | format.pages output 629 | address output % for apalike 630 | new.sentence % there's no year 631 | organization output % here so things 632 | publisher output % are simpler 633 | } 634 | { format.incoll.inproc.crossref output.nonnull 635 | format.pages output 636 | } 637 | if$ 638 | new.block 639 | note output 640 | fin.entry 641 | } 642 | 643 | FUNCTION {conference} { inproceedings } 644 | 645 | FUNCTION {manual} 646 | { output.bibitem 647 | format.authors output 648 | author format.key output % special for 649 | output.year.check % apalike 650 | new.block 651 | format.btitle "title" output.check 652 | organization address new.block.checkb 653 | organization output 654 | address output 655 | format.edition output 656 | new.block 657 | note output 658 | fin.entry 659 | } 660 | 661 | FUNCTION {mastersthesis} 662 | { output.bibitem 663 | format.authors "author" output.check 664 | author format.key output % special for 665 | output.year.check % apalike 666 | new.block 667 | format.title "title" output.check 668 | new.block 669 | "Master's thesis" format.thesis.type output.nonnull 670 | school "school" output.check 671 | address output 672 | new.block 673 | note output 674 | fin.entry 675 | } 676 | 677 | FUNCTION {misc} 678 | { output.bibitem 679 | format.authors output 680 | author format.key output % special for 681 | output.year.check % apalike 682 | new.block 683 | format.title output 684 | new.block 685 | howpublished output 686 | new.block 687 | note output 688 | fin.entry 689 | } 690 | 691 | FUNCTION {phdthesis} 692 | { output.bibitem 693 | format.authors "author" output.check 694 | author format.key output % special for 695 | output.year.check % apalike 696 | new.block 697 | format.btitle "title" output.check 698 | new.block 699 | "PhD thesis" 
format.thesis.type output.nonnull 700 | school "school" output.check 701 | address output 702 | new.block 703 | note output 704 | fin.entry 705 | } 706 | 707 | FUNCTION {proceedings} 708 | { output.bibitem 709 | format.editors output 710 | editor format.key output % special for 711 | output.year.check % apalike 712 | new.block 713 | format.btitle "title" output.check 714 | format.bvolume output 715 | format.number.series output 716 | address output % for apalike 717 | new.sentence % we always output 718 | organization output % a nonempty organization 719 | publisher output % here 720 | new.block 721 | note output 722 | fin.entry 723 | } 724 | 725 | FUNCTION {techreport} 726 | { output.bibitem 727 | format.authors "author" output.check 728 | author format.key output % special for 729 | output.year.check % apalike 730 | new.block 731 | format.title "title" output.check 732 | new.block 733 | format.tr.number output.nonnull 734 | institution "institution" output.check 735 | address output 736 | new.block 737 | note output 738 | fin.entry 739 | } 740 | 741 | FUNCTION {unpublished} 742 | { output.bibitem 743 | format.authors "author" output.check 744 | author format.key output % special for 745 | output.year.check % apalike 746 | new.block 747 | format.title "title" output.check 748 | new.block 749 | note "note" output.check 750 | fin.entry 751 | } 752 | 753 | FUNCTION {default.type} { misc } 754 | 755 | MACRO {jan} {"January"} 756 | 757 | MACRO {feb} {"February"} 758 | 759 | MACRO {mar} {"March"} 760 | 761 | MACRO {apr} {"April"} 762 | 763 | MACRO {may} {"May"} 764 | 765 | MACRO {jun} {"June"} 766 | 767 | MACRO {jul} {"July"} 768 | 769 | MACRO {aug} {"August"} 770 | 771 | MACRO {sep} {"September"} 772 | 773 | MACRO {oct} {"October"} 774 | 775 | MACRO {nov} {"November"} 776 | 777 | MACRO {dec} {"December"} 778 | 779 | MACRO {acmcs} {"ACM Computing Surveys"} 780 | 781 | MACRO {acta} {"Acta Informatica"} 782 | 783 | MACRO {cacm} {"Communications of the ACM"} 784 | 785 | MACRO {ibmjrd} {"IBM Journal of Research and Development"} 786 | 787 | MACRO {ibmsj} {"IBM Systems Journal"} 788 | 789 | MACRO {ieeese} {"IEEE Transactions on Software Engineering"} 790 | 791 | MACRO {ieeetc} {"IEEE Transactions on Computers"} 792 | 793 | MACRO {ieeetcad} 794 | {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} 795 | 796 | MACRO {ipl} {"Information Processing Letters"} 797 | 798 | MACRO {jacm} {"Journal of the ACM"} 799 | 800 | MACRO {jcss} {"Journal of Computer and System Sciences"} 801 | 802 | MACRO {scp} {"Science of Computer Programming"} 803 | 804 | MACRO {sicomp} {"SIAM Journal on Computing"} 805 | 806 | MACRO {tocs} {"ACM Transactions on Computer Systems"} 807 | 808 | MACRO {tods} {"ACM Transactions on Database Systems"} 809 | 810 | MACRO {tog} {"ACM Transactions on Graphics"} 811 | 812 | MACRO {toms} {"ACM Transactions on Mathematical Software"} 813 | 814 | MACRO {toois} {"ACM Transactions on Office Information Systems"} 815 | 816 | MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} 817 | 818 | MACRO {tcs} {"Theoretical Computer Science"} 819 | 820 | READ 821 | 822 | FUNCTION {sortify} 823 | { purify$ 824 | "l" change.case$ 825 | } 826 | 827 | INTEGERS { len } 828 | 829 | FUNCTION {chop.word} 830 | { 's := 831 | 'len := 832 | s #1 len substring$ = 833 | { s len #1 + global.max$ substring$ } 834 | 's 835 | if$ 836 | } 837 | 838 | % There are three apalike cases: one person (Jones), 839 | % two (Jones and de~Bruijn), and more (Jones et~al.). 
840 | % This function is much like format.crossref.editors. 841 | % 842 | FUNCTION {format.lab.names} 843 | { 's := 844 | s #1 "{vv~}{ll}" format.name$ 845 | s num.names$ duplicate$ 846 | #2 > 847 | { pop$ " et~al." * } 848 | { #2 < 849 | 'skip$ 850 | { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = 851 | { " et~al." * } 852 | { " and " * s #2 "{vv~}{ll}" format.name$ * } 853 | if$ 854 | } 855 | if$ 856 | } 857 | if$ 858 | } 859 | 860 | FUNCTION {author.key.label} 861 | { author empty$ 862 | { key empty$ 863 | { cite$ #1 #3 substring$ } 864 | 'key % apalike uses the whole key 865 | if$ 866 | } 867 | { author format.lab.names } 868 | if$ 869 | } 870 | 871 | FUNCTION {author.editor.key.label} 872 | { author empty$ 873 | { editor empty$ 874 | { key empty$ 875 | { cite$ #1 #3 substring$ } 876 | 'key % apalike uses the whole key 877 | if$ 878 | } 879 | { editor format.lab.names } 880 | if$ 881 | } 882 | { author format.lab.names } 883 | if$ 884 | } 885 | 886 | FUNCTION {editor.key.label} 887 | { editor empty$ 888 | { key empty$ 889 | { cite$ #1 #3 substring$ } 890 | 'key % apalike uses the whole key, no organization 891 | if$ 892 | } 893 | { editor format.lab.names } 894 | if$ 895 | } 896 | 897 | FUNCTION {calc.label} 898 | { type$ "book" = 899 | type$ "inbook" = 900 | or 901 | 'author.editor.key.label 902 | { type$ "proceedings" = 903 | 'editor.key.label % apalike ignores organization 904 | 'author.key.label % for labeling and sorting 905 | if$ 906 | } 907 | if$ 908 | ", " % these three lines are 909 | * % for apalike, which 910 | year field.or.null purify$ #-1 #4 substring$ % uses all four digits 911 | * 912 | 'label := 913 | } 914 | 915 | FUNCTION {sort.format.names} 916 | { 's := 917 | #1 'nameptr := 918 | "" 919 | s num.names$ 'numnames := 920 | numnames 'namesleft := 921 | { namesleft #0 > } 922 | { nameptr #1 > 923 | { " " * } 924 | 'skip$ 925 | if$ % apalike uses initials 926 | s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := % <= here 927 | nameptr numnames = t "others" = and 928 | { "et al" * } 929 | { t sortify * } 930 | if$ 931 | nameptr #1 + 'nameptr := 932 | namesleft #1 - 'namesleft := 933 | } 934 | while$ 935 | } 936 | 937 | FUNCTION {sort.format.title} 938 | { 't := 939 | "A " #2 940 | "An " #3 941 | "The " #4 t chop.word 942 | chop.word 943 | chop.word 944 | sortify 945 | #1 global.max$ substring$ 946 | } 947 | 948 | FUNCTION {author.sort} 949 | { author empty$ 950 | { key empty$ 951 | { "to sort, need author or key in " cite$ * warning$ 952 | "" 953 | } 954 | { key sortify } 955 | if$ 956 | } 957 | { author sort.format.names } 958 | if$ 959 | } 960 | 961 | FUNCTION {author.editor.sort} 962 | { author empty$ 963 | { editor empty$ 964 | { key empty$ 965 | { "to sort, need author, editor, or key in " cite$ * warning$ 966 | "" 967 | } 968 | { key sortify } 969 | if$ 970 | } 971 | { editor sort.format.names } 972 | if$ 973 | } 974 | { author sort.format.names } 975 | if$ 976 | } 977 | 978 | FUNCTION {editor.sort} 979 | { editor empty$ 980 | { key empty$ 981 | { "to sort, need editor or key in " cite$ * warning$ 982 | "" 983 | } 984 | { key sortify } 985 | if$ 986 | } 987 | { editor sort.format.names } 988 | if$ 989 | } 990 | 991 | % apalike uses two sorting passes; the first one sets the 992 | % labels so that the `a's, `b's, etc. can be computed; 993 | % the second pass puts the references in "correct" order. 994 | % The presort function is for the first pass. It computes 995 | % label, sort.label, and title, and then concatenates. 
996 | FUNCTION {presort} 997 | { calc.label 998 | label sortify 999 | " " 1000 | * 1001 | type$ "book" = 1002 | type$ "inbook" = 1003 | or 1004 | 'author.editor.sort 1005 | { type$ "proceedings" = 1006 | 'editor.sort 1007 | 'author.sort 1008 | if$ 1009 | } 1010 | if$ 1011 | #1 entry.max$ substring$ % for 1012 | 'sort.label := % apalike 1013 | sort.label % style 1014 | * 1015 | " " 1016 | * 1017 | title field.or.null 1018 | sort.format.title 1019 | * 1020 | #1 entry.max$ substring$ 1021 | 'sort.key$ := 1022 | } 1023 | 1024 | ITERATE {presort} 1025 | 1026 | SORT % by label, sort.label, title---for final label calculation 1027 | 1028 | STRINGS { last.label next.extra } % apalike labels are only for the text; 1029 | 1030 | INTEGERS { last.extra.num } % there are none in the bibliography 1031 | 1032 | FUNCTION {initialize.extra.label.stuff} % and hence there is no `longest.label' 1033 | { #0 int.to.chr$ 'last.label := 1034 | "" 'next.extra := 1035 | #0 'last.extra.num := 1036 | } 1037 | 1038 | FUNCTION {forward.pass} 1039 | { last.label label = 1040 | { last.extra.num #1 + 'last.extra.num := 1041 | last.extra.num int.to.chr$ 'extra.label := 1042 | } 1043 | { "a" chr.to.int$ 'last.extra.num := 1044 | "" 'extra.label := 1045 | label 'last.label := 1046 | } 1047 | if$ 1048 | } 1049 | 1050 | FUNCTION {reverse.pass} 1051 | { next.extra "b" = 1052 | { "a" 'extra.label := } 1053 | 'skip$ 1054 | if$ 1055 | label extra.label * 'label := 1056 | extra.label 'next.extra := 1057 | } 1058 | 1059 | EXECUTE {initialize.extra.label.stuff} 1060 | 1061 | ITERATE {forward.pass} 1062 | 1063 | REVERSE {reverse.pass} 1064 | 1065 | % Now that the label is right we sort for real, 1066 | % on sort.label then year then title. This is 1067 | % for the second sorting pass. 1068 | FUNCTION {bib.sort.order} 1069 | { sort.label 1070 | " " 1071 | * 1072 | year field.or.null sortify 1073 | * 1074 | " " 1075 | * 1076 | title field.or.null 1077 | sort.format.title 1078 | * 1079 | #1 entry.max$ substring$ 1080 | 'sort.key$ := 1081 | } 1082 | 1083 | ITERATE {bib.sort.order} 1084 | 1085 | SORT % by sort.label, year, title---giving final bibliography order 1086 | 1087 | FUNCTION {begin.bib} 1088 | { preamble$ empty$ % no \etalchar in apalike 1089 | 'skip$ 1090 | { preamble$ write$ newline$ } 1091 | if$ 1092 | "\begin{thebibliography}{}" write$ newline$ % no labels in apalike 1093 | } 1094 | 1095 | EXECUTE {begin.bib} 1096 | 1097 | EXECUTE {init.state.consts} 1098 | 1099 | ITERATE {call.type$} 1100 | 1101 | FUNCTION {end.bib} 1102 | { newline$ 1103 | "\end{thebibliography}" write$ newline$ 1104 | } 1105 | 1106 | EXECUTE {end.bib} 1107 | -------------------------------------------------------------------------------- /latex/bib.bib: -------------------------------------------------------------------------------- 1 | @String(PAMI = {IEEE TPAMI}) 2 | @String(IJCV = {IJCV}) 3 | @String(CVPR = {CVPR}) 4 | @String(CVPRW = {CVPR Workshops}) 5 | @String(ICCV = {ICCV}) 6 | @String(ICCVW = {ICCV Workshops}) 7 | @String(ECCV = {ECCV}) 8 | @String(ECCVW = {ECCV Workshops}) 9 | @String(NIPS = {NeurIPS}) 10 | @String(ICPR = {ICPR}) 11 | @String(BMVC = {BMVC}) 12 | @String(TOG = {ACM TOG}) 13 | @String(TIP = {IEEE TIP}) 14 | @String(TVCG = {IEEE TVCG}) 15 | @String(TCSVT = {IEEE TCSVT}) 16 | @String(TMM = {IEEE TMM}) 17 | @String(ACMMM = {ACM MM}) 18 | @String(ICME = {ICME}) 19 | @String(ICASSP= {ICASSP}) 20 | @String(ICIP = {ICIP}) 21 | @String(ACCV = {ACCV}) 22 | @String(ICLR = {ICLR}) 23 | @String(IJCAI = {IJCAI}) 24 | @String(PR = {PR}) 25 | 
@String(AAAI = {AAAI}) 26 | @String(CSVT = {IEEE TCSVT}) 27 | @String(ICML = {ICML}) 28 | 29 | @inproceedings{zhang2021designing, 30 | title = {Designing a practical degradation model for deep blind image super-resolution}, 31 | author = {Zhang, Kai and Liang, Jingyun and Van Gool, Luc and Timofte, Radu}, 32 | booktitle = iccv, 33 | year = {2021} 34 | } 35 | 36 | @inproceedings{zhang2018learning, 37 | title = {Learning a single convolutional super-resolution network for multiple degradations}, 38 | author = {Zhang, Kai and Zuo, Wangmeng and Zhang, Lei}, 39 | booktitle = cvpr, 40 | year = {2018} 41 | } 42 | 43 | @inproceedings{zhang2020deep, 44 | title = {Deep unfolding network for image super-resolution}, 45 | author = {Zhang, Kai and Gool, Luc Van and Timofte, Radu}, 46 | booktitle = cvpr, 47 | year = {2020} 48 | } 49 | 50 | @inproceedings{ding2019acnet, 51 | title = {Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks}, 52 | author = {Ding, Xiaohan and Guo, Yuchen and Ding, Guiguang and Han, Jungong}, 53 | booktitle = iccv, 54 | year = {2019} 55 | } 56 | 57 | @inproceedings{arora2018optimization, 58 | title = {On the optimization of deep networks: Implicit acceleration by overparameterization}, 59 | author = {Arora, Sanjeev and Cohen, Nadav and Hazan, Elad}, 60 | booktitle = icml, 61 | year = {2018} 62 | } 63 | 64 | @article{zagoruyko2017diracnets, 65 | title = {Diracnets: Training very deep neural networks without skip-connections}, 66 | author = {Zagoruyko, Sergey and Komodakis, Nikos}, 67 | journal = {arXiv:1706.00388}, 68 | year = {2017} 69 | } 70 | 71 | @inproceedings{zhang2019aimconstrainedsr, 72 | title = {Aim 2019 challenge on constrained super-resolution: Methods and results}, 73 | author = {Zhang, Kai and Gu, Shuhang and Timofte, Radu and Hui, Zheng and Wang, Xiumei and Gao, Xinbo and Xiong, Dongliang and Liu, Shuai and Gang, Ruipeng and Nan, Nan and others}, 74 | booktitle = iccvw, 75 | year = {2019} 76 | } 77 | 78 | @inproceedings{zhang2020aimefficientsr, 79 | title = {AIM 2020 challenge on efficient super-resolution: Methods and results}, 80 | author = {Zhang, Kai and Danelljan, Martin and Li, Yawei and Timofte, Radu and Liu, Jie and Tang, Jie and Wu, Gangshan and Zhu, Yu and He, Xiangyu and Xu, Wenjie and others}, 81 | booktitle = eccvw, 82 | year = {2020} 83 | } 84 | 85 | @inproceedings{hui2019imdn, 86 | title = {Lightweight image super-resolution with information multi-distillation network}, 87 | author = {Hui, Zheng and Gao, Xinbo and Yang, Yunchu and Wang, Xiumei}, 88 | booktitle = acmmm, 89 | year = {2019} 90 | } 91 | @inproceedings{hui2018idn, 92 | title = {Fast and accurate single image super-resolution via information distillation network}, 93 | author = {Hui, Zheng and Wang, Xiumei and Gao, Xinbo}, 94 | booktitle = cvpr, 95 | year = {2018} 96 | } 97 | 98 | @inproceedings{ahn2018carn, 99 | title = {Fast, accurate, and lightweight super-resolution with cascading residual network}, 100 | author = {Ahn, Namhyuk and Kang, Byungkon and Sohn, Kyung-Ah}, 101 | booktitle = eccv, 102 | year = {2018} 103 | } 104 | @inproceedings{mei2020image, 105 | title = {Image super-resolution with cross-scale non-local attention and exhaustive self-exemplars mining}, 106 | author = {Mei, Yiqun and Fan, Yuchen and Zhou, Yuqian and Huang, Lichao and Huang, Thomas S and Shi, Honghui}, 107 | booktitle = cvpr, 108 | year = {2020} 109 | } 110 | @inproceedings{tai2017image, 111 | author = {Tai, Ying and Yang, Jian and Liu, Xiaoming}, 112 | title = {Image 
Super-Resolution via Deep Recursive Residual Network}, 113 | booktitle = cvpr, 114 | year = {2017}, 115 | groups = {SR}, 116 | timestamp = {2018-07-30} 117 | } 118 | @inproceedings{kim2016deeply, 119 | author = {Kim, Jiwon and Kwon Lee, Jung and Mu Lee, Kyoung}, 120 | title = {Deeply-recursive convolutional network for image super-resolution}, 121 | booktitle = cvpr, 122 | year = {2016}, 123 | comment = {DRCN}, 124 | groups = {SR}, 125 | timestamp = {2018-08-03} 126 | } 127 | @inproceedings{zhang2018residual, 128 | author = {Zhang, Yulun and Tian, Yapeng and Kong, Yu and Zhong, Bineng and Fu, Yun}, 129 | title = {Residual dense network for image super-resolution}, 130 | booktitle = cvpr, 131 | year = {2018}, 132 | comment = {RDN}, 133 | file = {:papers/Residual dense network for image super-resolution.pdf:PDF}, 134 | groups = {SR}, 135 | timestamp = {2018-08-03} 136 | } 137 | 138 | @inproceedings{ledig2017photo, 139 | author = {Ledig, Christian and Theis, Lucas and Husz{\'a}r, Ferenc and Caballero, Jose and Cunningham, Andrew 140 | and Acosta, Alejandro and Aitken, Andrew and Tejani, Alykhan and Totz, Johannes and Wang, Zehan and others}, 141 | title = {Photo-realistic single image super-resolution using a generative adversarial network}, 142 | booktitle = cvpr, 143 | year = {2017}, 144 | file = {:papers/Photo-realistic single image super-resolution using a generative adversarial network.pdf:PDF}, 145 | groups = {SR}, 146 | timestamp = {2018-07-31} 147 | } 148 | @inproceedings{haris2018deep, 149 | author = {Haris, Muhammad and Shakhnarovich, Greg and Ukita, Norimichi}, 150 | title = {Deep backprojection networks for super-resolution}, 151 | booktitle = cvpr, 152 | year = {2018}, 153 | comment = {DBPN}, 154 | groups = {SR}, 155 | timestamp = {2018-08-10} 156 | } 157 | @inproceedings{lai2017deep, 158 | author = {Lai, Wei-Sheng and Huang, Jia-Bin and Ahuja, Narendra and Yang, Ming-Hsuan}, 159 | title = {Deep Laplacian Pyramid Networks for Fast and Accurate Super-Resolution}, 160 | booktitle = cvpr, 161 | year = {2017}, 162 | groups = {SR}, 163 | timestamp = {2018-07-30} 164 | } 165 | 166 | @inproceedings{kim2016accurate, 167 | author = {Kim, Jiwon and Kwon Lee, Jung and Mu Lee, Kyoung}, 168 | title = {Accurate image super-resolution using very deep convolutional networks}, 169 | booktitle = cvpr, 170 | year = {2016}, 171 | comment = {VDSR}, 172 | file = {:papers/Accurate image super-resolution using very deep convolutional networks.pdf:PDF}, 173 | groups = {SR}, 174 | timestamp = {2018-08-10} 175 | } 176 | @article{dong2016image, 177 | author = {Dong, Chao and Loy, Chen Change and He, Kaiming and Tang, Xiaoou}, 178 | title = {Image super-resolution using deep convolutional networks}, 179 | journal = pami, 180 | year = {2016}, 181 | groups = {SR}, 182 | timestamp = {2018-07-30} 183 | } 184 | @inproceedings{sun2008image, 185 | title = {Image super-resolution using gradient profile prior}, 186 | author = {Sun, Jian and Xu, Zongben and Shum, Heung-Yeung}, 187 | booktitle = cvpr, 188 | year = {2008} 189 | } 190 | 191 | @inproceedings{wang2021realesrgan, 192 | author = {Xintao Wang and Liangbin Xie and Chao Dong and Ying Shan}, 193 | booktitle = iccvw, 194 | year = {2021}, 195 | title = {Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data}, 196 | file = {:JabRef/Mine/Real-ESRGAN Training Real-World Blind Super-Resolution with Pure Synthetic Data.pdf:PDF}, 197 | groups = {Mine} 198 | } 199 | @inproceedings{loshchilov2016sgdr, 200 | title = {Sgdr: Stochastic gradient 
descent with warm restarts}, 201 | author = {Loshchilov, Ilya and Hutter, Frank}, 202 | booktitle = iclr, 203 | year = {2017} 204 | } 205 | @inproceedings{wang2019edvr, 206 | author = {Wang, Xintao and Chan, Kelvin C.K. and Yu, Ke and Dong, Chao and Loy, Chen Change}, 207 | booktitle = cvprw, 208 | title = {EDVR: Video restoration with enhanced deformable convolutional networks}, 209 | file = {:JabRef/Mine/EDVR Video restoration with enhanced deformable convolutional networks.pdf:PDF}, 210 | groups = {Mine, Restoration}, 211 | owner = {Xintao}, 212 | timestamp = {2020-06-15}, 213 | year = {2019} 214 | } 215 | @misc{wang2018basicsr, 216 | author = {Xintao Wang and Ke Yu and Kelvin C.K. Chan and 217 | Chao Dong and Chen Change Loy}, 218 | title = {{BasicSR}: Open Source Image and Video Restoration Toolbox}, 219 | howpublished = {\url{https://github.com/xinntao/BasicSR}}, 220 | year = {2018} 221 | } 222 | @inproceedings{martin2001database, 223 | author = {Martin, David and Fowlkes, Charless and Tal, Doron and Malik, Jitendra}, 224 | title = {A database of human segmented natural images and its application to evaluating segmentation algorithms 225 | and measuring ecological statistics}, 226 | booktitle = iccv, 227 | year = {2001}, 228 | comment = {BSD}, 229 | groups = {segmentation}, 230 | timestamp = {2018-08-15} 231 | } 232 | 233 | @inproceedings{zeyde2010single, 234 | author = {Zeyde, Roman and Elad, Michael and Protter, Matan}, 235 | title = {On single image scale-up using sparse-representations}, 236 | booktitle = {International Conference on Curves and Surfaces}, 237 | year = {2010}, 238 | comment = {set14}, 239 | timestamp = {2018-08-15} 240 | } 241 | 242 | @inproceedings{huang2015single, 243 | title = {Single image super-resolution from transformed self-exemplars}, 244 | author = {Huang, Jia-Bin and Singh, Abhishek and Ahuja, Narendra}, 245 | booktitle = cvpr, 246 | year = {2015} 247 | } 248 | @article{bevilacqua2012low, 249 | title = {Low-complexity single-image super-resolution based on nonnegative neighbor embedding}, 250 | author = {Bevilacqua, Marco and Roumy, Aline and Guillemot, Christine and Alberi-Morel, Marie Line}, 251 | year = {2012}, 252 | journal = {BMVA press} 253 | } 254 | 255 | @inproceedings{agustsson2017ntire, 256 | author = {Agustsson, Eirikur and Timofte, Radu}, 257 | title = {Ntire 2017 challenge on single image super-resolution: Dataset and study}, 258 | booktitle = cvprw, 259 | year = {2017}, 260 | comment = {NTIRE17, DIV2K}, 261 | file = {:papers/Ntire 2017 challenge on single image super-resolution\: Dataset and study.pdf:PDF}, 262 | groups = {SR}, 263 | timestamp = {2018-08-07} 264 | } 265 | 266 | @inproceedings{yu2020wide, 267 | title = {Wide activation for efficient image and video super-resolution}, 268 | author = {Yu, Jiahui and Fan, Yuchen and Huang, Thomas}, 269 | booktitle = bmvc, 270 | year = {2020} 271 | } 272 | @inproceedings{guo2018expandnets, 273 | title = {Expandnets: Linear over-parameterization to train compact convolutional networks}, 274 | author = {Guo, Shuxuan and Alvarez, Jose M and Salzmann, Mathieu}, 275 | booktitle = nips, 276 | year = {2020} 277 | } 278 | @inproceedings{he2016identity, 279 | author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, 280 | booktitle = eccv, 281 | title = {Identity mappings in deep residual networks}, 282 | year = {2016}, 283 | file = {:papers/Identity mappings in deep residual networks.pdf:PDF;:JabRef/papers/Identity mappings in deep residual networks.pdf:PDF}, 284 | timestamp = 
{2018-10-09}
285 | }
286 | @inproceedings{ding2021ddb,
287 | author = {Ding, Xiaohan and Zhang, Xiangyu and Han, Jungong and Ding, Guiguang},
288 | booktitle = cvpr,
289 | year = {2021},
290 | title = {Diverse Branch Block: Building a Convolution as an Inception-like Unit},
291 | file = {:JabRef/Others/Diverse Branch Block Building a Convolution as an Inception-like Unit.pdf:PDF},
292 | groups = {Others, ToRead}
293 | }
294 | @inproceedings{xie2019intriguing,
295 | title = {Intriguing properties of adversarial training at scale},
296 | author = {Xie, Cihang and Yuille, Alan},
297 | booktitle = iclr,
298 | year = {2020}
299 | }
300 | @inproceedings{johnson2018image,
301 | title = {Image generation from scene graphs},
302 | author = {Johnson, Justin and Gupta, Agrim and Fei-Fei, Li},
303 | booktitle = cvpr,
304 | year = {2018}
305 | }
306 | @article{wu2021rethinking,
307 | author = {Wu, Yuxin and Johnson, Justin},
308 | year = {2021},
309 | journal = {arXiv:2105.07576},
310 | title = {Rethinking ``Batch'' in BatchNorm},
311 | file = {:JabRef/Others/Rethinking Batch in BatchNorm.pdf:PDF},
312 | groups = {Others}
313 | }
314 | @inproceedings{musgrave2020metric,
315 | title = {A metric learning reality check},
316 | author = {Musgrave, Kevin and Belongie, Serge and Lim, Ser-Nam},
317 | booktitle = eccv,
318 | year = {2020}
319 | }
320 | @inproceedings{li2019fully,
321 | title = {Fully quantized network for object detection},
322 | author = {Li, Rundong and Wang, Yan and Liang, Feng and Qin, Hongwei and Yan, Junjie and Fan, Rui},
323 | booktitle = cvpr,
324 | year = {2019}
325 | }
326 | @inproceedings{ren2015faster,
327 | title = {Faster {R-CNN}: Towards real-time object detection with region proposal networks},
328 | author = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
329 | booktitle = nips,
330 | year = {2015}
331 | }
332 | @inproceedings{gu2020interpreting,
333 | title = {Interpreting Super-Resolution Networks with Local Attribution Maps},
334 | author = {Gu, Jinjin and Dong, Chao},
335 | booktitle = cvpr,
336 | year = {2021}
337 | }
338 | 
339 | @inproceedings{ioffe2015batch,
340 | author = {Ioffe, Sergey and Szegedy, Christian},
341 | booktitle = icml,
342 | year = {2015},
343 | title = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
344 | file = {:JabRef/Others/Batch Normalization Accelerating Deep Network Training by Reducing Internal Covariate Shift.pdf:PDF},
345 | groups = {Others}
346 | }
347 | 
348 | @inproceedings{ahn2018fast,
349 | title = {Fast, accurate, and lightweight super-resolution with cascading residual network},
350 | author = {Ahn, Namhyuk and Kang, Byungkon and Sohn, Kyung-Ah},
351 | booktitle = eccv,
352 | year = {2018}
353 | }
354 | 
355 | @inproceedings{hui2019lightweight,
356 | title = {Lightweight image super-resolution with information multi-distillation network},
357 | author = {Hui, Zheng and Gao, Xinbo and Yang, Yunchu and Wang, Xiumei},
358 | booktitle = acmmm,
359 | year = {2019}
360 | }
361 | 
362 | @inproceedings{lee2020journey,
363 | author = {Lee, Royson and Dudziak, {\L}ukasz and Abdelfattah, Mohamed and Venieris, Stylianos I and Kim, Hyeji and Wen, Hongkai and Lane, Nicholas D},
364 | booktitle = eccv,
365 | year = {2020},
366 | title = {Journey towards tiny perceptual super-resolution},
367 | comment = {TPSR},
368 | groups = {Others}
369 | }
370 | 
371 | @article{bhardwaj2021collapsible,
372 | author = {Bhardwaj, Kartikeya and Milosavljevic, Milos and Chalfin, Alex and Suda, Naveen and 
O'Neil, Liam and Gope, Dibakar and Meng, Lingchuan and Matas, Ramon and Loh, Danny}, 373 | date = {2021}, 374 | journal = {arXiv:2103.09404}, 375 | title = {Collapsible Linear Blocks for Super-Efficient Super Resolution}, 376 | file = {:JabRef/Others/Collapsible Linear Blocks for Super-Efficient Super Resolution.pdf:PDF}, 377 | groups = {Others, ToRead}, 378 | year = {2021} 379 | } 380 | 381 | @inproceedings{dong2016accelerating, 382 | author = {Dong, Chao and Loy, Chen Change and Tang, Xiaoou}, 383 | title = {Accelerating the super-resolution convolutional neural network}, 384 | booktitle = eccv, 385 | year = {2016}, 386 | comment = {fsrcnn}, 387 | file = {:papers/Accelerating the super-resolution convolutional neural network.pdf:PDF}, 388 | groups = {SR}, 389 | timestamp = {2018-08-10} 390 | } 391 | 392 | @inproceedings{zhang2021ecbsr, 393 | author = {Zhang, Xindong and Zeng, Hui and Zhang, Lei}, 394 | booktitle = acmmm, 395 | year = {2021}, 396 | title = {Edge-oriented Convolution Block for Real-time Super Resolution on Mobile Devices}, 397 | file = {:JabRef/Others/Edge-oriented Convolution Block for Real-time Super Resolution on Mobile Devices.pdf:PDF}, 398 | groups = {Others, ToRead} 399 | } 400 | 401 | @inproceedings{ding2021repvgg, 402 | author = {Ding, Xiaohan and Zhang, Xiangyu and Ma, Ningning and Han, Jungong and Ding, Guiguang and Sun, Jian}, 403 | booktitle = cvpr, 404 | year = {2021}, 405 | title = {Repvgg: Making vgg-style convnets great again}, 406 | file = {:JabRef/Others/RepVGG Making VGG-style ConvNets Great Again.pdf:PDF}, 407 | groups = {Others, ToRead} 408 | } 409 | 410 | @inproceedings{caballero2017real, 411 | author = {Caballero, Jose and Ledig, Christian and Aitken, Andrew P and Acosta, Alejandro and Totz, Johannes and Wang, Zehan and Shi, Wenzhe}, 412 | title = {Real-time video super-resolution with spatio-temporal networks and motion compensation}, 413 | booktitle = cvpr, 414 | year = {2017}, 415 | comment = {VESPCN}, 416 | file = {:papers/Real-time video super-resolution with spatio-temporal networks and motion compensation.pdf:PDF}, 417 | groups = {video SR}, 418 | timestamp = {2019-06-28} 419 | } 420 | 421 | @article{yu2019path, 422 | author = {Yu, Ke and Wang, Xintao and Dong, Chao and Tang, Xiaoou and Loy, Chen Change}, 423 | journal = {arXiv:1904.10343}, 424 | title = {Path-restore: Learning network path selection for image restoration}, 425 | year = {2019}, 426 | file = {:JabRef/LowLevel/Path-Restore Learning Network Path Selection for Image Restoration.pdf:PDF}, 427 | groups = {Restoration}, 428 | owner = {Xintao}, 429 | timestamp = {2020-11-07} 430 | } 431 | @inproceedings{dong2014learning, 432 | author = {Dong, Chao and Loy, Chen Change and He, Kaiming and Tang, Xiaoou}, 433 | booktitle = eccv, 434 | title = {Learning a deep convolutional network for image super-resolution}, 435 | year = {2014}, 436 | comment = {SRCNN}, 437 | groups = {Restoration}, 438 | owner = {Xintao}, 439 | timestamp = {2020-11-06} 440 | } 441 | @inproceedings{lim2017edsr, 442 | author = {Lim, Bee and Son, Sanghyun and Kim, Heewon and Nah, Seungjun and Lee, Kyoung Mu}, 443 | title = {Enhanced deep residual networks for single image super-resolution}, 444 | booktitle = cvprw, 445 | year = {2017}, 446 | comment = {EDSR}, 447 | timestamp = {2019-10-30} 448 | } 449 | @inproceedings{chen2018fsrnet, 450 | author = {Chen, Yu and Tai, Ying and Liu, Xiaoming and Shen, Chunhua and Yang, Jian}, 451 | booktitle = cvpr, 452 | title = {Fsrnet: End-to-end learning face super-resolution with 
facial priors}, 453 | year = {2018}, 454 | file = {:JabRef/Face/FSRNet End-to-End Learning Face Super-Resolution with Facial Priors.pdf:PDF}, 455 | groups = {Face}, 456 | owner = {Xintao}, 457 | timestamp = {2020-11-06} 458 | } 459 | @article{zhang2017beyond, 460 | author = {Zhang, Kai and Zuo, Wangmeng and Chen, Yunjin and Meng, Deyu and Zhang, Lei}, 461 | title = {Beyond a gaussian denoiser: Residual learning of deep cnn for image denoising}, 462 | journal = tip, 463 | year = {2017}, 464 | volume = {26}, 465 | number = {7}, 466 | pages = {3142--3155}, 467 | comment = {DnCNN}, 468 | timestamp = {2018-10-29} 469 | } 470 | @inproceedings{kupyn2018deblurgan, 471 | author = {Kupyn, Orest and Budzan, Volodymyr and Mykhailych, Mykola and Mishkin, Dmytro and Matas, Ji{\v{r}}{\'\i}}, 472 | booktitle = cvpr, 473 | title = {Deblurgan: Blind motion deblurring using conditional adversarial networks}, 474 | year = {2018}, 475 | groups = {Restoration}, 476 | owner = {Xintao}, 477 | timestamp = {2020-11-06} 478 | } 479 | @inproceedings{shen2018deep, 480 | author = {Shen, Ziyi and Lai, Wei-Sheng and Xu, Tingfa and Kautz, Jan and Yang, Ming-Hsuan}, 481 | booktitle = cvpr, 482 | title = {Deep semantic face deblurring}, 483 | year = {2018}, 484 | file = {:JabRef/Face/Deep Semantic Face Deblurring.pdf:PDF}, 485 | groups = {Face}, 486 | owner = {Xintao}, 487 | timestamp = {2020-11-06} 488 | } 489 | @inproceedings{dong2015compression, 490 | author = {Dong, Chao and Deng, Yubin and Change Loy, Chen and Tang, Xiaoou}, 491 | title = {Compression artifacts reduction by a deep convolutional network}, 492 | booktitle = iccv, 493 | year = {2015}, 494 | comment = {ARCNN}, 495 | timestamp = {2018-11-06} 496 | } 497 | @inproceedings{yu2018face, 498 | author = {Yu, Xin and Fernando, Basura and Ghanem, Bernard and Porikli, Fatih and Hartley, Richard}, 499 | booktitle = eccv, 500 | title = {Face super-resolution guided by facial component heatmaps}, 501 | year = {2018}, 502 | pages = {217--233}, 503 | file = {:JabRef/Face/Face Super-resolution Guided by Facial Component Heatmaps.pdf:PDF}, 504 | groups = {Face}, 505 | owner = {Xintao}, 506 | timestamp = {2020-11-06} 507 | } 508 | @article{chen2020psfrgan, 509 | author = {Chaofeng Chen and Xiaoming Li and Lingbo Yang and Xianhui Lin and Lei Zhang and Kwan-Yee K. 
Wong}, 510 | journal = {arXiv:2009.08709}, 511 | title = {Progressive semantic-aware style transformation for blind face restoration}, 512 | year = {2020}, 513 | eprint = {2009.08709}, 514 | eprintclass = {cs.CV}, 515 | eprinttype = {arXiv}, 516 | file = {:JabRef/Face/Progressive Semantic-Aware Style Transformation for Blind Face Restoration.pdf:PDF}, 517 | groups = {Face}, 518 | owner = {Xintao}, 519 | timestamp = {2020-10-15} 520 | } 521 | @inproceedings{li2018GFRNet, 522 | author = {Li, Xiaoming and Liu, Ming and Ye, Yuting and Zuo, Wangmeng and Lin, Liang and Yang, Ruigang}, 523 | booktitle = eccv, 524 | title = {Learning warped guidance for blind face restoration}, 525 | year = {2018}, 526 | file = {:JabRef/Face/Learning Warped Guidance for Blind Face Restoration.pdf:PDF}, 527 | groups = {Face}, 528 | owner = {Xintao}, 529 | timestamp = {2020-11-06} 530 | } 531 | @inproceedings{dogan2019exemplar, 532 | author = {Dogan, Berk and Gu, Shuhang and Timofte, Radu}, 533 | booktitle = cvprw, 534 | title = {Exemplar guided face image super-resolution without facial landmarks}, 535 | year = {2019}, 536 | groups = {Face}, 537 | owner = {Xintao}, 538 | timestamp = {2020-11-06} 539 | } 540 | @inproceedings{li2020enhanced, 541 | author = {Li, Xiaoming and Li, Wenyu and Ren, Dongwei and Zhang, Hongzhi and Wang, Meng and Zuo, Wangmeng}, 542 | booktitle = cvpr, 543 | title = {Enhanced blind face restoration with multi-exemplar images and adaptive spatial feature fusion}, 544 | file = {:JabRef/LowLevel/Enhanced Blind Face Restoration with Multi-Exemplar Images and Adaptive Spatial Feature Fusion.pdf:PDF}, 545 | groups = {Face}, 546 | owner = {Xintao}, 547 | timestamp = {2020-10-15}, 548 | year = {2020} 549 | } 550 | @inproceedings{li2020dfdnet, 551 | author = {Li, Xiaoming and Chen, Chaofeng and Zhou, Shangchen and Lin, Xianhui and Zuo, Wangmeng and Zhang, Lei}, 552 | booktitle = eccv, 553 | title = {Blind face restoration via deep multi-scale component dictionaries}, 554 | file = {:JabRef/LowLevel/Blind Face Restoration via Deep Multi-scale Component Dictionaries.pdf:PDF}, 555 | groups = {Face}, 556 | owner = {Xintao}, 557 | timestamp = {2020-10-15}, 558 | year = {2020} 559 | } 560 | @inproceedings{karras2018stylegan, 561 | author = {Karras, Tero and Laine, Samuli and Aila, Timo}, 562 | booktitle = cvpr, 563 | title = {A style-based generator architecture for generative adversarial networks}, 564 | comment = {StyleGAN}, 565 | file = {:JabRef/LowLevel/A Style-Based Generator Architecture for Generative Adversarial Networks.pdf:PDF}, 566 | groups = {Generation}, 567 | owner = {Xintao}, 568 | timestamp = {2020-06-19}, 569 | year = {2018} 570 | } 571 | @inproceedings{karras2020stylegan2, 572 | author = {Karras, Tero and Laine, Samuli and Aittala, Miika and Hellsten, Janne and Lehtinen, Jaakko and Aila, Timo}, 573 | title = {Analyzing and improving the image quality of stylegan}, 574 | booktitle = cvpr, 575 | year = {2020}, 576 | comment = {StyleGANv2}, 577 | file = {:JabRef/LowLevel/Analyzing and Improving the Image Quality of StyleGAN.pdf:PDF}, 578 | groups = {Generation}, 579 | timestamp = {2020-06-18} 580 | } 581 | @inproceedings{gu2020mGANprior, 582 | author = {Gu, Jinjin and Shen, Yujun and Zhou, Bolei}, 583 | booktitle = cvpr, 584 | title = {Image processing using multi-code gan prior}, 585 | year = {2020}, 586 | comment = {mGANprior}, 587 | groups = {Generation}, 588 | owner = {Xintao}, 589 | timestamp = {2020-11-07} 590 | } 591 | @inproceedings{pan2020dgp, 592 | author = {Pan, Xingang and Zhan, 
Xiaohang and Dai, Bo and Lin, Dahua and Loy, Chen Change and Luo, Ping}, 593 | booktitle = eccv, 594 | title = {Exploiting deep generative prior for versatile image restoration and manipulation}, 595 | year = {2020}, 596 | file = {:JabRef/LowLevel/Exploiting Deep Generative Prior for Versatile Image Restoration and Manipulation.pdf:PDF}, 597 | groups = {Generation}, 598 | owner = {Xintao}, 599 | timestamp = {2020-11-06} 600 | } 601 | @inproceedings{menon2020pulse, 602 | author = {Menon, Sachit and Damian, Alexandru and Hu, Shijia and Ravi, Nikhil and Rudin, Cynthia}, 603 | booktitle = cvpr, 604 | title = {PULSE: Self-supervised photo upsampling via latent space exploration of generative models}, 605 | year = {2020}, 606 | file = {:JabRef/LowLevel/PULSE Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models.pdf:PDF}, 607 | groups = {Generation, Restoration}, 608 | owner = {Xintao}, 609 | timestamp = {2020-11-06} 610 | } 611 | @inproceedings{ronneberger2015unet, 612 | author = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, 613 | booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention}, 614 | title = {U-net: Convolutional networks for biomedical image segmentation}, 615 | year = {2015}, 616 | groups = {HighLevel}, 617 | owner = {Xintao}, 618 | timestamp = {2020-11-07} 619 | } 620 | @inproceedings{wang2018sftgan, 621 | author = {Wang, Xintao and Yu, Ke and Dong, Chao and Loy, Chen Change}, 622 | booktitle = cvpr, 623 | title = {Recovering realistic texture in image super-resolution by deep spatial feature transform}, 624 | comment = {SFT, SFTGAN}, 625 | file = {:JabRef/Mine/Recovering realistic texture in image super-resolution by deep spatial feature transform.pdf:PDF}, 626 | groups = {Mine, Restoration}, 627 | owner = {Xintao}, 628 | timestamp = {2020-06-15}, 629 | year = {2018} 630 | } 631 | @inproceedings{park2019spade, 632 | author = {Park, Taesung and Liu, Ming-Yu and Wang, Ting-Chun and Zhu, Jun-Yan}, 633 | booktitle = cvpr, 634 | title = {Semantic image synthesis with spatially-adaptive normalization}, 635 | year = {2019}, 636 | comment = {SPADE}, 637 | groups = {Generation}, 638 | owner = {Xintao}, 639 | timestamp = {2020-11-07} 640 | } 641 | @inproceedings{li2018beautygan, 642 | author = {Li, Tingting and Qian, Ruihe and Dong, Chao and Liu, Si and Yan, Qiong and Zhu, Wenwu and Lin, Liang}, 643 | booktitle = acmmm, 644 | title = {Beautygan: Instance-level facial makeup transfer with deep generative adversarial network}, 645 | year = {2018}, 646 | file = {:JabRef/LowLevel/Beautygan Instance-level facial makeup transfer with deep generative adversarial network.pdf:PDF}, 647 | groups = {Generation}, 648 | owner = {Xintao}, 649 | timestamp = {2020-11-07} 650 | } 651 | @inproceedings{gu2019ladn, 652 | author = {Gu, Qiao and Wang, Guanzhi and Chiu, Mang Tik and Tai, Yu-Wing and Tang, Chi-Keung}, 653 | booktitle = iccv, 654 | title = {Ladn: Local adversarial disentangling network for facial makeup and de-makeup}, 655 | year = {2019}, 656 | file = {:JabRef/LowLevel/Ladn Local adversarial disentangling network for facial makeup and de-makeup.pdf:PDF}, 657 | groups = {Generation}, 658 | owner = {Xintao}, 659 | timestamp = {2020-11-07} 660 | } 661 | @article{iizuka2017globally, 662 | author = {Iizuka, Satoshi and Simo-Serra, Edgar and Ishikawa, Hiroshi}, 663 | journal = {ACM Transactions on Graphics (ToG)}, 664 | title = {Globally and locally consistent image completion}, 665 | year = {2017}, 666 | number = {4}, 
667 | pages = {1--14}, 668 | volume = {36}, 669 | file = {:JabRef/LowLevel/Globally and Locally Consistent Image Completion.pdf:PDF}, 670 | groups = {Restoration}, 671 | owner = {Xintao}, 672 | timestamp = {2020-11-07} 673 | } 674 | @inproceedings{li2017generative, 675 | author = {Li, Yijun and Liu, Sifei and Yang, Jimei and Yang, Ming-Hsuan}, 676 | booktitle = cvpr, 677 | title = {Generative face completion}, 678 | year = {2017}, 679 | file = {:JabRef/LowLevel/Generative Face Completion.pdf:PDF}, 680 | groups = {Generation}, 681 | owner = {Xintao}, 682 | timestamp = {2020-11-07} 683 | } 684 | @inproceedings{kupyn2019deblurganv2, 685 | author = {Kupyn, Orest and Martyniuk, Tetiana and Wu, Junru and Wang, Zhangyang}, 686 | booktitle = iccv, 687 | title = {Deblurgan-v2: Deblurring (orders-of-magnitude) faster and better}, 688 | year = {2019}, 689 | file = {:JabRef/LowLevel/DeblurGANv2 Deblurring Orders-of-Magnitude Faster and Better.pdf:PDF}, 690 | owner = {Xintao}, 691 | timestamp = {2020-11-10} 692 | } 693 | 694 | @inproceedings{wan2020bringing, 695 | author = {Wan, Ziyu and Zhang, Bo and Chen, Dongdong and Zhang, Pan and Chen, Dong and Liao, Jing and Wen, Fang}, 696 | booktitle = cvpr, 697 | title = {Bringing old photos back to life}, 698 | file = {:JabRef/LowLevel/Bringing Old Photos Back to Life.pdf:PDF}, 699 | groups = {Restoration, Face}, 700 | owner = {Xintao}, 701 | timestamp = {2020-10-15}, 702 | year = {2020} 703 | } 704 | @inproceedings{zhang2018rcan, 705 | author = {Zhang, Yulun and Li, Kunpeng and Li, Kai and Wang, Lichen and Zhong, Bineng and Fu, Yun}, 706 | title = {Image super-resolution using very deep residual channel attention networks}, 707 | booktitle = eccv, 708 | year = {2018}, 709 | comment = {RCAN}, 710 | timestamp = {2019-10-30} 711 | } 712 | @inproceedings{wang2018esrgan, 713 | author = {Wang, Xintao and Yu, Ke and Wu, Shixiang and Gu, Jinjin and Liu, Yihao and Dong, Chao and Qiao, Yu and Loy, Chen Change}, 714 | booktitle = eccvw, 715 | title = {ESRGAN: Enhanced super-resolution generative adversarial networks}, 716 | file = {:JabRef/Mine/ESRGAN Enhanced super-resolution generative adversarial networks.pdf:PDF}, 717 | groups = {Mine, Restoration}, 718 | owner = {Xintao}, 719 | timestamp = {2020-06-15}, 720 | year = {2018} 721 | } 722 | @inproceedings{huang2017wavelet, 723 | author = {Huang, Huaibo and He, Ran and Sun, Zhenan and Tan, Tieniu}, 724 | booktitle = iccv, 725 | title = {Wavelet-srnet: A wavelet-based cnn for multi-scale face super resolution}, 726 | year = {2017}, 727 | groups = {Face}, 728 | owner = {Xintao}, 729 | timestamp = {2020-11-10} 730 | } 731 | @inproceedings{timofte2017ntire, 732 | author = {Timofte, Radu and Agustsson, Eirikur and Van Gool, Luc and Yang, Ming-Hsuan and Zhang, Lei}, 733 | booktitle = cvprw, 734 | title = {Ntire 2017 challenge on single image super-resolution: Methods and results}, 735 | year = {2017}, 736 | groups = {Restoration}, 737 | owner = {Xintao}, 738 | timestamp = {2020-11-12} 739 | } 740 | @inproceedings{lai2017lapsrn, 741 | author = {Lai, Wei-Sheng and Huang, Jia-Bin and Ahuja, Narendra and Yang, Ming-Hsuan}, 742 | booktitle = cvpr, 743 | title = {Deep laplacian pyramid networks for fast and accurate super-resolution}, 744 | year = {2017}, 745 | owner = {Xintao}, 746 | timestamp = {2020-11-12} 747 | } 748 | @inproceedings{chen2017dpn, 749 | author = {Chen, Yunpeng and Li, Jianan and Xiao, Huaxin and Jin, Xiaojie and Yan, Shuicheng and Feng, Jiashi}, 750 | booktitle = nips, 751 | title = {Dual path networks}, 752 | 
year = {2017}, 753 | owner = {Xintao}, 754 | timestamp = {2020-11-12} 755 | } 756 | @inproceedings{karras2018pggan, 757 | author = {Karras, Tero and Aila, Timo and Laine, Samuli and Lehtinen, Jaakko}, 758 | booktitle = iclr, 759 | title = {Progressive growing of gans for improved quality, stability, and variation}, 760 | year = {2018}, 761 | groups = {Face}, 762 | owner = {Xintao}, 763 | timestamp = {2020-11-12} 764 | } 765 | @inproceedings{zhu2020domain, 766 | author = {Zhu, Jiapeng and Shen, Yujun and Zhao, Deli and Zhou, Bolei}, 767 | booktitle = eccv, 768 | title = {In-domain gan inversion for real image editing}, 769 | file = {:JabRef/LowLevel/In-Domain GAN Inversion for Real Image Editing.pdf:PDF}, 770 | groups = {Generation}, 771 | owner = {Xintao}, 772 | timestamp = {2020-08-23}, 773 | year = {2020} 774 | } 775 | @inproceedings{goodfellow2014gan, 776 | author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, 777 | booktitle = nips, 778 | title = {Generative adversarial nets}, 779 | year = {2014}, 780 | groups = {Generation}, 781 | owner = {Xintao}, 782 | timestamp = {2020-11-12} 783 | } 784 | @inproceedings{mescheder2018training, 785 | author = {Mescheder, Lars and Geiger, Andreas and Nowozin, Sebastian}, 786 | booktitle = icml, 787 | title = {Which training methods for GANs do actually converge?}, 788 | file = {:JabRef/LowLevel/Which Training Methods for GANs do actually Converge.pdf:PDF}, 789 | groups = {Generation}, 790 | owner = {Xintao}, 791 | timestamp = {2020-08-25}, 792 | year = {2018} 793 | } 794 | @inproceedings{wang2017pix2pixHD, 795 | author = {Wang, Ting-Chun and Liu, Ming-Yu and Zhu, Jun-Yan and Tao, Andrew and Kautz, Jan and Catanzaro, Bryan}, 796 | title = {High-resolution image synthesis and semantic manipulation with conditional gans}, 797 | booktitle = cvpr, 798 | year = {2018}, 799 | timestamp = {2018-11-06} 800 | } 801 | @inproceedings{gatys2016style, 802 | author = {Gatys, Leon A and Ecker, Alexander S and Bethge, Matthias}, 803 | title = {Image style transfer using convolutional neural networks}, 804 | booktitle = cvpr, 805 | year = {2016}, 806 | timestamp = {2018-11-06} 807 | } 808 | @inproceedings{gondal2018unreasonable, 809 | author = {Gondal, Muhammad Waleed and Sch{\"o}lkopf, Bernhard and Hirsch, Michael}, 810 | booktitle = eccv, 811 | title = {The unreasonable effectiveness of texture transfer for single image super-resolution}, 812 | year = {2018}, 813 | owner = {Xintao}, 814 | timestamp = {2020-11-12} 815 | } 816 | @inproceedings{he2017maskrcnn, 817 | author = {He, Kaiming and Gkioxari, Georgia and Doll{\'a}r, Piotr and Girshick, Ross}, 818 | booktitle = iccv, 819 | title = {Mask r-cnn}, 820 | year = {2017}, 821 | groups = {HighLevel}, 822 | owner = {Xintao}, 823 | timestamp = {2020-11-12} 824 | } 825 | @inproceedings{johnson2016perceptual, 826 | author = {Johnson, Justin and Alahi, Alexandre and Fei-Fei, Li}, 827 | title = {Perceptual losses for real-time style transfer and super-resolution}, 828 | booktitle = eccv, 829 | year = {2016}, 830 | timestamp = {2018-07-30} 831 | } 832 | @inproceedings{ledig2017srgan, 833 | author = {Ledig, Christian and Theis, Lucas and Husz{\'a}r, Ferenc and Caballero, Jose and Cunningham, Andrew and Acosta, Alejandro and Aitken, Andrew and Tejani, Alykhan and Totz, Johannes and Wang, Zehan and others}, 834 | title = {Photo-realistic single image super-resolution using a generative adversarial network}, 835 | 
booktitle = cvpr, 836 | year = {2017}, 837 | comment = {SRResNet, SRGAN}, 838 | timestamp = {2018-08-06} 839 | } 840 | @inproceedings{simonyan2015vgg, 841 | author = {Simonyan, Karen and Zisserman, Andrew}, 842 | booktitle = iclr, 843 | title = {Very deep convolutional networks for large-scale image recognition}, 844 | year = {2015}, 845 | groups = {HighLevel}, 846 | owner = {Xintao}, 847 | timestamp = {2020-11-12} 848 | } 849 | @inproceedings{huang2017tpgan, 850 | author = {Huang, Rui and Zhang, Shu and Li, Tianyu and He, Ran}, 851 | booktitle = cvpr, 852 | title = {Beyond face rotation: Global and local perception gan for photorealistic and identity preserving frontal view synthesis}, 853 | year = {2017}, 854 | file = {:JabRef/Face/Beyond Face Rotation Global and Local Perception GAN for Photorealistic and Identity Preserving Frontal View Synthesis.pdf:PDF}, 855 | groups = {Face}, 856 | owner = {Xintao}, 857 | timestamp = {2020-11-10} 858 | } 859 | @inproceedings{deng2019arcface, 860 | author = {Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos}, 861 | booktitle = cvpr, 862 | title = {Arcface: Additive angular margin loss for deep face recognition}, 863 | year = {2019}, 864 | groups = {Face}, 865 | owner = {Xintao}, 866 | timestamp = {2020-11-12} 867 | } 868 | @inproceedings{han2020ghostnet, 869 | author = {Han, Kai and Wang, Yunhe and Tian, Qi and Guo, Jianyuan and Xu, Chunjing and Xu, Chang}, 870 | booktitle = cvpr, 871 | title = {GhostNet: More features from cheap operations}, 872 | year = {2020}, 873 | file = {:JabRef/HighLevel/GhostNet More Features from Cheap Operations.pdf:PDF}, 874 | groups = {HighLevel}, 875 | owner = {Xintao}, 876 | timestamp = {2020-11-10} 877 | } 878 | @inproceedings{abdal2019image2stylegan, 879 | author = {Abdal, Rameen and Qin, Yipeng and Wonka, Peter}, 880 | booktitle = iccv, 881 | title = {Image2stylegan: How to embed images into the stylegan latent space?}, 882 | year = {2019}, 883 | groups = {Generation}, 884 | owner = {Xintao}, 885 | timestamp = {2020-11-13} 886 | } 887 | @inproceedings{zhang2016colorful, 888 | author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A}, 889 | booktitle = eccv, 890 | title = {Colorful image colorization}, 891 | year = {2016}, 892 | owner = {Xintao}, 893 | timestamp = {2020-11-13} 894 | } 895 | @inproceedings{gecer2019ganfit, 896 | author = {Gecer, Baris and Ploumpis, Stylianos and Kotsia, Irene and Zafeiriou, Stefanos}, 897 | booktitle = cvpr, 898 | title = {Ganfit: Generative adversarial network fitting for high fidelity 3d face reconstruction}, 899 | year = {2019}, 900 | groups = {Generation}, 901 | owner = {Xintao}, 902 | timestamp = {2020-11-13} 903 | } 904 | @inproceedings{kingma2014adam, 905 | author = {Kingma, Diederik P and Ba, Jimmy}, 906 | booktitle = iclr, 907 | title = {Adam: A method for stochastic optimization}, 908 | year = {2015}, 909 | owner = {Xintao}, 910 | timestamp = {2020-11-13} 911 | } 912 | @inproceedings{he2016resnet, 913 | author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, 914 | booktitle = cvpr, 915 | title = {Deep residual learning for image recognition}, 916 | year = {2016}, 917 | comment = {ResNet}, 918 | file = {:JabRef/HighLevel/Deep residual learning for image recognition.pdf:PDF}, 919 | groups = {HighLevel}, 920 | owner = {Xintao}, 921 | timestamp = {2020-11-07} 922 | } 923 | @inproceedings{bulat2018super, 924 | author = {Bulat, Adrian and Tzimiropoulos, Georgios}, 925 | booktitle = cvpr, 926 | title = {Super-fan: Integrated facial 
landmark localization and super-resolution of real-world low resolution faces in arbitrary poses with gans}, 927 | year = {2018}, 928 | comment = {SuperFAN}, 929 | file = {:JabRef/Face/Super-FAN Integrated facial landmark localization and super-resolution of real-world low resolution faces in arbitrary poses with GANs.pdf:PDF}, 930 | groups = {Face}, 931 | owner = {Xintao}, 932 | timestamp = {2020-11-14} 933 | } 934 | @inproceedings{kim2016vdsr, 935 | author = {Kim, Jiwon and Kwon Lee, Jung and Mu Lee, Kyoung}, 936 | booktitle = cvpr, 937 | title = {Accurate image super-resolution using very deep convolutional networks}, 938 | year = {2016}, 939 | groups = {Restoration}, 940 | owner = {Xintao}, 941 | timestamp = {2020-11-15} 942 | } 943 | @inproceedings{liu2018non, 944 | author = {Liu, Ding and Wen, Bihan and Fan, Yuchen and Loy, Chen Change and Huang, Thomas S}, 945 | title = {Non-Local recurrent network for image restoration}, 946 | booktitle = nips, 947 | year = {2018}, 948 | timestamp = {2018-11-26} 949 | } 950 | @inproceedings{haris2018dbpn, 951 | author = {Haris, Muhammad and Shakhnarovich, Greg and Ukita, Norimichi}, 952 | title = {Deep backprojection networks for super-resolution}, 953 | booktitle = cvpr, 954 | year = {2018}, 955 | timestamp = {2018-08-27} 956 | } 957 | @inproceedings{dai2019second, 958 | author = {Dai, Tao and Cai, Jianrui and Zhang, Yongbing and Xia, Shu-Tao and Zhang, Lei}, 959 | booktitle = cvpr, 960 | title = {Second-order attention network for single image super-resolution}, 961 | year = {2019}, 962 | groups = {Restoration}, 963 | owner = {Xintao}, 964 | timestamp = {2020-11-15} 965 | } 966 | @inproceedings{helou2020stochastic, 967 | author = {Helou, Majed El and Zhou, Ruofan and S{\"u}sstrunk, Sabine}, 968 | booktitle = eccv, 969 | title = {Stochastic frequency masking to improve super-resolution and denoising networks}, 970 | year = {2020}, 971 | groups = {Restoration}, 972 | owner = {Xintao}, 973 | timestamp = {2020-11-15} 974 | } 975 | @inproceedings{guo2020closed, 976 | author = {Guo, Yong and Chen, Jian and Wang, Jingdong and Chen, Qi and Cao, Jiezhang and Deng, Zeshuai and Xu, Yanwu and Tan, Mingkui}, 977 | booktitle = cvpr, 978 | title = {Closed-loop matters: Dual regression networks for single image super-resolution}, 979 | year = {2020}, 980 | groups = {Restoration}, 981 | owner = {Xintao}, 982 | timestamp = {2020-11-15} 983 | } 984 | @inproceedings{liu2020residual, 985 | title = {Residual Feature Aggregation Network for Image Super-Resolution}, 986 | author = {Liu, Jie and Zhang, Wenjie and Tang, Yuting and Tang, Jie and Wu, Gangshan}, 987 | booktitle = cvpr, 988 | year = {2020} 989 | } 990 | @inproceedings{lefkimmiatis2017non, 991 | title = {Non-local color image denoising with convolutional neural networks}, 992 | author = {Lefkimmiatis, Stamatios}, 993 | booktitle = cvpr, 994 | year = {2017} 995 | } 996 | @inproceedings{luo2020unfolding, 997 | title = {Unfolding the Alternating Optimization for Blind Super Resolution}, 998 | author = {Luo, Zhengxiong and Huang, Yan and Li, Shang and Wang, Liang and Tan, Tieniu}, 999 | booktitle = nips, 1000 | year = {2020} 1001 | } 1002 | @inproceedings{chen2018image, 1003 | title = {Image blind denoising with generative adversarial network based noise modeling}, 1004 | author = {Chen, Jingwen and Chen, Jiawei and Chao, Hongyang and Yang, Ming}, 1005 | booktitle = cvpr, 1006 | year = {2018} 1007 | } 1008 | @inproceedings{sajjadi2017enhancenet, 1009 | title = {Enhancenet: Single image super-resolution through 
automated texture synthesis}, 1010 | author = {Sajjadi, Mehdi SM and Scholkopf, Bernhard and Hirsch, Michael}, 1011 | booktitle = eccv, 1012 | year = {2017} 1013 | } 1014 | @inproceedings{guo2019toward, 1015 | title = {Toward convolutional blind denoising of real photographs}, 1016 | author = {Guo, Shi and Yan, Zifei and Zhang, Kai and Zuo, Wangmeng and Zhang, Lei}, 1017 | booktitle = cvpr, 1018 | year = {2019} 1019 | } 1020 | @inproceedings{xu2014deep, 1021 | title = {Deep convolutional neural network for image deconvolution}, 1022 | author = {Xu, Li and Ren, Jimmy S and Liu, Ce and Jia, Jiaya}, 1023 | booktitle = nips, 1024 | year = {2014} 1025 | } 1026 | @inproceedings{wang2021unsupervised, 1027 | title = {Unsupervised Degradation Representation Learning for Blind Super-Resolution}, 1028 | author = {Wang, Longguang and Wang, Yingqian and Dong, Xiaoyu and Xu, Qingyu and Yang, Jungang and An, Wei and Guo, Yulan}, 1029 | booktitle = cvpr, 1030 | year = {2021} 1031 | } 1032 | @inproceedings{guo2016building, 1033 | title = {Building dual-domain representations for compression artifacts reduction}, 1034 | author = {Guo, Jun and Chao, Hongyang}, 1035 | booktitle = eccv, 1036 | year = {2016} 1037 | } 1038 | @inproceedings{galteri2017deep, 1039 | title = {Deep generative adversarial compression artifact removal}, 1040 | author = {Galteri, Leonardo and Seidenari, Lorenzo and Bertini, Marco and Del Bimbo, Alberto}, 1041 | booktitle = iccv, 1042 | year = {2017} 1043 | } 1044 | @inproceedings{gu2019blind, 1045 | title = {Blind super-resolution with iterative kernel correction}, 1046 | author = {Gu, Jinjin and Lu, Hannan and Zuo, Wangmeng and Dong, Chao}, 1047 | booktitle = cvpr, 1048 | year = {2019} 1049 | } 1050 | @inproceedings{kim2019progressive, 1051 | title = {Progressive face super-resolution via attention to facial landmark}, 1052 | author = {Kim, Deokyun and Kim, Minseon and Kwon, Gihyun and Kim, Dae-Shik}, 1053 | booktitle = bmvc, 1054 | year = {2019} 1055 | } 1056 | @inproceedings{cao2017attention, 1057 | title = {Attention-aware face hallucination via deep reinforcement learning}, 1058 | author = {Cao, Qingxing and Lin, Liang and Shi, Yukai and Liang, Xiaodan and Li, Guanbin}, 1059 | booktitle = cvpr, 1060 | year = {2017} 1061 | } 1062 | @inproceedings{xu2017learning, 1063 | title = {Learning to super-resolve blurry face and text images}, 1064 | author = {Xu, Xiangyu and Sun, Deqing and Pan, Jinshan and Zhang, Yujin and Pfister, Hanspeter and Yang, Ming-Hsuan}, 1065 | booktitle = iccv, 1066 | year = {2017} 1067 | } 1068 | @inproceedings{yu2018super, 1069 | title = {Super-resolving very low-resolution face images with supplementary attributes}, 1070 | author = {Yu, Xin and Fernando, Basura and Hartley, Richard and Porikli, Fatih}, 1071 | booktitle = cvpr, 1072 | year = {2018} 1073 | } 1074 | @inproceedings{zhu2016deep, 1075 | title = {Deep cascaded bi-network for face hallucination}, 1076 | author = {Zhu, Shizhan and Liu, Sifei and Loy, Chen Change and Tang, Xiaoou}, 1077 | booktitle = eccv, 1078 | year = {2016} 1079 | } 1080 | @article{brock2018large, 1081 | title = {Large scale gan training for high fidelity natural image synthesis}, 1082 | author = {Brock, Andrew and Donahue, Jeff and Simonyan, Karen}, 1083 | journal = {arXiv preprint arXiv:1809.11096}, 1084 | year = {2018} 1085 | } 1086 | @article{yang2020hifacegan, 1087 | author = {Lingbo Yang and Chang Liu and Pan Wang and Shanshe Wang and Peiran Ren and Siwei Ma and Wen Gao}, 1088 | journal = {ACM Multimedia}, 1089 | title = 
{HiFaceGAN: Face renovation via collaborative suppression and replenishment}, 1090 | year = {2020}, 1091 | date = {2020-05-11}, 1092 | eprint = {2005.05005}, 1093 | eprintclass = {cs.CV}, 1094 | eprinttype = {arXiv}, 1095 | file = {:JabRef/Face/HiFaceGAN Face Renovation via Collaborative Suppression and Replenishment.pdf:PDF}, 1096 | groups = {Face}, 1097 | owner = {Xintao}, 1098 | timestamp = {2020-10-15} 1099 | } 1100 | @article{howard2017mobilenets, 1101 | title = {Mobilenets: Efficient convolutional neural networks for mobile vision applications}, 1102 | author = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig}, 1103 | journal = {arXiv:1704.04861}, 1104 | year = {2017} 1105 | } 1106 | @article{zhao2019channel, 1107 | title = {Channel splitting network for single MR image super-resolution}, 1108 | author = {Zhao, Xiaole and Zhang, Yulun and Zhang, Tao and Zou, Xueming}, 1109 | journal = {IEEE Transactions on Image Processing}, 1110 | volume = {28}, 1111 | number = {11}, 1112 | pages = {5649--5662}, 1113 | year = {2019}, 1114 | publisher = {IEEE} 1115 | } 1116 | @inproceedings{liu2015faceattributes, 1117 | title = {Deep learning face attributes in the wild}, 1118 | author = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou}, 1119 | booktitle = iccv, 1120 | year = {2015} 1121 | } 1122 | @techreport{LFWTech, 1123 | author = {Gary B. Huang and Manu Ramesh and Tamara Berg and 1124 | Erik Learned-Miller}, 1125 | title = {Labeled faces in the wild: A database for studying 1126 | face recognition in unconstrained environments}, 1127 | institution = {University of Massachusetts, Amherst}, 1128 | year = 2007 1129 | } 1130 | 1131 | @inproceedings{zhang2018perceptual, 1132 | title = {The unreasonable effectiveness of deep features as a perceptual metric}, 1133 | author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A and Shechtman, Eli and Wang, Oliver}, 1134 | booktitle = cvpr, 1135 | year = {2018} 1136 | } 1137 | @inproceedings{heusel2017gans, 1138 | title = {Gans trained by a two time-scale update rule converge to a local nash equilibrium}, 1139 | author = {Heusel, Martin and Ramsauer, Hubert and Unterthiner, Thomas and Nessler, Bernhard and Hochreiter, Sepp}, 1140 | booktitle = nips, 1141 | year = {2017} 1142 | } 1143 | @article{mittal2012making, 1144 | title = {Making a “completely blind” image quality analyzer}, 1145 | author = {Mittal, Anish and Soundararajan, Rajiv and Bovik, Alan C}, 1146 | journal = {IEEE Signal processing letters}, 1147 | volume = {20}, 1148 | number = {3}, 1149 | pages = {209--212}, 1150 | year = {2012}, 1151 | publisher = {IEEE} 1152 | } 1153 | @inproceedings{blau20182018, 1154 | title = {The 2018 pirm challenge on perceptual image super-resolution}, 1155 | author = {Blau, Yochai and Mechrez, Roey and Timofte, Radu and Michaeli, Tomer and Zelnik-Manor, Lihi}, 1156 | booktitle = eccvw, 1157 | year = {2018} 1158 | } -------------------------------------------------------------------------------- /latex/figures/XPixelMetaverse_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/XPixelMetaverse_small.jpg -------------------------------------------------------------------------------- /latex/figures/basicsr_logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/basicsr_logo.png -------------------------------------------------------------------------------- /latex/figures/code_structure_log_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/code_structure_log_example.png -------------------------------------------------------------------------------- /latex/figures/code_structure_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/code_structure_overview.png -------------------------------------------------------------------------------- /latex/figures/code_structure_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/code_structure_tensorboard.png -------------------------------------------------------------------------------- /latex/figures/getting_start_build_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_build_dataset.png -------------------------------------------------------------------------------- /latex/figures/getting_start_build_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_build_loss.png -------------------------------------------------------------------------------- /latex/figures/getting_start_build_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_build_model.png -------------------------------------------------------------------------------- /latex/figures/getting_start_build_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_build_network.png -------------------------------------------------------------------------------- /latex/figures/getting_start_calculate_metric.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_calculate_metric.png -------------------------------------------------------------------------------- /latex/figures/getting_start_exp_folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_exp_folder.png -------------------------------------------------------------------------------- /latex/figures/getting_start_init_data_model.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_init_data_model.png -------------------------------------------------------------------------------- /latex/figures/getting_start_init_dataloader.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_init_dataloader.png -------------------------------------------------------------------------------- /latex/figures/getting_start_loss_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_loss_init.png -------------------------------------------------------------------------------- /latex/figures/getting_start_network_init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_network_init.png -------------------------------------------------------------------------------- /latex/figures/getting_start_optimize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_optimize.png -------------------------------------------------------------------------------- /latex/figures/getting_start_parse_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_parse_options.png -------------------------------------------------------------------------------- /latex/figures/getting_start_srmodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_srmodel.png -------------------------------------------------------------------------------- /latex/figures/getting_start_train_entracne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_train_entracne.png -------------------------------------------------------------------------------- /latex/figures/getting_start_train_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_train_pipeline.png -------------------------------------------------------------------------------- /latex/figures/getting_start_training_loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_training_loop.png -------------------------------------------------------------------------------- /latex/figures/getting_start_validation_metric.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/getting_start_validation_metric.png -------------------------------------------------------------------------------- /latex/figures/good_luck.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/good_luck.png -------------------------------------------------------------------------------- /latex/figures/installation_clone_install_location.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/installation_clone_install_location.jpg -------------------------------------------------------------------------------- /latex/figures/installation_correct_install.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/installation_correct_install.jpg -------------------------------------------------------------------------------- /latex/figures/installation_pip_install_location.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/installation_pip_install_location.jpg -------------------------------------------------------------------------------- /latex/figures/installation_version.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/installation_version.jpg -------------------------------------------------------------------------------- /latex/figures/rocket_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/rocket_logo.png -------------------------------------------------------------------------------- /latex/figures/xpixel-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XPixelGroup/BasicSR-docs/f4e2f8b8950fa645d9b043a937f0e95d2e8e82ab/latex/figures/xpixel-logo.jpg -------------------------------------------------------------------------------- /latex/main.tex: -------------------------------------------------------------------------------- 1 | % Modified from: https://arxiv.org/abs/2201.09746 2 | 3 | \documentclass[a4paper, 12pt, oneside]{memoir} 4 | \usepackage{style} 5 | 6 | \usepackage[utf8]{inputenc} 7 | \usepackage{fancyhdr} 8 | 9 | % ----------- support Chinese ------------- 10 | \usepackage{CJKutf8} 11 | \usepackage{cmap} 12 | \usepackage[10pt]{type1ec} % use only 10pt fonts 13 | \usepackage[T1]{fontenc} 14 | \usepackage{textalpha} % improved LGR support 15 | \usepackage[overlap, CJK]{ruby} 16 | \usepackage{CJKulem} 17 | % ----------- End: support Chinese ------------- 18 | 19 | % ----------- support table and shline ------------- 20 | \usepackage{tabulary} 21 | \newcolumntype{x}[1]{>{\centering\arraybackslash}p{#1pt}} 22 | \newcommand{\tablestyle}[2]{\setlength{\tabcolsep}{#1}\renewcommand{\arraystretch}{#2}\centering\footnotesize} 
23 | \newlength\savewidth\newcommand\shline{\noalign{\global\savewidth\arrayrulewidth
24 | \global\arrayrulewidth 1pt}\hline\noalign{\global\arrayrulewidth\savewidth}}
25 | % ----------- End: support table and shline -------------
26 | 
27 | \usepackage{fontawesome5} % for icons used in the Author page
28 | \usepackage{dirtree} % for folder structure
29 | \renewcommand*\DTstylecomment{\rmfamily\color{black}}
30 | \renewcommand*\DTstyle{\ttfamily\textcolor{red}}
31 | 
32 | \newcommand{\todo}[1]{{\textbf{\color{red}{\emph{#1}}}}} % need to fill
33 | 
34 | \DisemulatePackage{setspace}
35 | \usepackage{setspace}
36 | \setstretch{1.1}
37 | \setlength{\parskip}{5pt}
38 | 
39 | \usepackage{minted}
40 | \definecolor{bg}{rgb}{0.95,0.95,0.95}
41 | 
42 | \usepackage{makecell}
43 | 
44 | \usepackage{float}
45 | 
46 | %\usepackage{import}
47 | \usepackage{subfiles}
48 | 
49 | % title
50 | \title{
51 | \vspace{4cm}
52 | \normalfont \normalsize
53 | \horrule{0.5pt} \\[0.4cm]
54 | \huge \begin{CJK}{UTF8}{gbsn} \quad BasicSR: An Open-Source Toolbox for Image and Video Super-Resolution, Restoration and Enhancement
55 | \\ Explanatory Documentation\end{CJK}
56 | \horrule{2pt} \\[0.5cm]
57 | }
58 | 
59 | %\author{\href{https://github.com/xinntao/BasicSR}}
60 | 
61 | \date{\normalsize\today}
62 | 
63 | \begin{document}
64 | \mathversion{bold}
65 | \maketitle
66 | \thispagestyle{empty}
67 | 
68 | \vspace{-15.5cm}
69 | \begin{adjustwidth}{0.5cm}{}
70 | \hspace{5.5cm}
71 | \includegraphics[width=0.3\textwidth]{figures/xpixel-logo.jpg}
72 | \end{adjustwidth}
73 | \vspace{1.2cm}
74 | \begin{adjustwidth}{0.5cm}{}
75 | \hspace{-0.5cm}
76 | \includegraphics[width=0.07\textwidth]{figures/rocket_logo.png}
77 | \end{adjustwidth}
78 | \vspace{5cm}
79 | 
80 | %\begin{adjustwidth}{7cm}{}
81 | \begin{center}
82 | \includegraphics[width=0.5\textwidth]{figures/basicsr_logo.png}
83 | \end{center}
84 | %\end{adjustwidth}
85 | \begin{center}
86 | \underline{\textbf{\url{https://github.com/XPixelGroup/BasicSR}}}
87 | \end{center}
88 | % \begin{center}
89 | % Version: V0
90 | % \end{center}
91 | 
92 | % Notes
93 | \vspace{0.2cm}
94 | \begin{center}
95 | \textcolor{ChadBlue}{\underline{\textbf{Notes}}}
96 | \end{center}
97 | \vspace{0.5cm}
98 | \begin{CJK}{UTF8}{gbsn}
99 | 
100 | The latest version of this document can be downloaded from \url{https://github.com/XPixelGroup/BasicSR-docs/releases}
101 | 
102 | Everyone is welcome to help find errors in this document and improve it
103 | 
104 | \hyperref[toc]{\faIcon{hand-point-right} Click to jump to the \textbf{main table of contents}}
105 | \end{CJK}
106 | 
107 | % Author page and table of contents
108 | \begin{CJK}{UTF8}{gbsn}
109 | \subfile{sections/authors.tex}
110 | \subfile{sections/xpixel_metaverse.tex}
111 | 
112 | \newpage
113 | \tableofcontents*\label{toc}
114 | \end{CJK}
115 | 
116 | % Main body
117 | \begin{CJK}{UTF8}{gbsn}
118 | {
119 | %\pagestyle{headings}
120 | \pagestyle{fancy}
121 | \fancyhf{}
122 | \rhead{\rightmark}
123 | \chead{\thepage}
124 | \renewcommand{\chaptermark}[1]{\markboth{#1}{}}
125 | \lhead{Chapter~\thechapter: \leftmark}
126 | \rfoot{\hyperref[toc]{Back to contents}}
127 | 
128 | \subfile{sections/overview.tex}
129 | \subfile{sections/installation.tex}
130 | \subfile{sections/getting_start.tex}
131 | \subfile{sections/code_structure.tex}
132 | \subfile{sections/metrics.tex}
133 | \subfile{sections/howto.tex}
134 | \subfile{sections/data_preparation.tex}
135 | \subfile{sections/deploy.tex}
136 | \subfile{sections/scripts.tex}
137 | \subfile{sections/template.tex}
138 | \subfile{sections/experience.tex}
139 | }
140 | \end{CJK}
141 | 
142 | \clearpage
143 | \newpage
144 | \thispagestyle{empty}
145 | \begin{center}
146 | \vspace*{5cm}
147 | \includegraphics[width=16cm]{figures/good_luck.png}
148 | \end{center}
149 | \begin{center}
150 | \vspace*{1cm}
151 | \includegraphics[width=0.85\textwidth]{figures/basicsr_logo.png}
152 | \end{center}
153 | 
154 | % Tutorial; the reference list is not used for now
155 | % \newpage
156 | 
157 | % \Large \begin{CJK}{UTF8}{gbsn}\textbf{References}\end{CJK}
158 | % {\small
159 | % \bibliography{bib}
160 | % }
161 | 
162 | \end{document}
--------------------------------------------------------------------------------
/latex/sections/authors.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\newpage

{\Large\textbf{Authors}}

Thanks to everyone who has contributed to the BasicSR code and documentation.

Listed below are only the authors who drafted this document and who continue to maintain it.

\begin{itemize}
\item 王鑫涛 (Xintao Wang) ~\faIcon{envelope}~ xintao.alpha@gmail.com ~\faIcon{link}~ \url{https://xinntao.github.io/}
%\item 张新栋 (Xindong Zhang)
\item 谢良彬 (Liangbin Xie) ~\faIcon{envelope}~ ~\faIcon{link}~ \url{https://liangbinxie.github.io/}
\item 陈翔宇 (Xiangyu Chen) ~\faIcon{envelope}~ chxy95@gmail.com ~\faIcon{link}~ \url{https://chxy95.github.io/}
\item 张文龙 (Wenlong Zhang) ~\faIcon{envelope}~ wenlong.zhang@connect.polyu.hk ~\faIcon{link}~ \url{https://wenlongzhang0517.github.io/}
\item 刘翼豪 (Yihao Liu) ~\faIcon{envelope}~ ~\faIcon{link}~ \href{https://scholar.google.com.hk/citations?user=WRIYcNwAAAAJ}{Google Scholar}
\item 孔祥涛 (Xiangtao Kong) ~\faIcon{envelope}~ kxtv587@gmail.com ~\faIcon{link}~ \url{https://xiangtaokong.github.io/}
\item 顾津锦 (Jinjin Gu) ~\faIcon{envelope}~ hellojasongt@gmail.com ~\faIcon{link}~ \url{https://www.jasongt.com/}
\item 何静雯 (Jingwen He) ~\faIcon{envelope}~ ~\faIcon{link}~ \href{https://scholar.google.com/citations?user=GUxrycUAAAAJ}{Google Scholar}
\item 董超 (Chao Dong) ~\faIcon{envelope}~ chao.dong@siat.ac.cn ~\faIcon{link}~ \href{https://scholar.google.com.hk/citations?user=OSDCB0UAAAAJ}{Google Scholar}
\end{itemize}

\end{document}
--------------------------------------------------------------------------------
/latex/sections/data_preparation.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\chapter{Data Preparation}\label{chapter:data_preparation}
\vspace{-2cm}
This chapter covers the supported data storage formats, the FileClient class, and the acquisition and description of several common datasets.

% ------------------------------------------------------------------------------
\section{Common Usage}\label{data_preparation:common_use}

Three data storage formats are currently supported:

\begin{enumerate}
\item Images / video frames stored directly on disk
\item Data packaged into an LMDB. Training data are usually stored this way, as it generally speeds up reading
\item Memcached, if the machine supports it. It is typically used on clusters
\end{enumerate}

The storage backend can be switched conveniently through the yaml configuration file. Taking \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/data/paired_image_dataset.py}{PairedImageDataset}, which supports DIV2K, as an example, modify the yaml file according to your needs; a minimal reading sketch follows this list.

\begin{enumerate}
\item Reading data directly from disk
\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{yaml}
type: PairedImageDataset
dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub
dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic/X4_sub
io_backend:
  type: disk
\end{minted}

\item Using LMDB. The LMDB files need to be created before use, see Section~\ref{data_preparation:lmdb}: \nameref{data_preparation:lmdb}. Note that we add meta information on top of the standard LMDB format, and the binary content we store also differs, so LMDB files from other sources cannot be used directly
\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{yaml}
type: PairedImageDataset
dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub.lmdb
dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X4_sub.lmdb
io_backend:
  type: lmdb
\end{minted}

\item Using Memcached. The machine/cluster needs to support Memcached. Adjust the configuration according to your actual Memcached setup:
\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{yaml}
type: PairedImageDataset
dataroot_gt: datasets/DIV2K_train_HR_sub
dataroot_lq: datasets/DIV2K_train_LR_bicubicX4_sub
io_backend:
  type: memcached
  server_list_cfg: /mnt/lustre/share/memcached_client/server_list.conf
  client_cfg: /mnt/lustre/share/memcached_client/client.conf
  sys_path: /mnt/lustre/share/pymc/py3
\end{minted}
\end{enumerate}
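To make the backend switch concrete, here is a minimal sketch of how a single ground-truth image can be read through the backend abstraction, in the spirit of what PairedImageDataset does internally. It assumes the \texttt{FileClient} and \texttt{imfrombytes} helpers exported by \texttt{basicsr.utils}; the option dict and path are illustrative only.

\begin{minted}[xleftmargin=20pt,bgcolor=bg]{python}
# Minimal sketch: backend-agnostic image reading (illustrative options/paths).
from basicsr.utils import FileClient, imfrombytes

io_backend_opt = {'type': 'disk'}  # or the lmdb / memcached options above
file_client = FileClient(io_backend_opt.pop('type'), **io_backend_opt)

gt_path = 'datasets/DIV2K/DIV2K_train_HR_sub/0001_s001.png'
img_bytes = file_client.get(gt_path, 'gt')     # raw bytes, whatever the backend
img_gt = imfrombytes(img_bytes, float32=True)  # decoded HWC BGR image in [0, 1]
\end{minted}

For the lmdb backend, \texttt{FileClient} additionally takes \texttt{db\_paths} and \texttt{client\_keys} arguments, and the ``path'' passed to \texttt{get} is then the LMDB key (the file name without extension).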
% ------------------------------------------------------------------------------
\section{Data Storage Formats}\label{data_preparation:data_format}

% ----------------------------------
\subsection{LMDB in Detail}\label{data_preparation:lmdb}

Using LMDB during training can speed up IO and CPU decompression (for testing, the amount of data is small, so LMDB is usually unnecessary). The actual speed-up depends on the machine configuration; the following factors matter:
\begin{enumerate}
\item Some machines clean the cache periodically, while LMDB relies on the cache. If the data never stays cached, check this setting. Under the \texttt{free -h} command, the cache occupied by LMDB is recorded in the \texttt{buff/cache} entry
\item Whether the machine has enough memory to hold the entire LMDB dataset. If not, speed suffers because the cache is constantly evicted and refilled
\item Caching the LMDB dataset for the first time may slow down training. Before training, you can enter the LMDB dataset directory and warm the cache manually: \texttt{cat data.mdb > /dev/null}
\end{enumerate}

\subsubsection{File structure}

Besides the standard LMDB files (data.mdb and lock.mdb), we add a meta\_info.txt to record extra information. An example:

\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{yaml}
DIV2K_train_HR_sub.lmdb
├── data.mdb
├── lock.mdb
├── meta_info.txt
\end{minted}

\subsubsection{Meta information}

meta\_info.txt. We use a plain txt file for readability. Its content looks like:

\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{yaml}
0001_s001.png (480,480,3) 1
0001_s002.png (480,480,3) 1
0001_s003.png (480,480,3) 1
0001_s004.png (480,480,3) 1
...
\end{minted}

Each line records one image with three fields:
\begin{enumerate}
\item Image name (with extension): 0001\_s001.png
\item Image shape: (480,480,3) denotes a $480\times480\times3$ image
\item Other parameters (BasicSR uses the cv2 png compression level): since we usually store images as png in restoration tasks, the 1 here is the png compression level, i.e., CV\_IMWRITE\_PNG\_COMPRESSION is 1. CV\_IMWRITE\_PNG\_COMPRESSION can be an integer in [0, 9]; a larger value means stronger compression, i.e., less storage space but longer compression time
\end{enumerate}

\subsubsection{Binary content}

For convenience, the binary content we store in the LMDB dataset is the cv2-encoded image:
\texttt{cv2.imencode('.png', img, [cv2.IMWRITE\_PNG\_COMPRESSION, compress\_level])}. The \texttt{compress\_level} controls the compression level, trading storage space against reading (including decompression) speed. A short write-and-read sketch follows.
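The sketch below makes this convention concrete: it encodes one image with cv2, writes it into an LMDB under its key, appends the corresponding meta\_info.txt line, and decodes it back. It mirrors the spirit of create\_lmdb.py, but the file names and map size are illustrative, and the real script batches commits and processes many images.

\begin{minted}[xleftmargin=20pt,bgcolor=bg]{python}
# Minimal sketch of the LMDB convention above (illustrative names and sizes).
import cv2
import lmdb
import numpy as np

img = cv2.imread('0001_s001.png')  # HWC, BGR, uint8
key = '0001_s001'                  # LMDB key: file name without extension
compress_level = 1

# Write: store the cv2-encoded png bytes; record one meta_info.txt line.
env = lmdb.open('DIV2K_train_HR_sub.lmdb', map_size=2**30)  # adjust to data size
with env.begin(write=True) as txn:
    _, img_byte = cv2.imencode('.png', img,
                               [cv2.IMWRITE_PNG_COMPRESSION, compress_level])
    txn.put(key.encode('ascii'), img_byte.tobytes())
h, w, c = img.shape
with open('DIV2K_train_HR_sub.lmdb/meta_info.txt', 'a') as f:
    f.write(f'{key}.png ({h},{w},{c}) {compress_level}\n')

# Read: fetch the bytes by key and decode.
with env.begin(write=False) as txn:
    buf = txn.get(key.encode('ascii'))
img_back = cv2.imdecode(np.frombuffer(buf, np.uint8), cv2.IMREAD_UNCHANGED)
\end{minted}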
97 | \subsubsection{二进制内容}
98 | 
99 | 为了方便,我们在 LMDB 数据集中存储的二进制内容是 cv2 encode 过的 image:
100 | \texttt{cv2.imencode(`.png', img, [cv2.IMWRITE\_PNG\_COMPRESSION, compress\_level])}。可以通过 \texttt{compress\_level} 控制压缩程度,平衡存储空间和读取 (包括解压缩) 的速度。
101 | 
102 | \subsubsection{如何制作}
103 | 
104 | 我们提供了脚本 \href{https://github.com/XPixelGroup/BasicSR/blob/master/scripts/data_preparation/create_lmdb.py}{scripts/data\_preparation/create\_lmdb.py} 来制作。在运行脚本前,需要根据需求修改相应的参数。目前支持 DIV2K,REDS 和 Vimeo90K 数据集,其他数据集可仿照进行制作。
105 | 
106 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
107 | python scripts/data_preparation/create_lmdb.py --dataset div2k
108 | python scripts/data_preparation/create_lmdb.py --dataset reds
109 | python scripts/data_preparation/create_lmdb.py --dataset vimeo90k
110 | \end{minted}
111 | 
112 | \begin{note} % ---------------- Note block ---------------- %
113 | \textbf{加速 IO 方法}
114 | 
115 | 除了使用 LMDB 加速 IO 外,还可以使用 prefetch 方式,具体参见章节\ref{code_structure:dataset_prefecth}:\nameref{code_structure:dataset_prefecth}。
116 | \end{note}
117 | 
118 | % ------------------------------------------------------------------------------
119 | \section{meta 文件介绍}\label{data_preparation:meta_file}
120 | 
121 | meta 文件是记录数据集信息的。一般我们使用 txt 格式,这样打开就能够直接看到它里面记录的内容。
122 | 
123 | 有时候我们从一个目录里面扫描全部的文件会比较慢、耗时,此时如果提供了 meta 文件,就可以比较快速地得到所有文件 (比如图片) 的路径列表了。
124 | 
125 | 同时我们也会使用 meta 文件来划分数据集,比如训练、测试集等。
126 | 
127 | 它一般在以下几个场景中使用:
128 | 
129 | \begin{enumerate}
130 | \item 制作 LMDB 后会同步产生一个 meta 文件,这个 meta 文件有着自己固定的格式,不能修改,否则可能会影响 LMDB 数据的读取。详细参见章节\ref{data_preparation:lmdb}:\nameref{data_preparation:lmdb}
131 | \item PairedImageDataset 支持 \texttt{meta\_info\_file} 参数,会使用这个 meta 文件生成待读取的文件路径。这个可以根据用户自己的需要进行自定义
132 | \end{enumerate}
133 | 
134 | \todo{待完善}
135 | 
136 | % ----------------------------------
137 | \subsection{现有 meta 文件说明}\label{data_preparation:existing_meta_file}
138 | 
139 | 在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/data/meta_info}{basicsr/data/meta\_info} 目录下提供了一些常用的 meta 文件,说明如下:
140 | 
141 | \todo{待完善}
142 | 
143 | % ------------------------------------------------------------------------------
144 | \section{File Client 介绍}\label{data_preparation:file_client}
145 | 
146 | 我们参考了 MMCV 的 FileClient 设计。为了使其兼容 BasicSR,我们对接口做了一些改动 (主要是为了适应 LMDB)。具体可以参见代码 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/utils/file_client.py}{file\_client.py}。
147 | 
148 | \todo{待完善}
149 | 
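在上面的文档完善之前,这里先给出一个极简的使用示意,展示如何用 disk backend 读取一张图像。这只是一份依据 file\_client.py 接口写的草图,接口细节以实际代码为准,图像路径为假设值:

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
from basicsr.utils import FileClient, imfrombytes

# 示意:用 disk backend 读取图像;若换成 lmdb backend,只需相应修改初始化参数
file_client = FileClient(backend='disk')
img_bytes = file_client.get('datasets/DIV2K/DIV2K_train_HR_sub/0001_s001.png')
img = imfrombytes(img_bytes, float32=True)  # 解码成取值范围 [0, 1] 的 float32 图像 (BGR)
\end{minted}

BasicSR 的各个 Dataset 内部正是用类似的两步 (先 get 字节流,再 imfrombytes 解码) 来读取图像的。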
150 | % ------------------------------------------------------------------------------
151 | \section{常见数据集介绍与准备}\label{data_preparation:dataset}
152 | 
153 | 推荐把数据通过 \texttt{ln -s src dst} 软链到 datasets 目录下。
154 | 
155 | % ----------------------------------
156 | \subsection{图像数据集 DIV2K 与 DF2K}
157 | 
158 | DIV2K 与 DF2K 数据集被广泛使用在图像复原的任务中。
159 | 其中 DF2K 是 DIV2K 和 Flickr2K 的融合。
160 | 
161 | \noindent\textbf{数据准备步骤}
162 | \begin{enumerate}
163 | \item 从 \href{https://data.vision.ee.ethz.ch/cvl/DIV2K}{DIV2K 官网}下载数据。Flickr2K 可从 \url{https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar} 下载
164 | \item Crop to sub-images:因为 DIV2K 数据集是 2K 分辨率的 (比如:2048$\times$1080),而我们在训练的时候往往并不需要那么大 (常见的是 128$\times$128 或者 192$\times$192 的训练 patch)。因此我们可以先把 2K 的图片裁剪成有 overlap 的 480$\times$480 的子图像块,然后再由 dataloader 从这个 480$\times$480 的子图像块中随机 crop 出 128$\times$128 或者 192$\times$192 的训练 patch。
165 | 运行脚本 \href{https://github.com/XPixelGroup/BasicSR/blob/master/scripts/data_preparation/extract_subimages.py}{extract\_subimages.py}:
166 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
167 | python scripts/data_preparation/extract_subimages.py
168 | \end{minted}
169 | 使用之前可能需要修改文件里面的路径和配置参数。注意:sub-image 的尺寸和训练 patch 的尺寸 (gt\_size) 是不同的。我们先把 2K 分辨率的图像 crop 成 sub-images (往往是 480$\times$480),然后存储起来。在训练的时候,dataloader 会读取这些 sub-images,然后进一步随机裁剪成 gt\_size $\times$ gt\_size 的大小
170 | \item\,[可选] 若需要使用 LMDB,则需要制作 LMDB,参考章节\ref{data_preparation:lmdb}:\nameref{data_preparation:lmdb}。
171 | 运行脚本:
172 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
173 | python scripts/data_preparation/create_lmdb.py --dataset div2k
174 | \end{minted}
175 | 注意选择 create\_lmdb\_for\_div2k 函数,并需要修改函数相应的配置和路径
176 | \item 单元测试:我们可以单独测试 dataset 是否正常。注意修改函数相应的配置和路径:test\_scripts/test\_paired\_image\_dataset.py。
177 | \item\,[可选] 若需要生成 meta\_info\_file 文件,请运行
178 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
179 | python scripts/data_preparation/generate_meta_info.py
180 | \end{minted}
181 | \end{enumerate}
182 | 
183 | % ----------------------------------
184 | \subsection{视频帧数据集 REDS}
185 | 
186 | REDS 是常用的视频帧数据集。数据集官方网站:\url{https://seungjunnah.github.io/Datasets/reds.html}。
187 | 我们重新整合了 training 和 validation 数据到一个文件夹中:训练集合原来有 240 个 clip (序号从 000 到 239),我们把 validation clips 重命名,从 240 到 269。
188 | 
189 | \noindent\textbf{Validation 的划分}
190 | 
191 | 官方的 validation 划分和 EDVR、BasicVSR 论文中的划分不同 (当时是为了比赛的设置):
192 | 
193 | \begin{table}[h]
194 | \centering
195 | \begin{tabular}{|c|c|c|}
196 | \hline
197 | \textbf{name} & \textbf{clips} & \textbf{total number} \\ \hline
198 | REDSOfficial & [240, 269] & 30 clips \\ \hline
199 | REDS4 & 000, 011, 015, 020 clips from the original training set & 4 clips \\ \hline
200 | \end{tabular}
201 | \caption{REDS 数据集中 Validation 的划分}
202 | \end{table}
203 | 余下的 clips 拿来做训练集合。注意:我们不需要显式地分开训练和验证集合,dataloader 会做这件事。
204 | 
205 | \noindent\textbf{数据准备步骤}
206 | 
207 | \begin{enumerate}
208 | \item 从\href{https://seungjunnah.github.io/Datasets/reds.html}{官网}下载数据
209 | \item 整合 training 和 validation 数据。运行
210 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
211 | python scripts/data_preparation/regroup_reds_dataset.py
212 | \end{minted}
213 | \item\,[可选] 若需要使用 LMDB,则需要制作 LMDB,参考章节 \ref{data_preparation:lmdb}:\nameref{data_preparation:lmdb}。运行
214 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
215 | python scripts/data_preparation/create_lmdb.py --dataset reds
216 | \end{minted}
217 | 注意选择 create\_lmdb\_for\_reds 函数,并需要修改函数相应的配置和路径
218 | \item 单元测试:我们可以单独测试 dataset 是否正常。注意修改函数相应的配置和路径:test\_scripts/test\_reds\_dataset.py。
219 | \end{enumerate}
220 | 
221 | % ----------------------------------
222 | \subsection{视频帧数据集 Vimeo90K}
223 | 
224 | Vimeo90K 是常用的视频帧数据集。官网地址:\url{http://toflow.csail.mit.edu}。
225 | 
226 | \noindent\textbf{数据准备步骤}
227 | \begin{enumerate}
228 | \item 下载数据:\href{http://data.csail.mit.edu/tofu/dataset/vimeo_septuplet.zip}{Septuplets dataset --> The original training + test set (82GB)}.
这些是 Ground-Truth。里面有 sep\_trainlist.txt 文件来区分训练数据 229 | \item 生成低分辨率图片。Vimeo90K 测试集中的低分辨率图片是由 MATLAB bicubic 降采样函数而来。运行脚本 \texttt{data\_scripts/generate\_LR\_Vimeo90K.m} (run in MATLAB) 来生成低清图片 230 | \item\,[可选] 若需要使用 LMDB,则需要制作 LMDB,参考章节\ref{data_preparation:lmdb}:\nameref{data_preparation:lmdb}。运行 231 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash} 232 | python scripts/data_preparation/create_lmdb.py --dataset vimeo90k 233 | \end{minted} 234 | 注意选择 create\_lmdb\_for\_vimeo90k 函数,并需要修改函数相应的配置和路径 235 | \item 单元测试:我们可以单独测试 dataset 是否正常。注意修改函数相应的配置和路径:test\_scripts/test\_vimeo90k\_dataset.py。 236 | \end{enumerate} 237 | 238 | \end{document} -------------------------------------------------------------------------------- /latex/sections/deploy.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{subfiles} 2 | 3 | \begin{document} 4 | 5 | \chapter{部署} 6 | 7 | \vspace{-2cm} 8 | 9 | 将在第二期加入 10 | 11 | \end{document} 12 | -------------------------------------------------------------------------------- /latex/sections/experience.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{subfiles} 2 | 3 | \begin{document} 4 | 5 | \chapter{经验} 6 | \vspace{-2cm} 7 | 8 | 将在第二期加入 9 | 10 | \end{document} 11 | -------------------------------------------------------------------------------- /latex/sections/getting_start.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{subfiles} 2 | 3 | \begin{document} 4 | 5 | \chapter{入门} 6 | \vspace{-2cm} 7 | 8 | 本部分为 BasicSR 方法的入门部分,主要涉及的有目录解读,训练、测试和快速推理的流程。这个部分的主要目的是希望读者能够快速入门 BasicSR 整体框架。 9 | 10 | % ------------------------------------------------------------------------------ 11 | \section{目录解读}\label{getting_start:content-overview} 12 | 13 | 所谓“看书先看目录”。我们首先来看一下 BasicSR 仓库的基本结构,先来整体地把握一下。根据仓库的目录层级,第一部分为仓库的整体概览。这部分主要包括算法核心文件和代码基础配置文件。具体的目录结构如下。 14 | 15 | 其中,\newline 16 | \noindent\textcolor{red}{红色} 表示和跑实验直接相关的文件,即我们平时打交道最多的文件;\newline 17 | \noindent\textcolor{blue}{蓝色} 表示其他与 BasicSR 强相关的代码文件;\newline 18 | \noindent\textcolor{black}{黑色} 表示配置文件。 19 | 20 | \vspace{0.5cm} 21 | \renewcommand*\DTstyle{\ttfamily\textcolor{black}} 22 | \dirtree{% 23 | .1 \textcolor{orange}{BasicSR 根目录}. 24 | .2 .github/workflows\DTcomment{GitHub 的自动 workflows,比如 PyLint、PyPI Publish等}. 25 | .2 .vscode\DTcomment{VSCode 配置,用于统一格式}. 26 | .2 LICENSE\DTcomment{使用的其他代码的 LICENSE 和 Acknowledgement}. 27 | .2 assets\DTcomment{存放仓库中展示使用的图片}. 28 | .2 \uline{\textcolor{red}{basicsr}}\DTcomment{\textcolor{red}{BasicSR 核心代码}}. 29 | .2 \textcolor{blue}{colab}\DTcomment{\textcolor{blue}{Google Colab 的 Notebook, 提供方便的 inference demo}}. 30 | .2 \textcolor{red}{datasets}\DTcomment{\textcolor{red}{“存放”使用的数据集,推荐 soft link,做到代码、数据的分离}}. 31 | .2 docs\DTcomment{使用和说明文档}. 32 | .2 \textcolor{red}{experiments}\DTcomment{\textcolor{red}{实验 checkpoints 保存路径}}. 33 | .3 \textcolor{blue}{pretrained\_models}\DTcomment{\textcolor{blue}{预训练模型存放路径}}. 34 | .2 \textcolor{red}{inference}\DTcomment{\textcolor{red}{快速推理,主要用于得到 demo 结果}}. 35 | .2 \uline{\textcolor{red}{options}}\DTcomment{\textcolor{red}{训练和测试的配置文件}}. 36 | .2 \uline{\textcolor{red}{scripts}}\DTcomment{\textcolor{red}{功能脚本,包含数据集制作,指标测试和数据集下载等}}. 37 | .2 \textcolor{blue}{test\_scripts}\DTcomment{\textcolor{blue}{一些用于手动单元测试的脚本}}. 38 | .2 \textcolor{blue}{tests}\DTcomment{\textcolor{blue}{PyTest 自动单元测试}}. 39 | .2 .gitignore\DTcomment{Git 忽略文件的配置}. 
40 | .2 .pre-commit-config.yaml\DTcomment{Pre-commit Hook 的配置文件}. 41 | .2 .readthedocs.yaml\DTcomment{自动触发 basicsr.readthedocs.io 的配置文件}. 42 | .2 MANIFEST.in\DTcomment{发布 basicsr 时,额外需要包含进去的文件}. 43 | .2 README.md\DTcomment{说明文档}. 44 | .2 README\_CN.md\DTcomment{说明文档中文版}. 45 | .2 \textcolor{blue}{VERSION}\DTcomment{\textcolor{blue}{版本文件}}. 46 | .2 \textcolor{blue}{requirements.txt}\DTcomment{ \textcolor{blue}{安装依赖包文件}}. 47 | .2 setup.cfg\DTcomment{格式配置文件,比如 flake8,yapf 和 isort}. 48 | .2 \textcolor{blue}{setup.py}\DTcomment{\textcolor{blue}{安装文件}}. 49 | } 50 | 51 | \vspace{0.5cm} 52 | 53 | 在 BasicSR 仓库中,核心代码在 basicsr 这个文件夹中。这个部分主要为深度学习模型常用的代码文件,比如网络结构,损失函数和数据加载等,具体目录如下。 54 | 55 | 其中,\textcolor{red}{红色} 表示我们在开发中主要修改的文件。 56 | 57 | \vspace{0.5cm} 58 | \dirtree{% 59 | .1 \textcolor{orange}{basicsr}. 60 | .2 \textcolor{red}{archs}\DTcomment{\textcolor{red}{定义网络结构和 forward 的步骤}}. 61 | .2 \textcolor{red}{data}\DTcomment{\textcolor{red}{定义 Dataset 来喂给模型的训练,Dataloader 的定义也在这里}}. 62 | .2 losses\DTcomment{定义损失函数}. 63 | .2 metrics\DTcomment{定义评价指标,比如 PSNR,SSIM,NIQE 等}. 64 | .2 \textcolor{red}{models}\DTcomment{\textcolor{red}{定义一次完整训练,比如前向、反向传播,梯度优化,Validation等}}. 65 | .2 ops\DTcomment{定义需要编译的算子,例如 StyleGAN 中用到的算子等}. 66 | .2 utils\DTcomment{定义基础工具,例如 file client,logger,registry,image process,matlab function等}. 67 | .2 \textcolor{blue}{test.py}\DTcomment{\textcolor{blue}{定义测试流程,是测试文件的主文件、入口}}. 68 | .2 \textcolor{blue}{train.py}\DTcomment{\textcolor{blue}{定义训练流程,是训练文件的主文件、入口}}. 69 | } 70 | 71 | \vspace{0.5cm} 72 | 73 | 由于在算法设计和开发中,还需要用到一些脚本,比如数据的预处理、指标计算等,相关的文件位于scripts,目录如下: 74 | \vspace{0.5cm} 75 | 76 | \dirtree{% 77 | .1 \textcolor{orange}{scripts}. 78 | .2 data\_preparation\DTcomment{准备数据}. 79 | .2 matlab\_scripts\DTcomment{基于 MATLAB 语言的数据处理脚本}. 80 | .2 metrics\DTcomment{计算指标的脚本,例如 PSNR,SSIM,NIQE等}. 81 | .2 model\_conversion\DTcomment{模型转换的脚本,主要是 .pth 文件的 keys 转换}. 82 | .2 dist\_test.sh\DTcomment{方便的分布式测试启动脚本}. 83 | .2 dist\_train.sh\DTcomment{方便的分布式训练启动脚本}. 84 | .2 download\_gdrive.py\DTcomment{从 Google Drive 下载文件的脚本}. 85 | .2 download\_pretrained\_models.py\DTcomment{从 Google Drive 批量下载预训练模型的脚本}. 86 | .2 publish\_models\DTcomment{发布模型的脚本,包括添加 SHA 等}. 
87 | }
88 | 
89 | 至此,我们对 BasicSR 的整体框架便有了一定的了解啦 $\sim$
90 | 
91 | % ------------------------------------------------------------------------------
92 | \section{训练流程}\label{getting_start:training_pipeline}
93 | 
94 | 在对目录结构有了初步的了解之后就可以进行训练了。我们希望 BasicSR 既方便使用,又清晰易懂,降低使用者的门槛。但随着 BasicSR 代码库逐渐抽象和复杂起来,很多刚接触的同学不知道程序入口在哪里,数据、模型和网络是在哪里定义的,流程又是在哪里控制的,那么我们就通过一个例子简要地说一下。
95 | 
96 | 本节的目的是希望能够初步地让读者了解到训练的基本流程和代码逻辑流,具体的细节我们会采用引用的方式来供读者查阅。
97 | \textbf{我们强烈建议你跟着下面的流程和实际代码,走一遍训练的流程。这样可以对 BasicSR 整体的框架有一个全面的理解。}
98 | 
99 | 训练流程是从 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/train.py}{basicsr/train.py} 开始的。
100 | 
101 | % ----------------------------------
102 | \subsection{代码的入口和训练的准备工作}\label{getting_start:entrance}
103 | 我们以训练超分辨率模型 MSRResNet 为例,首先需要在终端输入命令来开始训练:
104 | 
105 | \begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{bash}
106 | python basicsr/train.py -opt options/train/SRResNet_SRGAN/train_MSRResNet_x4.yml
107 | \end{minted}
108 | 
109 | 其中 \href{https://github.com/XPixelGroup/BasicSR/blob/master/options/train/SRResNet\_SRGAN/train\_MSRResNet\_x4.yml}{options/train/SRResNet\_SRGAN/train\_MSRResNet\_x4.yml} 为 yml 配置文件,主要设置实验相关的配置参数。参数具体说明参见章节\ref{code_structure:train_config}:\nameref{code_structure:train_config}。
110 | 
111 | 它以 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/train.py}{basicsr/train.py} 的 train\_pipeline 函数作为入口:
112 | 
113 | \begin{figure}[h]
114 | \begin{center}
115 | \vspace{-0.2cm}
116 | \includegraphics[width=0.85\linewidth]{figures/getting_start_train_entracne.png}
117 | \vspace{-0.3cm}
118 | \caption{函数 train\_pipeline 作为 basicsr/train.py 的入口}
119 | \label{fig:getting_start_train_entracne}
120 | \end{center}
121 | \vspace{-0.5cm}
122 | \end{figure}
123 | 
124 | \begin{exampleBox}[righthand ratio=0.00, sidebyside, sidebyside align=center, lower separated=false]{root\_path 作为参数传进去}
125 | 这里为什么要把 root\_path 作为参数传进去呢?这是因为,当我们把 basicsr 作为 package 使用的时候,需要根据当前的目录路径来创建文件,否则程序会错误地使用 basicsr package 所在位置的目录。
126 | \end{exampleBox}
127 | 
128 | train\_pipeline 函数会做一些基础的事,比如:
129 | \begin{enumerate}
130 | \item 解析配置文件 option file,即 yml 文件
131 | \item 设置 distributed training 的相关选项,设置 random seed 等
132 | \item 如果有 resume,需要 load 相应的状态
133 | \item 创建相关文件夹,拷贝配置的 yml 文件
134 | \item 合理初始化日志系统 logger
135 | \end{enumerate}
136 | 
137 | 我们对着代码一一讲解,如图\ref{fig:getting_start_train_pipeline}所示:
138 | 
139 | \begin{figure}[ht]
140 | \begin{center}
141 | \vspace{-0.2cm}
142 | \includegraphics[width=0.85\linewidth]{figures/getting_start_train_pipeline.png}
143 | \vspace{-0.3cm}
144 | \caption{函数 train\_pipeline 的基础准备工作}
145 | \label{fig:getting_start_train_pipeline}
146 | \end{center}
147 | \vspace{-0.5cm}
148 | \end{figure}
149 | 
150 | 具体的子函数,大家可以对着代码点进去查看,这里我们着重说几点。
151 | 
152 | \begin{enumerate}
153 | \item 我们在命令行中输入的参数,是在哪里完成解析的呢?即 argparse 在哪里?
154 | 答:是在 parse\_options 这个函数中。我们截取一部分来看一下。
155 | 
156 | \begin{figure}[ht]
157 | \begin{center}
158 | \vspace{-0.2cm}
159 | \includegraphics[width=0.85\linewidth]{figures/getting_start_parse_options.png}
160 | \vspace{-0.3cm}
161 | \caption{函数 parse\_options 解析参数输入}
162 | \label{fig:getting_start_parse_options}
163 | \end{center}
164 | \vspace{-0.5cm}
165 | \end{figure}
166 | 
167 | 从图\ref{fig:getting_start_parse_options}中,我们看到命令行的参数不多。我们一一讲解一下。
168 | 
169 | \begin{enumerate}
170 | \item {-}opt,配置文件的路径,一般采用这个命令配置训练或者测试的 yml 文件。
171 | \item {-}{-} launcher,用于指定 distributed training 的 launcher,比如 pytorch 或者 slurm。默认是 none,即单卡非 distributed training。
172 | \item {-}{-} auto\_resume,是否自动 resume,即自动查找最近的 checkpoint,然后 resume。详见章节\ref{code_structure:howto_resume}:\nameref{code_structure:howto_resume}。
173 | \item {-}{-} debug,能够快速帮助 debug。详见章节\ref{code_structure:debug_mode}:\nameref{code_structure:debug_mode}。
174 | \item {-}{-} local\_rank,这个不用管,是 distributed training 中程序会自动传入的。
175 | \item {-}{-} force\_yml,方便在命令行中修改 yml 中的配置文件。详见章节\ref{code_structure:yml_modification_with_commands}:\nameref{code_structure:yml_modification_with_commands}。
176 | \end{enumerate}
177 | 本小节末尾还给出了一条组合使用这些参数的命令示意。
178 | 
179 | \item 每个实验创建的文件夹
180 | 
181 | 每个实验都会在 experiments 目录中创建一个以配置文件中的 name 为名字的文件夹,里面的文件如图\ref{fig:getting_start_exp_folder}所示。log 的内容参见章节\ref{code_structure:logger}:\nameref{code_structure:logger}。实验文件夹中也会把配置文件 copy 一份,并额外添加 copy 的时间和运行使用的具体命令,方便事后检查和复现。
182 | 
183 | \begin{figure}[h]
184 | \begin{center}
185 | \vspace{-0.2cm}
186 | \includegraphics[width=0.85\linewidth]{figures/getting_start_exp_folder.png}
187 | \vspace{-0.3cm}
188 | \caption{实验过程中创建的文件}
189 | \label{fig:getting_start_exp_folder}
190 | \end{center}
191 | \vspace{-0.5cm}
192 | \end{figure}
193 | \end{enumerate}
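下面就是这条命令示意:在启动训练的同时打开自动 resume,并临时把总迭代数改写为 200000 (参数取值仅作演示,force\_yml 的具体语法详见上面引用的章节):

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{bash}
python basicsr/train.py -opt options/train/SRResNet_SRGAN/train_MSRResNet_x4.yml \
    --auto_resume --force_yml train:total_iter=200000
\end{minted}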
194 | 
195 | % ----------------------------------
196 | \subsection{Dataset 和 Model 的创建}\label{getting_start:data_model_creation}
197 | 
198 | 当训练准备工作结束后,我们接下来就要看 dataset 和 model 的创建过程了。下图\ref{fig:getting_start_init_data_model}是相对应的代码。
199 | 它主要包括:
200 | \begin{enumerate}
201 | \item 训练和 validation 的 data loader 创建,下面会展开
202 | \item model 的创建,下面会展开
203 | \item logger 的初始化,这块详见章节\ref{code_structure:logger}:\nameref{code_structure:logger} 的相关内容
204 | \item 还有 dataset prefetch 的内容,这块详见章节\ref{code_structure:dataset_prefecth}:\nameref{code_structure:dataset_prefecth} 的相关内容
205 | \end{enumerate}
206 | 
207 | \begin{figure}[h]
208 | \begin{center}
209 | \vspace{-0.2cm}
210 | \includegraphics[width=0.85\linewidth]{figures/getting_start_init_data_model.png}
211 | \vspace{-0.3cm}
212 | \caption{train\_pipeline 中初始化 dataset 和 model}
213 | \label{fig:getting_start_init_data_model}
214 | \end{center}
215 | \vspace{-0.5cm}
216 | \end{figure}
217 | 
218 | 这里我们着重讲解两块:dataloader 的创建和 model 的创建。
219 | 
220 | \begin{enumerate}
221 | 
222 | \item \textbf{dataloader 创建。}
223 | 首先我们看调用的 create\_train\_val\_dataloader 函数 (如图\ref{fig:getting_start_init_dataloader}所示)。
224 | 
225 | \begin{figure}[h]
226 | \begin{center}
227 | \vspace{-0.2cm}
228 | \includegraphics[width=0.85\linewidth]{figures/getting_start_init_dataloader.png}
229 | \vspace{-0.2cm}
230 | \caption{初始化 train 和 valid 的 dataloader}
231 | \label{fig:getting_start_init_dataloader}
232 | \end{center}
233 | \vspace{-0.5cm}
234 | \end{figure}
235 | 
236 | 里面主要就是两个函数:build\_dataset 和 build\_dataloader。无论是 train 还是 val 的 dataloader 都是这两个函数构建的。
237 | 创建 dataloader 要靠 build\_dataloader,其中又要用到 dataset,而 dataset 是由 build\_dataset 创建的。dataloader 其实大家都是共用的,当我们说要新写一个 dataloader,其实写的是 dataset。build\_dataloader 和 build\_dataset 都是定义在 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/data/\_\_init\_\_.py}{basicsr/data/\_\_init\_\_.py} 文件里。
238 | 
239 | \begin{figure}[H]
240 | \begin{center}
241 | \vspace{-0.2cm}
242 | \includegraphics[width=0.85\linewidth]{figures/getting_start_build_dataset.png}
243 | \vspace{-0.3cm}
244 | \caption{build\_dataset 和 build\_dataloader 的定义}
245 | \label{fig:getting_start_build_dataset}
246 | \end{center}
247 | \vspace{-0.5cm}
248 | \end{figure}
249 | 
250 | 这里面,build\_dataset 是核心 (如图\ref{fig:getting_start_build_dataset})。它会根据配置文件 yml 中的 dataset 类型,比如在我们这个例子中就是 PairedImageDataset,创建相应的实例。核心的代码是:\texttt{DATASET\_REGISTRY.get()}。这里是如何做到根据“类名”动态创建实例的,请参见章节\ref{code_structure:register}:\nameref{code_structure:register}。(实例就是由类 class 创建的、具体运行的对象。) 这里我们只要理解,通过这一句调用,就可以创建相应的实例了。
251 | build\_dataloader 是比较容易理解的。它根据传入的 dataset 和其他在 yml 中的参数,构建 dataloader。
252 | 
253 | \item \textbf{model 的创建。}
254 | model 的创建是通过 build\_model 这个函数,定义在 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/models/\_\_init\_\_.py}{basicsr/models/\_\_init\_\_.py} 文件里,简略图参见图\ref{fig:getting_start_build_model}。
255 | 
256 | \begin{figure}[h]
257 | \begin{center}
258 | \vspace{-0.2cm}
259 | \includegraphics[width=0.85\linewidth]{figures/getting_start_build_model.png}
260 | \vspace{-0.3cm}
261 | \caption{基于 build\_model 创建 model 实例}
262 | \label{fig:getting_start_build_model}
263 | \end{center}
264 | \vspace{-0.5cm}
265 | \end{figure}
266 | 
267 | build\_model 会根据配置文件 yml 中的 model 类型,比如在我们这个例子中就是 SRModel,创建相应的实例。
268 | 
269 | 接下来我们再具体地看看 SRModel 这个实例的创建过程吧,以便更好地理解一个模型中做了什么操作。
270 | 让我们进入 SRModel 这个类。简略图参见图\ref{fig:getting_start_srmodel}。
271 | 
272 | \begin{figure}[h]
273 | \begin{center}
274 | \vspace{-0.2cm}
275 | \includegraphics[width=0.85\linewidth]{figures/getting_start_srmodel.png}
276 | \vspace{-0.3cm}
277 | \caption{SR model 类的定义}
278 | \label{fig:getting_start_srmodel}
279 | \end{center}
280 | \vspace{-0.5cm}
281 | \end{figure}
282 | 
283 | 在这里我们主要关注以下几个方面,关于 model 具体的介绍,参见章节\ref{code_structure:model}:\nameref{code_structure:model}。
284 | 
285 | \begin{enumerate}
286 | \item network 的创建
287 | \item loss 的创建
288 | \item optimize\_parameters,即一个 iteration 的 train step
289 | \item metric 的使用
290 | \end{enumerate}
291 | 
292 | 下面我们分别简略说明,希望大家可以抓住大致的脉络。
293 | 
294 | \begin{enumerate}
295 | 
296 | \item \textbf{network 的创建}一般是在 model 的 \_\_init\_\_() 函数里面,是通过调用 build\_network() 实现的。\_\_init\_\_() 函数一般还会加载预训练模型,并初始化训练相关的设置。如图\ref{fig:getting_start_network_init}所示。
297 | 
298 | \begin{figure}[h]
299 | \begin{center}
300 | \vspace{-0.2cm}
301 | \includegraphics[width=0.85\linewidth]{figures/getting_start_network_init.png}
302 | \vspace{-0.3cm}
303 | \caption{模型初始化 - 创建网络结构}
304 | \label{fig:getting_start_network_init}
305 | \end{center}
306 | \vspace{-0.5cm}
307 | \end{figure}
308 | 
309 | build\_network 会根据配置文件 yml 中的 network 类型,比如在我们这个例子中就是 MSRResNet,从 ARCH\_REGISTRY 创建相应的实例。如图\ref{fig:getting_start_build_network}所示。
310 | 
311 | \begin{figure}[H]
312 | \begin{center}
313 | \vspace{-0.2cm}
314 | \includegraphics[width=0.85\linewidth]{figures/getting_start_build_network.png}
315 | \vspace{-0.3cm}
316 | \caption{build\_network 说明:根据 yml 配置文件中的网络结构类型,创建相应实例}
317 | \label{fig:getting_start_build_network}
318 | \end{center}
319 | \vspace{-0.5cm}
320 | \end{figure}
321 | 
322 | \item \textbf{loss 的创建}一般是在 model 的 init\_training\_settings() 函数里面。其他的可以先不关注,我们主要关注 build\_loss 这个函数。loss 就是通过调用 build\_loss() 实现的。如果有多个 loss,则会多次调用 build\_loss(),创建多个 loss 实例。如图\ref{fig:getting_start_loss_init}所示。
323 | 
324 | 
\begin{figure}[h]
325 | \begin{center}
326 | \vspace{-0.2cm}
327 | \includegraphics[width=0.85\linewidth]{figures/getting_start_loss_init.png}
328 | \vspace{-0.3cm}
329 | \caption{SR\_model 类中,使用 build\_loss 创建 loss}
330 | \label{fig:getting_start_loss_init}
331 | \end{center}
332 | \vspace{-0.5cm}
333 | \end{figure}
334 | 
335 | build\_loss 会根据配置文件 yml 中的 loss 类型,比如在我们这个例子中就是 L1Loss,从 LOSS\_REGISTRY 中创建相应的实例。如图\ref{fig:getting_start_build_loss}所示。
336 | 
337 | \begin{figure}[h]
338 | \begin{center}
339 | \vspace{-0.2cm}
340 | \includegraphics[width=0.85\linewidth]{figures/getting_start_build_loss.png}
341 | \vspace{-0.3cm}
342 | \caption{build\_loss 说明:根据 yml 配置文件中的 loss 类型,创建相应实例}
343 | \label{fig:getting_start_build_loss}
344 | \end{center}
345 | \vspace{-0.5cm}
346 | \end{figure}
347 | 
348 | \item \textbf{optimize\_parameters 函数},即一个 iteration 下的 train step。这个函数里面主要包含了 network forward,loss 计算,backward 和优化器的更新。如图\ref{fig:getting_start_optimize}所示。
349 | 
350 | \begin{figure}[h]
351 | \begin{center}
352 | \vspace{-0.2cm}
353 | \includegraphics[width=0.85\linewidth]{figures/getting_start_optimize.png}
354 | \vspace{-0.3cm}
355 | \caption{optimize\_parameters 函数:一个 iteration 的参数优化过程}
356 | \label{fig:getting_start_optimize}
357 | \end{center}
358 | \vspace{-0.5cm}
359 | \end{figure}
360 | 
361 | \item \textbf{metric 的使用}主要是在 validation 里面。我们来看在训练 MSRResNet 中调用的 nondist\_validation 函数。其中核心是在 calculate\_metric 这个函数,它会根据配置文件 yml 中的 metrics 配置,调用相应的函数。如图\ref{fig:getting_start_validation_metric}所示。
362 | 
363 | \begin{figure}[h]
364 | \begin{center}
365 | \vspace{-0.2cm}
366 | \includegraphics[width=0.85\linewidth]{figures/getting_start_validation_metric.png}
367 | \vspace{-0.3cm}
368 | \caption{validation 中 metric 的使用:基于 yml 文件配置调用对应的 metric 函数}
369 | \label{fig:getting_start_validation_metric}
370 | \end{center}
371 | \vspace{-0.5cm}
372 | \end{figure}
373 | 
374 | calculate\_metric 具体定义在 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/metrics/\_\_init\_\_.py}{basicsr/metrics/\_\_init\_\_.py} 文件中,它也是使用了 REGISTRY 机制:METRIC\_REGISTRY。它会根据配置文件 yml 中的 metric 类型,比如在我们这个例子中就有两个 metrics:PSNR 和 SSIM,调用相应的函数。注意和前面 DATASET, ARCH, MODEL, LOSS 的 REGISTRY 不同,这里返回的是函数调用,而其他返回的是类的实例。如图\ref{fig:getting_start_calculate_metric}所示。
375 | 
376 | \begin{figure}[h]
377 | \begin{center}
378 | \vspace{-0.2cm}
379 | \includegraphics[width=0.85\linewidth]{figures/getting_start_calculate_metric.png}
380 | \vspace{-0.3cm}
381 | \caption{calculate\_metric 函数:基于 yml 文件配置中 metric 的类型,调用相应的函数}
382 | \label{fig:getting_start_calculate_metric}
383 | \end{center}
384 | \vspace{-0.5cm}
385 | \end{figure}
386 | 
387 | 到此,我们已经看到了 dataset (data loader) 的创建,以及 model 的创建。model 的创建包含了 network architecture 和 loss 的创建,一次完整的训练流程以及 validation 中用到的 metric 的计算。
388 | \end{enumerate}
389 | \end{enumerate}
390 | 
391 | % ----------------------------------
392 | \subsection{训练过程}\label{getting_start:training_procedure}
393 | 
394 | 当以上这些部件都被创建后,就进入训练过程了。它就是一个循环的过程,不断地喂数据,然后不断执行训练步骤。整个训练过程如图\ref{fig:getting_start_training_loop}所示。
395 | 看图中的说明基本就能明白大概啦,这里就不赘述了。
396 | 
397 | \begin{figure}[h]
398 | \begin{center}
399 | \vspace{-0.2cm}
400 | \includegraphics[width=0.85\linewidth]{figures/getting_start_training_loop.png}
401 | \vspace{-0.3cm}
402 | \caption{完整的训练过程}
403 | \label{fig:getting_start_training_loop}
404 | \end{center}
405 | \vspace{-0.5cm}
406 | \end{figure}
407 | 
408 | 上面的 for 循环结束,整个训练过程也就结束啦。
409 | 
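为了配合上图,这里再用一段简化的示意代码把训练主循环的骨架写出来。这只是一份示意:省略了 prefetcher、日志、计时等细节,total\_epochs 在实际代码中也是根据 total\_iter 推算出来的,具体请以 basicsr/train.py 为准:

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
def simple_train_loop(model, train_loader, val_loader, opt,
                      start_epoch=0, current_iter=0):
    """训练主循环的简化示意,细节以 basicsr/train.py 为准。"""
    total_iters = int(opt['train']['total_iter'])
    for epoch in range(start_epoch, opt['train']['total_epochs']):
        for train_data in train_loader:
            current_iter += 1
            if current_iter > total_iters:
                return
            # 更新学习率 (含 warmup),喂入一个 batch,执行一次完整的优化步骤
            model.update_learning_rate(current_iter,
                                       warmup_iter=opt['train'].get('warmup_iter', -1))
            model.feed_data(train_data)
            model.optimize_parameters(current_iter)
            # 定期保存 checkpoint,定期做 validation
            if current_iter % opt['logger']['save_checkpoint_freq'] == 0:
                model.save(epoch, current_iter)
            if current_iter % opt['val']['val_freq'] == 0:
                model.validation(val_loader, current_iter, tb_logger=None,
                                 save_img=opt['val'].get('save_img', False))
\end{minted}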
410 | 
411 | % ------------------------------------------------------------------------------
412 | \section{测试流程}\label{getting_start:test_pipeline}
413 | 
414 | 这里的测试流程指的是,使用 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/test.py}{basicsr/test.py} 和配置文件 yml 来测试模型,以得到测试结果,同时输出指标结果的过程。
415 | 
416 | 测试流程是从 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/test.py}{basicsr/test.py} 开始的。
417 | 
418 | 在测试阶段,很多流程 (比如 dataset 和 data loader 的创建、model 的创建、网络结构的创建) 都和训练流程是共用的。因此我们在这里主要解释测试流程中特有的部分。
419 | 
420 | 测试阶段,我们需要在终端输入命令来开始测试。
421 | \begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{bash}
422 | python basicsr/test.py -opt options/test/SRResNet_SRGAN/test_MSRResNet_x4.yml
423 | \end{minted}
424 | 
425 | 其中 \href{https://github.com/XPixelGroup/BasicSR/blob/master/options/test/SRResNet\_SRGAN/test\_MSRResNet\_x4.yml}{options/test/SRResNet\_SRGAN/test\_MSRResNet\_x4.yml} 为 yml 配置文件,主要设置实验相关的配置参数。参数具体说明参见章节\ref{code_structure:test_config}:\nameref{code_structure:test_config}。
426 | 
427 | 下面是 basicsr/test.py 主要的测试流程 test\_pipeline 函数,相比于 basicsr/train.py 着实简单了很多。
428 | 
429 | \begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{python}
430 | def test_pipeline(root_path):
431 |     # 解析 yml 文件,加载配置参数
432 |     opt, _ = parse_options(root_path, is_train=False)
433 |     ...
434 | 
435 |     # 新建 logger 并初始化,打印基础信息
436 |     make_exp_dirs(opt)
437 |     log_file = osp.join(opt['path']['log'], f"test_{opt['name']}_{get_time_str()}.log")
438 |     logger = get_root_logger(logger_name='basicsr', log_level=logging.INFO, log_file=log_file)
439 |     logger.info(get_env_info())
440 |     logger.info(dict2str(opt))
441 | 
442 |     # 创建测试集和 dataloader。和训练过程一样,调用 build_dataset 和 build_dataloader
443 |     test_loaders = []
444 |     for _, dataset_opt in sorted(opt['datasets'].items()):
445 |         test_set = build_dataset(dataset_opt)
446 |         test_loader = build_dataloader(
447 |             test_set, dataset_opt, num_gpu=opt['num_gpu'], dist=opt['dist'], sampler=None, seed=opt['manual_seed'])
448 |         logger.info(f"Number of test images in {dataset_opt['name']}: {len(test_set)}")
449 |         test_loaders.append(test_loader)
450 | 
451 |     # 创建模型,和训练过程一样,调用 build_model
452 |     model = build_model(opt)
453 | 
454 |     # 测试多个测试集,调用的是 model 里面的 validation 函数
455 |     for test_loader in test_loaders:
456 |         test_set_name = test_loader.dataset.opt['name']
457 |         logger.info(f'Testing {test_set_name}...')
458 |         model.validation(test_loader, current_iter=opt['name'], tb_logger=None, save_img=opt['val']['save_img'])
459 | \end{minted}
460 | 
461 | 可以看到,整个测试过程和训练过程大部分都是重合的,非常简洁。
462 | 
463 | % ------------------------------------------------------------------------------
464 | \section{推理流程}\label{getting_start:inference_pipeline}
465 | 
466 | 这里的推理流程指的是,使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/inference}{inference} 目录下的代码,快速方便地测试结果。
467 | 它和测试流程 (第\ref{getting_start:test_pipeline}小节) 的目的是不同的:
468 | \begin{itemize}
469 | \item 测试流程针对学术数据集,希望能够同时测试多个测试集,同时能够输出相应的指标
470 | \item 推理流程针对实际使用场景,提供 demo。它往往只需要一个输出结果,而不需要有 GT (Ground-Truth) 数据,也不需要有指标输出
471 | \end{itemize}
472 | 
473 | 简而言之,推理流程方便用户快速得到一个 demo 的结果。因此我们希望 inference 的文件,能够尽可能少地依赖 BasicSR 框架,即可以自己读数据,创建模型。我们只需要使用 BasicSR 中的网络结构即可 (而网络结构在 BasicSR 中又是相对独立的)。这样,使用者便可以根据 inference 文件,快速将所需要的模型“摘”出来,放到自己的应用场景里面去。
474 | 
475 | 在快速推理阶段,我们只需要在终端输入命令:
476 | \begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{bash}
477 | python inference/inference_esrgan.py --input input_path --output out_path
478 | \end{minted}
479 | 
480 | \href{https://github.com/XPixelGroup/BasicSR/tree/master/inference/inference\_esrgan.py}{inference/inference\_esrgan.py} 提供了一个非常简洁且具有代表性的例子,相信你可以轻而易举地看懂。
481 | 
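下面给出一段仿照该文件写的极简推理示意。注意这只是一份草图:模型权重与图片路径均为演示用的假设值,并假设权重文件以 params 为键保存 (BasicSR 发布的预训练模型通常如此);确切写法请以 inference\_esrgan.py 为准:

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
import cv2
import torch
from basicsr.archs.rrdbnet_arch import RRDBNet

# 示意:只借用 BasicSR 的网络结构做推理,其余部分不依赖 BasicSR 框架
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32)
model.load_state_dict(torch.load('esrgan_x4.pth')['params'], strict=True)
model.eval().to(device)

img = cv2.imread('input.png', cv2.IMREAD_COLOR).astype('float32') / 255.0
img = torch.from_numpy(img.transpose(2, 0, 1)).flip(0)  # HWC (BGR) -> CHW (RGB)
with torch.no_grad():
    output = model(img.unsqueeze(0).to(device))
output = output.squeeze(0).clamp_(0, 1).flip(0).cpu().numpy().transpose(1, 2, 0)
cv2.imwrite('output.png', (output * 255.0).round().astype('uint8'))  # 存回 BGR 的 png
\end{minted}

482 | % 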
------------------------------------------------------------------------------ 483 | % \section{入门样例} 484 | 485 | % 这个部分我们以一个基础的超分模型SRResNet作为例子来展示BasicSR的入门使用。相关的文件目录如下所示: 486 | 487 | % \dirtree{% 488 | % .1 \textcolor{black}{BasicSR}. 489 | % .2 \textcolor{red}{basicsr}\DTcomment{BasicSR核心代码包}. 490 | % .2 scripts\DTcomment{常用脚本}. 491 | % .3 data\_preparation\DTcomment{数据准备脚本目录}. 492 | % .4 extract\_subimages.py\DTcomment{生成子图脚本}. 493 | % .2 datasets\DTcomment{数据集存放,推荐soft link}. 494 | % .3 DIV2K\DTcomment{训练数据集}. 495 | % .4 DIV2K\_train\_HR\_sub\DTcomment{训练数据集的GT子图}. 496 | % .4 DIV2K\_train\_LR\_bicubic\_X4\_sub\DTcomment{训练数据集子图的下采样图}. 497 | % .3 Set5\DTcomment{验证集}. 498 | % .4 GTmod12\DTcomment{验证集的GT图}. 499 | % .4 LRbicx4\DTcomment{验证集的下采样图}. 500 | % .3 Set14\DTcomment{验证集}. 501 | % .4 GTmod12\DTcomment{验证集的GT图}. 502 | % .4 LRbicx4\DTcomment{验证集的下采样图}. 503 | % .2 experiments\DTcomment{实验保存路径}. 504 | % .3 pretrained\_models\DTcomment{预训练模型保存路径}. 505 | % .3 001\_MSRResNet\_x4\_f64b16\_DIV2K\_1000k\_B16G1\_wandb\DTcomment{SRResNet实验存放路径}. 506 | % .4 models\DTcomment{SRResNet训练模型存放位置}. 507 | % .4 visualization\DTcomment{SRResNet实验验证图像}. 508 | % .4 training\_states\DTcomment{SRResNet实验resume文件存放路径}. 509 | % .4 001\_MSRResNet\_x4\_f64b16\_DIV2K\_1000k\_B16G1\_wandb\DTcomment{SRResNet实验配置文件}. 510 | % .2 inference\DTcomment{快速推理获得结果}. 511 | % .2 options\DTcomment{训练和测试配置文件}. 512 | % .3 train\DTcomment{训练配置文件夹}. 513 | % .4 SRResNet\_SRGAN\DTcomment{SRResNet训练配置文件夹}. 514 | % .5 train\_MSRResNet\_x4.yml\DTcomment{SRResNet训练配置文件}. 515 | % .3 test\DTcomment{测试配置文件夹}. 516 | % .4 SRResNet\_SRGAN\DTcomment{SRResNet测试配置文件夹}. 517 | % .5 test\_MSRResNet\_x4.yml\DTcomment{SRResNet测试配置文件}. 518 | % % .4 train\_MSRResNet_x4.yml\DTcomment{SRResNet配置文件}. 519 | % % .2 \textcolor{red}{scripts}\DTcomment{功能脚本,包含数据集制作,指标测试和数据集下载等}. 
520 | % } 521 | 522 | % \begin{enumerate} 523 | 524 | % \item 第一步是下载训练所用的数据集,常用的数据集链接可以参考: 525 | % \href{https://github.com/XPixelGroup/BasicSR/blob/master/docs/DatasetPreparation.md#DIV2K}{https://github.com/XPixelGroup/BasicSR/blob/master/docs/DatasetPreparation.md\#DIV2K} 526 | 527 | % 在这里我们采用DIV2K 作为训练数据集,Set5作为验证集 528 | 529 | % \begin{exampleBox}[]{数据集链接} 530 | 531 | % DIV2K: 532 | % \href{https://data.vision.ee.ethz.ch/cvl/DIV2K/}{https://data.vision.ee.ethz.ch/cvl/DIV2K/} 533 | 534 | % Set5和Set14: 535 | % \href{https://drive.google.com/drive/folders/1B3DJGQKB6eNdwuQIhdskA64qUuVKLZ9u}{https://drive.google.com/drive/folders/1B3DJGQKB6eNdwuQIhdskA64qUuVKLZ9u} 536 | 537 | % \end{exampleBox} 538 | 539 | % 将下载好的训练集和验证集放在datasets目录下。(软链接是更好的方式,这里为了进行入门样例展示,采用了直接存放数据集的方式) 540 | 541 | % \item 第二步是将下载好的DIV2K数据集切成子图的形式存放在DIV2K\_train\_HR\_sub目录下,由于2K图像的读取会占用大量的时间所以采用子图的形式进行读入 542 | 543 | % \href{https://github.com/XPixelGroup/BasicSR/blob/master/scripts/data_preparation/extract_subimages.py}{https://github.com/XPixelGroup/BasicSR/blob/master/scripts/data\_preparation/extract\_subimages.py} 544 | 545 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 546 | 547 | % # HR images 这个过程将2K的图像给切成480X480的子图 548 | % # 原始2K图像路径 549 | % opt['input_folder'] = 'datasets/DIV2K/DIV2K_train_HR' 550 | % # 子图存放路径 551 | % opt['save_folder'] = 'datasets/DIV2K/DIV2K_train_HR_sub' 552 | % opt['crop_size'] = 480 # 子图的尺寸 553 | % opt['step'] = 240 # 切图的步长 554 | % opt['thresh_size'] = 0 555 | % extract_subimages(opt) 556 | 557 | % # LRx4 images 558 | % opt['input_folder'] = 'datasets/DIV2K/DIV2K_train_LR_bicubic/X4' 559 | % opt['save_folder'] = 'datasets/DIV2K/DIV2K_train_LR_bicubic/X4_sub' 560 | % opt['crop_size'] = 120 # 子图的尺寸 561 | % opt['step'] = 60 # 切图的步长 562 | 563 | % \end{minted} 564 | 565 | % \item 制作好数据之后,修改yml配置文件中训练集和验证集的路径,就可以初步的把实验配置完成了 566 | 567 | % % \begin{exampleBox}[]{配置文件修改} 568 | 569 | % % dataroot_gt: datasets/DF2K/DIV2K_train_HR_sub 570 | % % dataroot_lq: datasets/DF2K/DIV2K_train_LR_bicubic_X4_sub 571 | 572 | % % \end{exampleBox} 573 | 574 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 575 | 576 | % # dataroot_gt: datasets/DF2K/DIV2K_train_HR_sub 577 | % # dataroot_lq: datasets/DF2K/DIV2K_train_LR_bicubic_X4_sub 578 | % # DF2K是DIV2K和Flickr2K数据集合并的数据集,这里我们先用DIV2K进行实验,可以根据需求调整自己的数据集 579 | % dataroot_gt: datasets/DIV2K/DIV2K_train_HR_sub 580 | % dataroot_lq: datasets/DIV2K/DIV2K_train_LR_bicubic_X4_sub 581 | 582 | % \end{minted} 583 | 584 | % \item 训练命令和log显示 585 | 586 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 587 | % python basicsr/train.py -opt options/train/SRResNet_SRGAN/train_MSRResNet_x4.yml 588 | % \end{minted} 589 | 590 | % 执行训练命令之后,终端会打印出训练的相关信息和验证集上的PSNR的精度,如下所示: 591 | 592 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 593 | 594 | % 2020-08-21 00:13:08,623 INFO: [001_M..][epoch: 4, iter:1,000,000, lr:(1.000e-07,)] [eta: 0:00:00, time (data): 0.041 (0.000)] l_pix: 2.1622e-02 595 | % 2020-08-21 00:13:08,624 INFO: Saving models and training states. 596 | % ... ... 
597 | % Test head 598 | % [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 5/5, 21.3 task/s, elapsed: 0s, ETA: 0s 599 | % Test woman 600 | % 2020-08-21 00:13:08,926 INFO: Validation Set5 601 | % # psnr: 30.2497 602 | 603 | % \end{minted} 604 | 605 | % 同时,我们配置了wandb之后,可以在自己的wandb云端主页上看到训练曲线,wandb的配置见xx 606 | % \begin{figure}[h] 607 | % \vspace{1cm} 608 | % \begin{center} 609 | % %\fbox{\rule{0pt}{2.5in} \rule{0.9\linewidth}{0pt}} 610 | % \includegraphics[width=\linewidth]{figures/SRResNet_psnr_curve.jpg} 611 | % %\vspace{-1cm} 612 | % \caption{SRResNet的PSNR训练曲线} 613 | % %\label{fig:logo} 614 | % \end{center} 615 | % %\vspace{-0.7cm} 616 | % \end{figure} 617 | 618 | % 5. 测试过程,我们采用以下命令来对训练好的模型进行测试 619 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 620 | % CUDA_VISIBLE_DEVICES=0 python basicsr/test.py -opt options/test/SRResNet_SRGAN/test_MSRResNet_x4.yml 621 | % \end{minted} 622 | 623 | % 其中,我们需要准备好待测数据集,比如test\_MSRResNet\_x4.yml配置文件中的DIV2K100测试集,需要我们按照第一步数据集准备的过程进行制作,然后设置对应路径 624 | 625 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 626 | % test_3: 627 | % name: DIV2K100 628 | % type: PairedImageDataset 629 | % dataroot_gt: datasets/DIV2K/DIV2K_valid_HR 630 | % dataroot_lq: datasets/DIV2K/DIV2K_valid_LR_bicubic/X4 631 | % \end{minted} 632 | 633 | % 此外,我们还需要对模型存放的路径进行设置 634 | % \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python} 635 | % path: 636 | % pretrain_network_g: experiments/001_MSRResNet_x4_f64b16_DIV2K_1000k_B16G1_wandb/ 637 | % models/net_g_1000000.pth 638 | % \end{minted} 639 | 640 | % 设置完成后执行测试命令就可以在results文件夹下得到测试的结果图和PSNR等定量的指标结果。 641 | 642 | % \end{enumerate} 643 | 644 | \end{document} -------------------------------------------------------------------------------- /latex/sections/howto.tex: -------------------------------------------------------------------------------- 1 | \documentclass[../main.tex]{subfiles} 2 | 3 | \begin{document} 4 | 5 | \chapter{如何添加与修改} 6 | \vspace{-2cm} 7 | 8 | 本章主要介绍如何在 BasicSR 框架中添加自定义的 Dataset ,网络结构 (Architecture),模型 (Model),损失函数 (Loss) 以及指标 (Metric)。 9 | 使用者需要关注四个方面,即: 10 | \begin{enumerate} 11 | \item 相关文件的存放和命名 12 | \item 编写自定义文件 13 | \item 注册新添加类 14 | \item 以及在配置文件中进行设置 15 | \end{enumerate} 16 | 17 | 这一部分的内容大体上十分相似,使用者只要对某一个模块比较熟悉(如添加修改网络结构),即可快速类比至其他各个部分。 18 | 在添加新的自定义模块时,理解并参考已有文件可以帮助使用者快速上手。 19 | 20 | 值得提及的是,当用户使用 \textbf{BasicSR-template} 进行开发,尤其是针对\textbf{指标}模块时,以下操作可能并不完全适用,具体详见第\ref{chapter:template}章:\nameref{chapter:template} 相关部分。 21 | 22 | % ------------------------------------------------------------------------------ 23 | \section{添加修改 Dataset}\label{howto:add_dataset} 24 | \begin{enumerate}[第 1 步] 25 | \item Dataset 文件的存放与命名:Dataset 文件存放在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/data}{basicsr/data/} 文件夹下。例如,\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/data}{basicsr/data/paired\_image\_dataset.py}。用户可根据需求对已有的 Dataset 进行修改,或是添加自定义 Dataset 文件。在创建新的自定义 Dataset 文件时,注意文件名需以 \textbf{\_dataset.py} 作为结尾 26 | 27 | \item 编写自定义 Dataset :在 Dataset 文件中对自定义 Dataset 类进行命名,\textbf{需要注意新建类名不能与已有类名重复,否则会导致后续注册机制报错}。关于 Dataset 文件中函数功能详解见章节\ref{code_structure:data}:\nameref{code_structure:data},此处不再赘述。对于需要添加新的设置参数,用户可以灵活利用 \textbf{opt} 参数从配置文件中读取 28 | 29 | \item 注册 Dataset :用户需要对新建的 Dataset 类进行注册。注册机制的原理详见章节\ref{code_structure:register}:\nameref{code_structure:register}。此处具体操作为,首先对 \textbf{DATASET\_REGISTRY} 函数进行导入,然后在新建类上方添加修饰器来注册新建函数。以 
\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/data/paired_image_dataset.py}{paired\_image\_dataset.py} 中的 \textbf{PairedImageDataset} 为例:
30 | 
31 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
32 | from basicsr.utils.registry import DATASET_REGISTRY
33 | 
34 | @DATASET_REGISTRY.register()
35 | class PairedImageDataset(data.Dataset):
36 |     ...
37 | \end{minted}
38 | 
39 | \item 在配置文件中设置自定义 Dataset:将配置文件(即 YAML 文件)中 \textbf{datasets} 部分中 \textbf{type} 参数设置为新建的 Dataset 类名即可。
40 | 该部分其余参数的功能与 Dataset 中用户自定义的功能对应。以使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/data/paired_image_dataset.py}{paired\_image\_dataset.py} 中的 \textbf{PairedImageDataset} 为例:
41 | 
42 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
43 | # dataset and data loader settings
44 | datasets:
45 |   ...
46 |   type: PairedImageDataset # 设置为需要使用的 Dataset 类名
47 |   ...
48 | \end{minted}
49 | \end{enumerate}
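综合以上四步,下面给出一个\textbf{假想的}自定义 Dataset 的极简骨架,仅作演示 (比如存放为 basicsr/data/example\_dataset.py,文件名、类名与实现均为假设,读者可按需替换):

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
import torch.utils.data as data

from basicsr.utils import FileClient, imfrombytes, img2tensor, scandir
from basicsr.utils.registry import DATASET_REGISTRY


@DATASET_REGISTRY.register()
class ExampleDataset(data.Dataset):
    """演示用的极简 Dataset:只读取 GT 图像。opt 即 yml 中 datasets 部分的配置。"""

    def __init__(self, opt):
        super().__init__()
        self.opt = opt
        self.file_client = FileClient(backend='disk')
        self.paths = sorted(scandir(opt['dataroot_gt'], full_path=True))

    def __getitem__(self, index):
        img_bytes = self.file_client.get(self.paths[index])
        img_gt = imfrombytes(img_bytes, float32=True)  # HWC, BGR, [0, 1]
        img_gt = img2tensor(img_gt, bgr2rgb=True, float32=True)  # -> CHW tensor
        return {'gt': img_gt, 'gt_path': self.paths[index]}

    def __len__(self):
        return len(self.paths)
\end{minted}

随后在 yml 的 datasets 部分把 type 设置为 ExampleDataset,即可像内置 Dataset 一样使用。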
50 | 
51 | % ------------------------------------------------------------------------------
52 | \section{添加修改模型}\label{howto:add_model}
53 | 
54 | \begin{enumerate}[第 1 步]
55 | \item 模型文件的存放与命名:模型文件存放在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models}{basicsr/models/} 文件夹下。例如,\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models}{basicsr/models/sr\_model.py}。用户可根据需求对已有的模型进行修改,或是添加自定义模型文件。在创建新的自定义模型文件时,注意文件名需以 \textbf{\_model.py} 作为结尾。
56 | 
57 | \item 编写自定义模型:在模型文件中对自定义模型类进行命名,\textbf{需要注意新建类名不能与已有类名重复,否则会导致后续注册机制报错}。关于模型文件中的函数功能详解见章节\ref{code_structure:model}:\nameref{code_structure:model}。模型部分涉及的函数较多,但一般情况下需要改写的部分非常有限。用户往往只需要继承已有模型,并对需要更改的函数进行重构即可。以 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models}{basicsr/models/swinir\_model.py} 中的 \textbf{SwinIRModel} 为例,该模型相较于图像超分通用的 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models}{basicsr/models/sr\_model.py} 中的 \textbf{SRModel} 仅需更改 test 函数,因此 \textbf{SwinIRModel} 类在继承了 \textbf{SRModel} 的基础上只对 \textbf{test} 函数进行了重构:
58 | 
59 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
60 | class SwinIRModel(SRModel):  # SwinIRModel 继承自 SRModel
61 |     def test(self):  # 重构 test 函数
62 |         ...
63 | \end{minted}
64 | 
65 | \item 注册模型:用户需要对新建的模型类进行注册。注册机制的原理详见章节\ref{code_structure:register}:\nameref{code_structure:register}。此处具体操作为,首先对 \textbf{MODEL\_REGISTRY} 函数进行导入,然后在新建类上方添加修饰器来注册新建类。以 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models/sr_model.py}{sr\_model.py} 中的 \textbf{SRModel} 为例:
66 | 
67 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
68 | from basicsr.utils.registry import MODEL_REGISTRY
69 | 
70 | @MODEL_REGISTRY.register()
71 | class SRModel(BaseModel):
72 |     ...
73 | \end{minted}
74 | 
75 | \item 在配置文件中设置自定义模型:将配置文件(即 YAML 文件)中 \textbf{general settings} 部分中的 \textbf{model\_type} 参数设置为新建的模型类名即可。以使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/models/sr_model.py}{sr\_model.py} 中的 \textbf{SRModel} 为例:
76 | 
77 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
78 | # general settings
79 | ...
80 | model_type: SRModel # 设置为需要使用的模型类名
81 | ...
82 | \end{minted}
83 | 除此之外,模型与整个配置文件的内容都是息息相关的,涉及到数据的读取与处理、模型网络结构、训练优化和测试评估等几乎所有内容的设置,而非一个独立的部分。用户在修改配置文件的结构时,建议参考已有文件作为模板,对模型中重点修改的部分,在配置文件中做对应的处理。
84 | 
85 | \end{enumerate}
86 | 
87 | % ------------------------------------------------------------------------------
88 | \section{添加修改网络结构}\label{howto:add_arch}
89 | 
90 | \begin{enumerate}[第 1 步]
91 | \item 网络结构文件的存放与命名:网络结构文件存放在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/archs}{basicsr/archs/} 文件夹下。例如,\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/archs}{basicsr/archs/srresnet\_arch.py}。用户可根据需求对已有的网络结构进行修改,或是添加自定义网络结构文件。在创建新的自定义网络结构文件时,注意文件名需以 \textbf{\_arch.py} 作为结尾。
92 | 
93 | \item 编写自定义网络结构:在网络结构文件中对自定义网络结构类进行命名,\textbf{需要注意新建类名不能与已有类名重复,否则会导致后续注册机制报错}。关于网络结构文件中的函数功能详解见章节\ref{code_structure:arch}:\nameref{code_structure:arch}。对于需要手工设置的参数,用户可以灵活利用 \textbf{opt} 参数从配置文件中读取。
94 | 
95 | \item 注册网络结构:用户需要对新建的网络结构类进行注册。注册机制的原理详见章节\ref{code_structure:register}:\nameref{code_structure:register}。此处具体操作为,首先对 \textbf{ARCH\_REGISTRY} 函数进行导入,然后在新建类上方添加修饰器来注册新建类。以 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/archs/srresnet_arch.py}{srresnet\_arch.py} 中的 \textbf{MSRResNet} 类为例:
96 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
97 | from basicsr.utils.registry import ARCH_REGISTRY
98 | 
99 | @ARCH_REGISTRY.register()
100 | class MSRResNet(nn.Module):
101 |     ...
102 | \end{minted}
103 | 
104 | \item 在配置文件中设置自定义网络结构:将配置文件(即 YAML 文件)中 \textbf{network structures} 部分中的 \textbf{type} 参数设置为新建的网络结构类名即可。
105 | 该部分其余参数的功能与模型和网络结构中用户自定义的功能对应。以使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/archs/srresnet_arch.py}{srresnet\_arch.py} 中的 \textbf{MSRResNet} 为例:
106 | 
107 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
108 | # network structures
109 | network_g: # g 网络设置
110 |   ...
111 |   type: MSRResNet # 设置为需要使用的网络结构类名
112 |   ...
113 | \end{minted}
114 | \end{enumerate}
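同样地,下面给出一个\textbf{假想的}自定义网络结构的极简骨架,仅作演示 (比如存放为 basicsr/archs/example\_arch.py,文件名、类名与结构均为假设):

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
import torch.nn as nn

from basicsr.utils.registry import ARCH_REGISTRY


@ARCH_REGISTRY.register()
class ExampleArch(nn.Module):
    """演示用的极简网络:几层卷积,__init__ 的参数与 yml 中 network_g 的配置一一对应。"""

    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64):
        super().__init__()
        self.body = nn.Sequential(
            nn.Conv2d(num_in_ch, num_feat, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(num_feat, num_out_ch, 3, 1, 1))

    def forward(self, x):
        return self.body(x)
\end{minted}

对应地,在 yml 的 network\_g 部分写上 type: ExampleArch 以及 num\_in\_ch 等参数即可。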
115 | 
116 | % ------------------------------------------------------------------------------
117 | \section{添加修改损失函数}\label{howto:add_loss}
118 | 
119 | \begin{enumerate}[第 1 步]
120 | \item 损失函数的存放与命名:损失函数文件存放在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/losses}{basicsr/losses/} 文件夹下。例如,\href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/losses/gan_loss.py}{basicsr/losses/gan\_loss.py}。用户可根据需求对已有的损失函数进行修改,或是添加自定义损失函数文件。在创建新的损失函数文件时,注意文件名需以 \textbf{\_loss.py} 作为结尾。
121 | 
122 | \item 编写自定义损失函数:在损失函数文件中对自定义损失函数类进行命名,\textbf{需要注意新建类名不能与已有类名重复,否则会导致后续注册机制报错}。关于损失函数的功能详解见章节\ref{code_structure:loss}:\nameref{code_structure:loss}。对于需要手工设置的参数,用户可以灵活利用 \textbf{opt} 参数从配置文件中读取。
123 | 
124 | \item 注册损失函数:用户需要对新建的损失函数类进行注册。注册机制的原理详见章节\ref{code_structure:register}:\nameref{code_structure:register}。此处具体操作为,首先对 \textbf{LOSS\_REGISTRY} 函数进行导入,然后在新建类上方添加修饰器来注册新建类。以 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/losses/basic_loss.py}{basicsr/losses/basic\_loss.py} 中的 \textbf{L1Loss} 为例:
125 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
126 | from basicsr.utils.registry import LOSS_REGISTRY
127 | 
128 | @LOSS_REGISTRY.register()
129 | class L1Loss(nn.Module):
130 |     ...
131 | \end{minted}
132 | 
133 | \item 在配置文件中设置自定义损失函数:将配置文件(即 YAML 文件)中 \textbf{losses} 部分中相应损失函数项的 \textbf{type} 参数设置为新建的损失类名即可。需要注意损失函数项的存在与模型有关。以使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/losses/basic_loss.py}{basicsr/losses/basic\_loss.py} 中的 \textbf{L1Loss} 为例:
134 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
135 | # losses
136 | pixel_opt: # pixel-wise 损失函数项,与模型有关
137 |   type: L1Loss # 设置为需要使用的损失函数类名
138 |   ...
139 | \end{minted}
140 | \end{enumerate}
141 | 
142 | \begin{hl} % ---------------- Highlight block ---------------- %
143 | \textbf{添加非 \texttt{Class} 的损失函数}
144 | 
145 | 在实际使用情况中,我们会遇到一些损失函数,它们不是以类 (\texttt{Class}) 的形式出现,而是普通的函数。比如 StyleGAN2 中使用的 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/losses/gan_loss.py}{r1\_penalty} 和 \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/losses/gan_loss.py}{gradient\_penalty\_loss}。
146 | 
147 | 此时,我们不再以注册机制的方式使用,而是直接在模型中调用相关函数。
148 | \begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{python}
149 | from basicsr.losses.gan_loss import r1_penalty
150 | 
151 | class StyleGAN2Model(BaseModel):
152 |     ...
153 |     def optimize_parameters(self, current_iter):
154 |         ...
155 |         real_pred = self.net_d(self.real_img)
156 |         l_d_r1 = r1_penalty(real_pred, self.real_img)  # 直接调用损失函数 r1_penalty
157 |         ...
158 | \end{minted}
159 | \end{hl}
160 | 
161 | % ------------------------------------------------------------------------------
162 | \section{添加修改指标}\label{howto:add_metric}
163 | 
164 | \begin{enumerate}[第 1 步]
165 | \item 指标的存放与命名:指标文件存放在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics}{basicsr/metrics/} 文件夹下。对于命名规则无要求,一般直接以功能命名即可,如 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics/psnr_ssim.py}{basicsr/metrics/psnr\_ssim.py}。
166 | 
167 | \item 编写自定义指标:在指标文件中对自定义指标函数进行命名,\textbf{需要注意新建函数名不能与已有函数名重复,否则会导致后续注册机制报错}。关于指标的功能详解见第\ref{chapter:metrics}章:\nameref{chapter:metrics}。在编写完自定义指标后,注意在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics/__init__.py}{basicsr/metrics/\_\_init\_\_.py} 文件中对添加的自定义指标进行导入。以 \textbf{calculate\_psnr} 为例:
168 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
169 | from .psnr_ssim import calculate_psnr
170 | \end{minted}
171 | 
172 | \item 注册指标:用户需要对新建的指标函数进行注册。注册机制的原理详见章节\ref{code_structure:register}:\nameref{code_structure:register}。此处具体操作为,首先对 \textbf{METRIC\_REGISTRY} 函数进行导入,然后在新建函数上方添加修饰器来注册新建函数。以 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics/psnr_ssim.py}{psnr\_ssim.py} 中的 \textbf{calculate\_psnr} 为例:
173 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
174 | from basicsr.utils.registry import METRIC_REGISTRY
175 | 
176 | @METRIC_REGISTRY.register()
177 | def calculate_psnr(img, img2, ... ):
178 |     ...
179 | \end{minted}
180 | 
181 | \item 在配置文件中设置自定义指标:将配置文件(即 YAML 文件)中 \textbf{validation settings} 部分中 \textbf{metric} 部分中的 \textbf{type} 参数设置为新建的指标函数名即可。指标的其他参数设置对应其功能部分代码。以使用 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics/psnr_ssim.py}{psnr\_ssim.py} 中的 \textbf{calculate\_psnr} 为例:
182 | \begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg]{python}
183 | # validation settings
184 | val:
185 |   ...
186 |   metrics:
187 |     psnr: # 指标名称,可以是任意的,用于标记
188 |       type: calculate_psnr # 设置为需要使用的指标函数名
189 |       ...
190 | \end{minted}
191 | \end{enumerate}
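下面给出一个\textbf{假想的}自定义指标的完整小例子,可与上述四步一一对应 (以平均绝对误差 MAE 为例,函数名与文件名均为演示用的假设):

\begin{minted}[xleftmargin=20pt,breaklines,bgcolor=bg]{python}
import numpy as np

from basicsr.utils.registry import METRIC_REGISTRY


@METRIC_REGISTRY.register()
def calculate_mae(img, img2, crop_border=0, **kwargs):
    """演示用指标:平均绝对误差 (MAE)。输入为取值 [0, 255] 的 HWC 图像。"""
    img = img.astype(np.float64)
    img2 = img2.astype(np.float64)
    if crop_border != 0:
        img = img[crop_border:-crop_border, crop_border:-crop_border, ...]
        img2 = img2[crop_border:-crop_border, crop_border:-crop_border, ...]
    return np.mean(np.abs(img - img2))
\end{minted}

随后在 basicsr/metrics/\_\_init\_\_.py 中显式地 import 该函数,并在 yml 的 metrics 中把 type 设置为 calculate\_mae 即可。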
192 | 
193 | \begin{note} % ---------------- Note block ---------------- %
194 | \textbf{注意}
195 | 
196 | \begin{itemize}
197 | \item 指标的新建注册机制和其他几类 (dataset, model, arch, loss) 不同:1) 需要显式地在 \href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/metrics/__init__.py}{basicsr/metrics/\_\_init\_\_.py} import 函数;2) 新建的是函数而不是类
198 | \item 目前指标主要是在 Numpy 上计算的,比如 \texttt{calculate\_psnr}、\texttt{calculate\_ssim},有些也提供了基于 PyTorch 计算的版本,比如 \texttt{calculate\_psnr\_pt}、\texttt{calculate\_ssim\_pt}
199 | \end{itemize}
200 | \end{note}
201 | 
202 | \end{document}
--------------------------------------------------------------------------------
/latex/sections/installation.tex:
--------------------------------------------------------------------------------
1 | \documentclass[../main.tex]{subfiles}
2 | 
3 | \begin{document}
4 | 
5 | \chapter{安装}
6 | \vspace{-2cm}
7 | 
8 | 本章节首先介绍安装 BasicSR 所需的环境依赖 (第\ref{installation:env-reqirement}小节),随后介绍安装 BasicSR 的两种方式:本地 clone 源代码安装和 pip 安装 basicsr 包 (第\ref{installation:install}小节)。对于需要在项目中使用 PyTorch C++ 编译算子的情况,我们也提供了相应的安装方式 (第\ref{installation:c++}小节)。
9 | 最后,我们将安装过程中的常见问题进行了汇总 (第\ref{installation:faq}小节)。
10 | 
11 | % ------------------------------------------------------------------------------
12 | \section{环境依赖}\label{installation:env-reqirement}
13 | 
14 | 由于 BasicSR 是基于 \href{https://www.python.org/}{Python} 语言和 \href{https://pytorch.org/}{PyTorch} 深度学习框架进行开发的,因此在安装 BasicSR 之前,需要在电脑或者服务器上安装 Python 环境以及各种相关的 Python 库;如果想要在 \textbf{GPU} 上运行程序的话,也需要先在电脑上配置相应的 CUDA 环境。以下我们分别对 CUDA 和相应的 Python 库进行简要说明。
15 | 
16 | \begin{enumerate}
17 | \item NVIDIA GPU + \href{https://developer.nvidia.com/cuda-downloads}{CUDA}:GPU (Graphics Processing Unit) 由于其高效的并行能力,目前被广泛用于深度学习的计算中;CUDA (Compute Unified Device Architecture) 是 NVIDIA 推出的可以让 GPU 解决复杂计算问题的运算平台。如果需要训练 BasicSR 中的模型,需要使用 GPU 并配置好相应的 CUDA 环境
18 | \item Python 和 Python 库 (对于 Python 库,我们提供了相应的安装脚本):
19 | \begin{enumerate}
20 | \item Python >= 3.7 (推荐使用\href{https://www.anaconda.com/products/distribution#linux}{Anaconda}或者\href{https://docs.conda.io/en/latest/miniconda.html}{Miniconda})
21 | \item \href{https://pytorch.org/}{PyTorch >= 1.7}:目前深度学习领域广泛使用的深度学习框架
22 | \end{enumerate}
23 | \end{enumerate}
24 | 
25 | 当配置好 Python 环境和 CUDA 环境之后,可以直接运行以下的脚本一次性安装 BasicSR 中调用的各种 Python 库:
26 | 
27 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
28 | pip install -r requirements.txt
29 | \end{minted}
30 | 
31 | \begin{note} % ---------------- Note block ---------------- %
32 | \textbf{Windows 环境}
33 | 
34 | BasicSR 也支持 Windows 环境。
35 | 
36 | 更多注意事项,参见第\ref{installation:faq}小节 Q1 问题。
37 | \end{note}
38 | 
39 | % ------------------------------------------------------------------------------
40 | \section{BasicSR 安装}\label{installation:install}
41 | 
42 | 在安装好上述的环境依赖后,此时就可以进行 BasicSR 的安装了。
43 | 
44 | 本小节的安装默认不使用 PyTorch C++ 编译算子,若需要,则参考第 \ref{installation:c++} 小节进行安装。
45 | 
46 | \begin{hl} % ---------------- Highlight block ---------------- %
47 | \textbf{BasicSR 安装方式}
48 | 
49 | 根据不同的需求,我们提供了两种安装 BasicSR 的方式,{\color{red}\textbf{两种方式只能选择一种安装,否则容易产生冲突}}。
50 | 
51 | \begin{itemize}
52 | \item 如果希望\textbf{查看 BasicSR 中的细节}或者需要对其进行\textbf{修改},推荐通过本地 clone 代码的方式进行安装
53 | \item 如果仅仅是将 BasicSR 作为一个\textbf{ Python 包}进行使用 (比如项目 \href{https://github.com/TencentARC/GFPGAN}{GFPGAN} 和 \href{https://github.com/xinntao/Real-ESRGAN}{Real-ESRGAN}),推荐直接从 PyPI 安装 BasicSR,这样可以使得自身项目的代码结构更加简洁
54 | \end{itemize}
55 | \end{hl}
56 | 
57 | % ----------------------------------
58 | \subsection{本地 clone 代码}\label{installation:local-clone}
59 | 
60 | 要通过本地 clone 安装 BasicSR,需要在终端上依次进行以下 3 个步骤。
61 | 
62 | \begin{enumerate}
63 | \item 克隆项目:
64 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
65 | git clone https://github.com/XPixelGroup/BasicSR.git
66 | \end{minted}
67 | 
68 | \item 安装依赖包:
69 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
70 | cd BasicSR
71 | pip install -r requirements.txt
72 | \end{minted}
73 | 
74 | \item 在 BasicSR 的根目录下安装 BasicSR:
75 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
76 | python setup.py develop
77 | \end{minted}
78 | \end{enumerate}
79 | 
80 | 如果希望安装的时候指定 CUDA 路径,可使用如下指令:
81 | 
82 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
83 | CUDA_HOME=/usr/local/cuda \
84 | CUDNN_INCLUDE_DIR=/usr/local/cuda \
85 | CUDNN_LIB_DIR=/usr/local/cuda \
86 | python setup.py develop
87 | \end{minted}
88 | 
89 | % ----------------------------------
90 | \subsection{pip 安装}\label{installation:pip-install}
91 | 
92 | 对于使用 pip 安装 BasicSR,在终端上运行以下指令即可:
93 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
94 | pip install basicsr
95 | \end{minted}
96 | 
97 | 如果希望安装的时候指定 CUDA 路径,可使用如下指令:
98 | 
99 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
100 | CUDA_HOME=/usr/local/cuda \
101 | CUDNN_INCLUDE_DIR=/usr/local/cuda \
102 | CUDNN_LIB_DIR=/usr/local/cuda \
103 | pip install basicsr
104 | \end{minted}
105 | 
106 | % ----------------------------------
107 | \subsection{验证 BasicSR 是否安装成功}\label{installation:verify-installation}
108 | 
109 | 当通过上述两种方式之一安装好 BasicSR 后,我们可以通过图\ref{fig:correct-install}的方式来判断是否成功安装 BasicSR:
110 | 
111 | \begin{figure}[H]
112 | %\vspace{-0.5cm}
113 | \begin{center}
114 | \includegraphics[width=0.9\linewidth]{figures/installation_correct_install.jpg}
115 | \caption{验证成功安装 BasicSR}
116 | \label{fig:correct-install}
117 | \end{center}
118 | \vspace{-0.5cm}
119 | \end{figure}
120 | 
121 | 如果此时没有报错,则说明 BasicSR 安装成功,此时便可以基于 BasicSR 进行开发啦 $\sim \sim \sim$
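除了对照上图,也可以直接在终端里用一条命令自查 (示意:安装成功后,basicsr 应能被正常 import 并打印出版本号):

\begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
python -c "import basicsr; print(basicsr.__version__)"
\end{minted}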
122 | 
123 | % ------------------------------------------------------------------------------
124 | \section{PyTorch C++ 编译算子}\label{installation:c++}
125 | 
126 | 考虑到某些项目中会需要使用 PyTorch C++ 编译算子,我们在这个小节针对这种情况也提供了相应的 BasicSR 安装方式。如果不需要使用相关 C++ 编译算子,则此小节可以跳过。
127 | 
128 | 对于项目中需要使用以下的 PyTorch C++ 编译算子时,比如:
129 | 
130 | \begin{itemize}
131 | \item 可变形卷积 DCN (如果安装的 TorchVision 版本 >= 0.9.0,会自动使用 TorchVision 中提供的 DCN,故不需要安装此编译算子),比如:\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/ops}{EDVR 中的 DCN}
132 | 
133 | \item StyleGAN 中的特定的算子,比如:\href{https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/ops}{upfirdn2d, fused\_act}
134 | \end{itemize}
135 | 
136 | 由于第\ref{installation:install}小节所提到的安装方式不支持 PyTorch C++ 编译算子,为了能够使用 PyTorch C++ 编译算子,此时需要一些特定的修改 (有以下两种方式可供选择):
137 | 
138 | \begin{enumerate}
139 | \item \textbf{安装}的时候对 PyTorch C++ 编译算子进行编译:此时需要将原先的安装指令进行修改,其中 \texttt{BASICSR\_EXT=True} 中的 \texttt{EXT} 是单词 Extension 的缩写。
140 | \begin{enumerate}
141 | \item 对于通过本地 clone 代码安装 BasicSR 的方式,此时修改指令:
142 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
143 | python setup.py develop --> BASICSR_EXT=True python setup.py develop
144 | \end{minted}
145 | \item 对于通过 pip 安装 BasicSR 的方式,此时修改指令:
146 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
147 | pip install basicsr --> BASICSR_EXT=True pip install basicsr
148 | \end{minted}
149 | \end{enumerate}
150 | 进行了上述的修改之后,如果我们需要运行 StyleGAN 的测试代码 (需要用到 PyTorch C++ 编译算子) (代码位于 \href{https://github.com/XPixelGroup/BasicSR/blob/master/inference/inference_stylegan2.py}{inference/inference\_stylegan2.py}),此时直接输入指令即可:
151 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
152 | python inference/inference_stylegan2.py
153 | \end{minted}
154 | 
155 | \item \textbf{每次在跑程序}的时候\textbf{即时加载 (JIT)} PyTorch C++ 编译算子:如果我们选择了这种方式,此时不需要修改 BasicSR 的安装指令。依然拿 StyleGAN 的测试代码举例,在这种情况下,如果想要运行 StyleGAN 的测试代码,此时需要输入的指令是:
156 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
157 | BASICSR_JIT=True python inference/inference_stylegan2.py
158 | \end{minted}
159 | 
160 | \end{enumerate}
161 | 
162 | 关于上述提到的两种使用 PyTorch C++ 编译算子方式之间的优劣和场景对比如表\ref{tab:env}所示:
163 | 
164 | \begin{table}[h]
165 | \centering
166 | \footnotesize
167 | \begin{tabular}{|c|c|c|c|c|}
168 | \hline
169 | 选项 & 优点 & 缺点 & 适用场景 & 具体安装指令 \\
170 | \hline
171 | \textbf{安装}编译 C++ 算子 & \makecell[c]{运行代码的时候, \\ 能够快速加载编译算子} & \makecell*[c]{配置环境的时候, \\ 需要更多的依赖,\\碰到的问题可能更多} & \makecell[c]{需要多次训练或 \\ 多次测试模型} & \makecell[c]{在安装的时候,设置 \\\textbf{BASICSR\_EXT=True}}\\
172 | \hline
173 | \textbf{即时加载} C++ 算子 & \makecell[c]{有着更少的依赖, \\碰到的问题可能更少} & \makecell[c]{每次运行代码的时候,\\都需要花费几分钟\\重新编译算子} & 仅仅是进行测试 & \makecell[c]{在跑程序的时候,设置 \\\textbf{BASICSR\_JIT=True}} \\
174 | \hline
175 | \end{tabular}
176 | \caption{\label{tab:env}安装编译算子和即时加载编译算子的对比。}
177 | \end{table}
178 | 
179 | \begin{note} % ---------------- Note block ---------------- %
180 | \textbf{注意}
181 | \begin{enumerate}
182 | \item 对于需要在安装的时候就编译 PyTorch C++ 算子,需要确保:gcc 和 g++ 版本 >= 5。
183 | \item \texttt{BASICSR\_JIT} 有最高的优先级。即使在安装的时候已经成功编译了 C++ 编译算子,若在运行代码指令中设置了 \texttt{BASICSR\_JIT=True},此时代码仍旧会即时加载 C++ 编译算子。
184 | \item 在\textbf{安装}的时候,不能设置 \texttt{BASICSR\_JIT=True}。
185 | \end{enumerate}
186 | \end{note}
187 | 
188 | % ------------------------------------------------------------------------------
189 | \section{常见安装问题}\label{installation:faq}
190 | 
191 | \begin{enumerate}
192 | \item \textbf{Q1: Windows 下是否可以使用?}
193 | 
194 | 经过验证,Windows 下可以通过上述的两种安装方式安装 BasicSR。如果需要使用 CUDA,需要指定 CUDA 路径。另外需要注意的是,如果需要在 Windows 环境中使用环境变量,需要使用以下方式:
195 | \begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
196 | set BASICSR_EXT=True
197 | \end{minted}
198 | 
199 | 由于 BasicSR 项目是在 Linux (Ubuntu) 环境下进行开发的,因此推荐在 Linux 环境下基于 BasicSR 进行项目的开发。
200 | 
201 | \item \textbf{Q2: \texttt{BASICSR\_EXT} 和 \texttt{BASICSR\_JIT} 在什么环境下才能执行?}
202 | 
203 | 如果在加入 \texttt{BASICSR\_EXT} 和 \texttt{BASICSR\_JIT} 环境变量之后运行报错,此时需要检查 gcc 版本。BasicSR 已被验证在 gcc5 $\sim$ gcc7 版本下可以成功编译 C++ 编译算子。
204 | 
205 | \item \textbf{Q3: 安装路径混淆的问题}
206 | 
207 | 很多问题都是由于安装路径混淆,其主要原因是本地 clone 代码和 pip 安装包两个方式被同时执行。
208 | 
209 | 具体而言,如果先通过 pip 安装了 BasicSR,随后又使用本地 clone 的方式进行安装,此时项目中调用的 BasicSR 路径还是 pip 安装的 BasicSR;反过来,如果先使用本地 clone 的方式进行安装,随后又使用 pip 安装,此时项目中调用的 BasicSR 路径还是本地 clone 下的 BasicSR (分别如图\ref{fig:false-clone-install}和图\ref{fig:false-pip-install}所示)。
210 | 
211 | \begin{enumerate}
212 | \item 通过本地 clone 安装成功的时候,此时使用 \texttt{pip list} 命令查看 basicsr 路径:
213 | \begin{figure}[H]
214 | %\vspace{-0.5cm}
215 | \begin{center}
216 | %\fbox{\rule{0pt}{2.5in} \rule{0.9\linewidth}{0pt}}
217 | \includegraphics[width=0.7\linewidth]{figures/installation_clone_install_location.jpg}
218 | \caption{本地 clone 安装成功时的 basicsr 路径显示}
219 | \label{fig:correct-clone-install}
220 | \end{center}
221 | \vspace{-0.5cm}
222 | \end{figure}
223 | 
224 | \item 通过 pip 安装成功的时候,此时使用 \texttt{pip list} 命令查看 basicsr 路径:
225 | \begin{figure}[H]
226 | %\vspace{-0.5cm}
227 | \begin{center}
228 | %\fbox{\rule{0pt}{2.5in} \rule{0.9\linewidth}{0pt}}
229 | \includegraphics[width=0.7\linewidth]{figures/installation_pip_install_location.jpg}
230 | \caption{pip 安装成功时的 basicsr 路径显示 (如果指向 anaconda 下的路径,也是正常的)}
231 | \label{fig:correct-pip-install}
\item \textbf{Q3: Confused installation paths}

Many problems are caused by confused installation paths, mainly because the local clone installation and the pip package installation were both performed.

Specifically, if BasicSR is first installed via pip and then installed again from a local clone, the BasicSR the project actually imports is still the pip-installed one; conversely, if it is first installed from a local clone and then via pip, the BasicSR the project imports is still the locally cloned one (see Figure~\ref{fig:false-clone-install} and Figure~\ref{fig:false-pip-install}, respectively).

\begin{enumerate}
\item When the local clone installation succeeds, \texttt{pip list} shows the basicsr path as:
\begin{figure}[H]
\begin{center}
\includegraphics[width=0.7\linewidth]{figures/installation_clone_install_location.jpg}
\caption{The basicsr path shown after a successful local clone installation}
\label{fig:correct-clone-install}
\end{center}
\vspace{-0.5cm}
\end{figure}

\item When the pip installation succeeds, \texttt{pip list} shows the basicsr path as:
\begin{figure}[H]
\begin{center}
\includegraphics[width=0.7\linewidth]{figures/installation_pip_install_location.jpg}
\caption{The basicsr path shown after a successful pip installation (a path under anaconda is also normal)}
\label{fig:correct-pip-install}
\end{center}
\vspace{-0.5cm}
\end{figure}

\item If the pip installation came first and the local clone installation second, \texttt{pip list} shows:
\begin{figure}[H]
\begin{center}
\includegraphics[width=0.7\linewidth]{figures/installation_pip_install_location.jpg}
\caption{The basicsr path does not point to the locally cloned BasicSR}
\label{fig:false-clone-install}
\end{center}
\vspace{-0.5cm}
\end{figure}

\item If the local clone installation came first and the pip installation second, \texttt{pip list} shows:
\begin{figure}[H]
\begin{center}
\includegraphics[width=0.7\linewidth]{figures/installation_clone_install_location.jpg}
\caption{The basicsr path does not point to the BasicSR under the Python (or anaconda) environment}
\label{fig:false-pip-install}
\end{center}
\vspace{-0.5cm}
\end{figure}

\end{enumerate}
For the two erroneous cases above (Figure~\ref{fig:false-clone-install} and Figure~\ref{fig:false-pip-install}), the fix is to uninstall the installed BasicSR first, and then reinstall it with whichever method the project needs:
\begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
pip uninstall basicsr
\end{minted}

\item \textbf{Q4: How do I update to the latest BasicSR?}

\begin{enumerate}
\item For an installation from a local clone, keep the local BasicSR code in sync with the \href{https://github.com/XPixelGroup/BasicSR}{remote BasicSR repository}.
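A minimal sync sketch, assuming the default \texttt{origin} remote and the \texttt{master} branch:
\begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
cd BasicSR
git pull origin master
# then re-run the install command, e.g. python setup.py develop
\end{minted}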
\item For a pip installation:
\begin{minted}[xleftmargin=20pt,bgcolor=bg]{bash}
pip install basicsr --upgrade
\end{minted}
\end{enumerate}

\item \textbf{Q5: How do I resolve ``version'' errors when running the code?}

Sometimes running the code produces an error like the following:
\begin{figure}[H]
\begin{center}
\includegraphics[width=0.8\linewidth]{figures/installation_version.jpg}
\caption{Example of the version problem}
\label{fig:version}
\end{center}
\vspace{-0.5cm}
\end{figure}
In that case, try:
\begin{enumerate}
\item re-running the BasicSR installation command;
\item commenting out the version-related code.
\end{enumerate}

\end{enumerate}

\begin{hl}
If you run into other problems during installation, you can report them in our BasicSR WeChat group or QQ group (both can be found on the \href{https://github.com/XPixelGroup/BasicSR/blob/master/README_CN.md}{BasicSR project page}) or via \href{https://github.com/XPixelGroup/BasicSR/issues}{GitHub issues}; we will keep adding common problems to this section.
\end{hl}

\end{document}
--------------------------------------------------------------------------------
/latex/sections/metrics.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\chapter{Metrics}\label{chapter:metrics}
\vspace{-2cm}

This chapter introduces the evaluation metrics commonly used in image super-resolution research, and how to compute them within the BasicSR framework.

% ------------------------------------------------------------------------------
\section{Overview}

The whirlwind development of deep learning has produced a steady stream of image processing algorithms that generate restored images with less distortion or with more perceptually pleasing results. However, one key bottleneck limiting further progress in image processing is the evaluation mechanism. Although the human eye can distinguish perceptually better images almost effortlessly, it is challenging for an algorithm to measure visual quality fairly. We usually evaluate by measuring the similarity between the restored image and the ground truth (GT) with image quality assessment (IQA) methods. Recently, IQA methods that require no reference image, such as Ma and the Perceptual Index (PI), have also been used to evaluate various algorithms. To some extent, these IQA methods are a main reason for the field's rapid progress, since they provide a quantitative benchmark that fosters algorithms with better scores.

We first introduce some background on IQA. IQA methods measure image quality, which may degrade during acquisition, compression, reproduction, and post-processing. Depending on the use scenario, IQA methods can be divided into full-reference methods (FR-IQA) and no-reference methods (NR-IQA). FR-IQA methods usually measure the similarity between two images from the perspective of information or perceptual-feature similarity, and have been widely applied to evaluate image/video coding, restoration, and communication quality. Besides the most widely used PSNR, FR-IQA has been studied extensively, dating back at least to SSIM (2004), which first introduced structural information into similarity measurement. PSNR and SSIM are also the most widely used metrics in image restoration research. Beyond these, many FR-IQA methods, such as IFC, VSI, and FSIM, have been proposed to close the gap between IQA results and human judgment. However, as newly emerging algorithms keep improving restoration results, the quantitative scores of PSNR and SSIM agree less and less with perceptual quality. Studies have pointed out that, in perception-oriented image processing, the distortion measured by metrics such as PSNR and SSIM conflicts with the perceptual quality an image exhibits. Consequently, metrics that agree better with human perception, such as LPIPS, are also used to evaluate image restoration algorithms.

Besides the FR-IQA methods above, NR-IQA methods are often used to assess image quality when no reference image is available; a typical scenario is evaluating restoration results on real-world images. Popular NR-IQA methods include NIQE, BRISQUE, and PI. Some recent works combine NR-IQA and FR-IQA methods to evaluate image restoration (IR) algorithms.

% ------------------------------------------------------------------------------
\section{PSNR}

PSNR (peak signal-to-noise ratio) is one of the most widely used evaluation metrics in image processing research. It is an engineering term for the ratio between the maximum possible power of a signal and the power of the corrupting noise that affects its fidelity. PSNR is usually expressed in logarithmic decibel units, abbreviated dB. It is defined on top of the pixel-wise mean squared error (MSE). Given two single-channel images $I$ and $I'$ of size $m\times n$, where $I$ is the high-quality reference image and $I'$ is the degraded low-quality or restored image, their MSE is defined as:
$$
\mathrm{MSE}=\frac{1}{mn}\sum_{i=1}^{m}\sum_{j=1}^n(I[i,j]-I'[i,j])^2.
$$
PSNR is then defined as:
$$
\mathrm{PSNR}=10\times \log_{10}\Big(\frac{\mathrm{Peak}^2}{\mathrm{MSE}}\Big)=20\times\log_{10}\Big(\frac{\mathrm{Peak}}{\sqrt{\mathrm{MSE}}}\Big).
$$
Here $\mathrm{Peak}$ is the maximum pixel intensity; if each sample is represented with 8 bits, then $\mathrm{Peak}=255$.

In the BasicSR framework, the PSNR-related code lives in \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/metrics/psnr_ssim.py#L12}{basicsr/metrics/psnr\_ssim.py}. For \texttt{numpy.ndarray} inputs, we assume \texttt{uint8} data with shape \texttt{[h,w,c]} (height, width, channels), BGR channel order for color images, and integer pixel values in $[0,255]$. In this case, PSNR is computed with the following function:

\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg,breaklines]{python}
@METRIC_REGISTRY.register()
def calculate_psnr(img, img2, crop_border, input_order='HWC',
                   test_y_channel=False, **kwargs):
    # img, img2: the input images
    # crop_border: number of border pixels to crop before computing PSNR;
    #   the few pixels at the border usually carry larger errors when images
    #   are processed by neural networks
    # input_order: order of the input dimensions, 'HWC' by default
    # test_y_channel: whether to compute PSNR in Y space, where Y is the luma
    #   channel of the YCbCr representation
    ...
\end{minted}
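A minimal usage sketch (the file names are placeholders; \texttt{calculate\_psnr} is assumed to be importable from \texttt{basicsr.metrics}, as the registration above suggests):

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{python}
import cv2

from basicsr.metrics import calculate_psnr

# cv2.imread returns uint8 BGR HWC arrays, matching the convention above
img_gt = cv2.imread('gt.png')
img_restored = cv2.imread('restored.png')
psnr = calculate_psnr(img_gt, img_restored, crop_border=4, test_y_channel=False)
print(f'PSNR: {psnr:.4f} dB')
\end{minted}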
When the input is of type \texttt{torch.Tensor}, we assume \texttt{float32} data with shape \texttt{[n,c,h,w]} (batch size, channels (3 or 1), height, width), RGB channel order for color images, and floating-point pixel values in $[0,1]$. In this case, PSNR is computed with the following function ending in \texttt{\_pt}:

\begin{minted}[xleftmargin=20pt,linenos,bgcolor=bg,breaklines]{python}
@METRIC_REGISTRY.register()
def calculate_psnr_pt(img, img2, crop_border,
                      test_y_channel=False, **kwargs):
    ...
\end{minted}

Note that this function supports computing PSNR for a whole batch of data at once.
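For example, a sketch with random tensors standing in for a batch of images (importing directly from the module linked above; assumed, so check the current API):

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{python}
import torch

from basicsr.metrics.psnr_ssim import calculate_psnr_pt

img = torch.rand(4, 3, 64, 64)   # a batch of 4 RGB images with values in [0, 1]
img2 = torch.rand(4, 3, 64, 64)
psnr = calculate_psnr_pt(img, img2, crop_border=4)  # one PSNR value per image in the batch
\end{minted}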
In practice, PSNR implementations differ slightly across authors and versions. We compared our implementation against others; the results are shown in Table~\ref{tab:psnr}:

\begin{table}[h]
\centering
\begin{tabular}{c|c|c|c|c|c}
\toprule
Image & Color space & Matlab & Numpy & PyTorch CPU & PyTorch GPU \\
\midrule
Set14/baboon & RGB & 20.419710 & 20.419710 & 20.419710 & 20.419710 \\
Set14/baboon & Y & -- & 22.441898 & 22.441899 & 22.444916 \\
Set14/comic & RGB & 20.239912 & 20.239912 & 20.239912 & 20.239912 \\
Set14/comic & Y & -- & 21.720398 & 21.720398 & 21.721663 \\
\bottomrule
\end{tabular}
\caption{Comparison of different PSNR implementations}
\label{tab:psnr}
\end{table}

% ------------------------------------------------------------------------------
\section{SSIM}
SSIM (structural similarity index) is another widely used image similarity metric. Unlike PSNR, which evaluates pixel-wise differences between images, SSIM focuses more on the structural information in an image, which is closer to how humans perceive visual information. SSIM is therefore generally considered to agree better with human judgments of image quality.

The underlying idea of structural similarity is that natural images are highly structured: neighboring pixels are strongly correlated, and this correlation carries the structural information of the objects in the scene. The human visual system habitually extracts such structural information when viewing an image. Hence, measuring structural distortion is an important part of designing a metric for image quality degradation.

Given two image signals $\mathbf{x}$ and $\mathbf{y}$, SSIM is defined as:
$$\text{SSIM}(\mathbf {x} ,\mathbf {y} )=[l(\mathbf {x} ,\mathbf {y} )]^{\alpha }[c(\mathbf {x} ,\mathbf {y} )]^{\beta }[s(\mathbf {x} ,\mathbf {y} )]^{\gamma },$$
SSIM consists of three comparisons: luminance $l(\mathbf {x} ,\mathbf {y} )$, contrast $c(\mathbf {x} ,\mathbf {y} )$, and structure $s(\mathbf {x} ,\mathbf {y} )$, defined as:
$$
l(\mathbf {x} ,\mathbf {y} )={\frac {2\mu _{x}\mu _{y}+C_{1}}{\mu _{x}^{2}+\mu _{y}^{2}+C_{1}}},
c(\mathbf {x} ,\mathbf {y} )={\frac {2\sigma _{x}\sigma _{y}+C_{2}}{\sigma _{x}^{2}+\sigma _{y}^{2}+C_{2}}},
s(\mathbf {x} ,\mathbf {y} )={\frac {\sigma _{xy}+C_{3}}{\sigma _{x}\sigma _{y}+C_{3}}}.
$$
Here $\alpha >0$, $\beta >0$, $\gamma >0$ adjust the relative importance of luminance, contrast, and structure. $\mu _{x}$ and $\mu _{y}$, $\sigma _{x}$ and $\sigma_{y}$ denote the means and standard deviations of $\mathbf{x}$ and $\mathbf {y}$, $\sigma_{xy}$ is the covariance of $\mathbf{x}$ and $\mathbf{y}$, and $C_{1}$, $C_{2}$, $C_{3}$ are constants that stabilize the result. In practice, for simplicity, one sets $\alpha =\beta =\gamma =1$ and $C_{3}=C_{2}/2$, which gives:
$$
{\text{SSIM}}(\mathbf {x} ,\mathbf {y} )={\frac {(2\mu _{x}\mu _{y}+C_{1})(2\sigma _{xy}+C_{2})}{(\mu _{x}^{2}+\mu _{y}^{2}+C_{1})(\sigma _{x}^{2}+\sigma _{y}^{2}+C_{2})}}.
$$

When computing the structural similarity index of two images in practice, we specify local windows, typically $N\times N$ patches, and compute the index of the signals inside each window. The window is then moved pixel by pixel until a local index has been computed at every position of the image. The average of all local indices is the structural similarity index of the two images; the larger its value, the more similar the two signals. Generally, the trends of PSNR and SSIM agree: when PSNR is high, SSIM is usually high as well.

In the BasicSR framework, the SSIM-related code also lives in \href{https://github.com/XPixelGroup/BasicSR/blob/master/basicsr/metrics/psnr_ssim.py#L12}{basicsr/metrics/psnr\_ssim.py}. Analogous to the PSNR interface, it provides the \texttt{calculate\_ssim} function for \texttt{numpy.ndarray} inputs and the \texttt{calculate\_ssim\_pt} function for \texttt{torch.Tensor} inputs. The conventions for input type, format, shape, and parameters are the same as described for PSNR.
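Usage mirrors the PSNR example above (placeholder paths again; \texttt{calculate\_ssim} is assumed to be importable from \texttt{basicsr.metrics}):

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{python}
import cv2

from basicsr.metrics import calculate_ssim

img_gt = cv2.imread('gt.png')            # uint8, BGR, HWC
img_restored = cv2.imread('restored.png')
ssim = calculate_ssim(img_gt, img_restored, crop_border=4, test_y_channel=True)
\end{minted}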
Unlike PSNR, SSIM implementations differ considerably across versions. In BasicSR, we stay consistent with the original Matlab implementation. We compared our implementation against others; the results are shown in Table~\ref{tab:ssim}:

\begin{table}[h]
\centering
\begin{tabular}{c|c|c|c|c|c}
\toprule
Image & Color space & Matlab & Numpy & PyTorch CPU & PyTorch GPU \\
\midrule
Set14/baboon & RGB & 0.391853 & 0.391853 & 0.391853 & 0.391853 \\
Set14/baboon & Y & -- & 0.453097 & 0.453097 & 0.453171 \\
Set14/comic & RGB & 0.567738 & 0.567738 & 0.567738 & 0.567738 \\
Set14/comic & Y & -- & 0.585511 & 0.585511 & 0.585522 \\
\bottomrule
\end{tabular}
\caption{Comparison of different SSIM implementations}
\label{tab:ssim}
\end{table}

% ------------------------------------------------------------------------------
\section{NIQE}

To be added in the second edition.

% ------------------------------------------------------------------------------
\section{How to use the metrics}

% ----------------------------------
\subsection{Specifying metrics in the configuration file}

During the validation phase of training, or when testing with \texttt{test.py}, we can specify the desired metrics in the configuration file, and the program will then compute them automatically.
For example, the configuration below computes PSNR and NIQE, calling \texttt{calculate\_psnr} and \texttt{calculate\_niqe} respectively.

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{yaml}
# validation settings
val:
  ...
  metrics:  # the metrics used during validation are configured here
    psnr:  # metric name; it can be arbitrary
      type: calculate_psnr  # the metric type to use
      # the options below are flexible and differ from metric to metric
      crop_border: 4  # crop this many border pixels before computing the metric
      test_y_channel: false  # whether to compute on the Y channel of YCbCr
      better: higher  # whether higher or lower is better; 'higher' by default
    niqe:  # another metric used during validation
      type: calculate_niqe
      crop_border: 4
      better: lower  # the lower, the better
\end{minted}

% ----------------------------------
\subsection{Computing metrics with scripts}

We also provide scripts that invoke the metrics in \href{https://github.com/XPixelGroup/BasicSR/tree/master/scripts/metrics}{scripts/metrics}.
Readers can compute metrics following the instructions there.
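For example, computing PSNR/SSIM over a folder of results might look like this (the script name matches that folder, but the flags and paths here are assumptions; check each script's \texttt{-{}-help} or source before use):

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{bash}
# folder paths and flags are assumptions -- consult the script's argparse options
python scripts/metrics/calculate_psnr_ssim.py --gt datasets/Set14/GTmod12 --restored results/Set14
\end{minted}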
\end{document}
--------------------------------------------------------------------------------
/latex/sections/overview.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\chapter{Overview}
\vspace{-2cm}

This chapter gives a brief overview of this document (Section~\ref{overview:docs-intro}), of BasicSR (Section~\ref{overview:basicsr-intro}), and of its usage modes and scenarios (Section~\ref{overview:scenario}).

% ------------------------------------------------------------------------------
\section{About this document}\label{overview:docs-intro}

This document aims to fully describe the design and framework of BasicSR, serving as a getting-started guide for newcomers and a day-to-day reference for users.

It does not cover individual functions and code; for those, please consult the online BasicSR API documentation. We actually recommend reading the code directly, which gives a more complete and detailed picture of the implementation.

\begin{hl} % ---------------- Highlight block ---------------- %
\textbf{BasicSR API documentation}

The BasicSR API documentation is continuously updated and published on readthedocs.io:

\url{https://basicsr.readthedocs.io/en/latest/}

Access from mainland China may be slow; we will consider exporting a PDF version as an appendix later.
\end{hl}

This document also does not provide an introduction to super-resolution and restoration as research topics. Here we make a big promise: we will later release a document that introduces the field and walks through its classic works.

% ------------------------------------------------------------------------------
\section{Introduction to BasicSR}\label{overview:basicsr-intro}

BasicSR is an open-source project that aims to provide a convenient, easy-to-use toolbox for image and video super-resolution, restoration, and enhancement. We hope it can:

\centerline{help newcomers get started faster;}
\centerline{help researchers run experiments more conveniently;}
\centerline{make state-of-the-art algorithms easier for more people to use.}

The BasicSR codebase received its first commit on April 20, 2018, and has grown and matured along with research, competitions, and papers. It expanded from super-resolution algorithms to many other restoration and enhancement algorithms; accordingly, the meaning of SR in BasicSR extended from Super-Resolution to Super-Restoration.

On May 9, 2022, BasicSR reached a new milestone: it joined the XPixel family, where more partners work together to make BasicSR even better!

BasicSR is an open-source project, and we welcome more people to maintain and build it together :-)

\begin{exampleBox}[]{About the XPixel team}

XPixel is an academic organization whose vision is to make the world look clearer and better!

Website: \url{https://xpixel.group/}
\end{exampleBox}

% ------------------------------------------------------------------------------
\section{Usage modes and scenarios}\label{overview:scenario}

BasicSR is mainly used in two ways:

% ----------------------------------
\subsection{Local clone of the code}\label{overview:local-clone}

In this mode, we copy/clone the entire BasicSR codebase, so we can conveniently inspect the complete code of BasicSR, modify it, and use it.

We recommend this mode when reproducing or developing methods, since the whole picture of the code is visible and debugging is easier.

Its weaknesses are:

\begin{enumerate}
\item The repository contains a lot of code you will not use. BasicSR implements many methods, and most of that code is not needed in your own experiments. But once you understand the BasicSR code framework, you can rest assured that it does not interfere; the implementations are essentially independent of each other.
\item When you release the code of your newly developed method (say, NBCNN), you run into trouble: you have to release the entire BasicSR along with it instead of focusing on the core NBCNN code. In that situation, we can use the mode below: treating basicsr as a Python package.
\end{enumerate}

\begin{note} % ---------------- Note block ---------------- %
\textbf{Installing from a local clone of the repository}

See Section~\ref{installation:local-clone}: \nameref{installation:local-clone}.
\end{note}

% ----------------------------------
\subsection{basicsr as a Python package}\label{overview:package}

BasicSR also ships a standalone Python package --- basicsr --- published on \href{https://pypi.org/project/basicsr}{pypi}.
It can be installed via pip and provides the training framework, pipeline, and the functions and features already in BasicSR. You can conveniently build your own project on top of basicsr. For example,

\centerline{\href{https://github.com/xinntao/Real-ESRGAN}{Real-ESRGAN}, built on basicsr;}
\centerline{\href{https://github.com/TencentARC/GFPGAN}{GFPGAN}, built on basicsr.}

They reuse functions and features already in basicsr, so they can focus on the functionality of the new methods.
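For example, a project can directly reuse a network architecture that basicsr already ships; a minimal sketch (mirroring how Real-ESRGAN instantiates \texttt{RRDBNet}; check the current basicsr API for the exact signature):

\begin{minted}[xleftmargin=20pt,bgcolor=bg,breaklines]{python}
import torch

from basicsr.archs.rrdbnet_arch import RRDBNet

# reuse the RRDB network from basicsr instead of re-implementing it
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
out = model(torch.rand(1, 3, 64, 64))  # 4x upsampling: output is 1x3x256x256
\end{minted}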
Generally speaking, a deep learning project can be divided into the following parts:
\begin{itemize}
\item data: defines the training data that is fed to model training
\item arch (architecture): defines the network structure and the forward steps
\item model: defines the components needed in training (such as losses), one complete training step (forward pass, backward pass, gradient update, etc.), and other features such as validation
\item training pipeline: defines the training flow, chaining together the dataloader, model, validation, checkpoint saving, and so on
\end{itemize}

When developing a new method, we mostly modify data, arch, and model, while many pipelines and basic features are in fact shared.

BasicSR factors out these shared features, so we only need to care about developing data, arch, and model. The basicsr Python package further packages up the existing functions and features, so we only need to care about developing the new method itself.

\begin{note} % ---------------- Note block ---------------- %
\textbf{Installing basicsr as a Python package}

See Section~\ref{installation:pip-install}: \nameref{installation:pip-install}.
\end{note}

\begin{note} % ---------------- Note block ---------------- %
\textbf{How to develop on top of the basicsr Python package?}

We provide a template: \href{https://github.com/xinntao/BasicSR-examples}{BasicSR-examples}.

For the development guide, see: \url{https://github.com/xinntao/BasicSR-examples/blob/master/README_CN.md}.
\end{note}

Of course, using the basicsr Python package directly also has a drawback: it calls functions inside BasicSR, and when those functions do not meet your needs, or contain bugs, they are hard to modify (you have to edit them where the pip package is installed).

To cope with this, we usually develop from a local clone of the repository, and only when releasing the new method do we create a new repository based on \href{https://github.com/xinntao/BasicSR-examples}{BasicSR-examples} that uses the basicsr pip package.

% ----------------------------------
\section{Unit tests}\label{overview:unit-test}

We use unit tests mainly to guarantee the correctness of input/output shapes and of some pipelines.
In general we use them sparingly; BasicSR includes unit tests for the sake of completeness.

The functions in the \texttt{tests} directory are executed by the unit tests.
The unit tests require a GPU CUDA environment.

\begin{hl} % ---------------- Highlight block ---------------- %
\textbf{Unit test command}

\begin{minted}[xleftmargin=20pt,linenos,breaklines,bgcolor=bg]{bash}
python -m pytest tests/
\end{minted}
\end{hl}

\end{document}
--------------------------------------------------------------------------------
/latex/sections/scripts.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\chapter{Scripts}
\vspace{-2cm}

To be added in the second edition.

\end{document}
--------------------------------------------------------------------------------
/latex/sections/template.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\chapter{The BasicSR-examples template}\label{chapter:template}
\vspace{-2cm}

To be added in the second edition.

For now, see: \url{https://github.com/xinntao/BasicSR-examples}

\end{document}
--------------------------------------------------------------------------------
/latex/sections/xpixel_metaverse.tex:
--------------------------------------------------------------------------------
\documentclass[../main.tex]{subfiles}

\begin{document}

\newpage
{\LARGE\textbf{\faIcon{dice} XPixel Metaverse}}

%##################################################################################################
\begin{figure}[h]
\vspace{1cm}
\begin{center}
\includegraphics[width=\linewidth]{figures/XPixelMetaverse_small.jpg}
\vspace{-0.7cm}
\caption{The XPixel Metaverse. A higher-resolution version is available on the \href{https://xpixel.group/2022/06/06/poster.html}{official website}.}
\end{center}
\end{figure}
%##################################################################################################

The XPixel Metaverse crystallizes the collective wisdom of the XPixel Group (website: \url{https://xpixel.group/}). It embodies every XPixel member's persistence in research, pursuit of art, love of life, and responsibility to the world.
Follow the text guide below on a tour through XPixel's past and present.

{\large\textbf{Design philosophy}}

To present the research achievements of the XPixel Group completely and distinctively, we adopted the form of a map, using mountains, rivers, buildings, and other elements to naturally pair with individual research results, together forming the XPixel Metaverse. The continent of the map is surrounded by three oceans, while the mainland is dotted with XPixel's classic research results from 2014 to 2022, perfectly combining science and art.

\textbf{Oceans}

The oceans are named after XPixel's internal culture: Love, Focus, and Balance, symbolizing that the many research results on the continent were nurtured in this excellent culture. The rivers of the continent ultimately flow into these three oceans, symbolizing in turn that the group's results feed back into and strengthen this research culture.

\textbf{Continent}

The river running through the continent originates from the high mountain in the upper left, which represents SRCNN, the pioneering work of deep-learning super-resolution, symbolizing the mountain-like significance of this origin. The upper reach of the river is named Image Super Resolution (SR); the buildings and terrain along it are important results in classical single-image super-resolution. Along this reach also appears the earliest tributary, Blind, marking the emergence of blind super-resolution as a subfield.

Following the main stream down to a dam, the terrain changes from highland to lower and flatter land, symbolizing that, building on the early work and with more excellent students joining, research proceeded more smoothly than before. The lake below the dam, named Low-Level Vision, feeds three rivers:
\begin{itemize}
\item Interactive Modulation, for interactive, controllable restoration
\item Interpretation, for the interpretability of super-resolution networks
\item Video Processing, for video processing and restoration
\end{itemize}

The upper part of the map, through which Interpretation flows, has a desert landscape in stark contrast with the rest. Work on the interpretability of low-level vision is very scarce, and progress is especially hard, so the terrain is harsher than in other areas. Correspondingly, behind the desert lies a vast, unexplored space.

On the coast of the continent lies a harbor area: BasicSR, the largest open-source codebase in the super-resolution field. It is an important vehicle for, and the essence of, XPixel's research, and it has been widely used and recognized internationally, in keeping with the role of a harbor.

\textbf{Text and other elements}

The upper-left corner of the map bears the title of the work: XPixel Metaverse. Across the top is XPixel's mission: \textbf{Our mission is to make the world look clearer and better!} The upper-right corner holds the XPixel logo, and the two little phoenixes in the desert and along the coast are XPixel's mascots.

The terrains and buildings on the continent are named after XPixel's research results, each with a caption of at most four lines, from top to bottom:

\begin{itemize}
\item a pioneering result of a field, with the new field marked in italics
\item the short name of the work
\item the venue and year of publication
\item any honors of the work, noted at the bottom
\end{itemize}

To make the results of each field easier to identify, every field is colored with one color from the XPixel logo in the upper-right corner of the map.

\vspace{0.5cm}
The XPixel Metaverse carries our history and beckons a bright future! May outstanding scholars around the world join us to make the world clearer and better!
\end{document}
--------------------------------------------------------------------------------
/latex/style.sty:
--------------------------------------------------------------------------------
\usepackage[T2A, T1]{fontenc} % ?!?
\usepackage[utf8]{inputenc} % allows non-ASCII characters
\usepackage[english]{babel} % babel language setting
\AtBeginDocument{%
\renewcommand\tablename{表}
\renewcommand\figurename{图}
}
\usepackage{graphicx} % graphics
\usepackage{tikz} % diagrams with pictures (needed for the nut icons ;) )
%\usepackage{media9} % embedding from YouTube
\usepackage{animate} % animations
\usepackage{wrapfig} % figures wrapped by text at the page margins
%\graphicspath{{Pictures/}} % Specifies the directory where pictures are stored
\usepackage{pdfpages} % append PDFs at the end of the document

% mathematics
\usepackage{mathtools} % ?!?
\usepackage{amsmath,amsfonts,amssymb,amsthm} % equations, theorems, and symbols in the standard AMS style
\allowdisplaybreaks % allow page breaks inside multi-line align formulas

%----------------------------------------------------------------------------------------
%	LAYOUT
%----------------------------------------------------------------------------------------

% page style
\makepagestyle{custom}
\makeevenfoot{custom}{}{\thepage}{} % page number at the bottom
\makeoddfoot{custom}{}{\thepage}{} % page number at the bottom
\makeevenhead{custom}{}{}{} % empty header
\makeoddhead{custom}{}{}{} % empty header
\pagestyle{custom} % apply the style
\setlrmarginsandblock{1.5cm}{1.5cm}{*} % left/right margins
\setulmarginsandblock{2cm}{2cm}{*} % top/bottom margins
\checkandfixthelayout % let memoir recalculate the layout

% a nice font
\usepackage{opensans}

% script (calligraphic) symbols
\usepackage{mathrsfs}

% details
\newcommand{\horrule}[1]{\rule{\linewidth}{#1}} % horizontal rule for the title page
\usepackage{indentfirst} % indent the first paragraph
\interfootnotelinepenalty=10000 % penalty for breaking footnotes across pages
\tolerance=10000 % tolerance for loose lines (penalizes lines running past the right margin)

% redefine emphasis (\emph) as bold italic
\let\emph\relax
\DeclareTextFontCommand{\emph}{\bfseries\em}

% colors
\usepackage{xcolor}
\definecolor{Blue}{rgb}{.3,.1,1}
\definecolor{ChadBlue}{rgb}{.1,.1,.5}
\definecolor{ChadRed}{rgb}{.4,0,0}
\definecolor{ChadPurple}{rgb}{.5,0,.5}
\usepackage{afterpage} % for recoloring the last page black

% footnotes in minipages use Latin letters by default, which causes trouble
%\let\thempfootnote\thefootnote % replaces footnote symbols with digits, but this can be confusing
\def\@xfootnote[#1]{% allows \footnote[symbol]{text}
\protected@xdef\@thefnmark{#1}%
\@footnotemark\@footnotetext}


% supposedly helps hyphenate English text fragments correctly
\newcommand{\ENGLISH}[1]{\selectlanguage{english}{#1}\selectlanguage{russian}}
% place before binary operators in inline formulas so they are duplicated across line breaks
\newcommand*{\HM}[1]{#1\nobreak\discretionary{}{\hbox{\(\mathsurround=0pt #1\)}}{}}

% colored squares
\def \colorsquare#1{\fcolorbox{#1}{#1}{\rule{0pt}{4pt}\rule{4pt}{0pt}}}

%----------------------------------------------------------------------------------------
%	CHAPTERS, SECTIONS, SUBSECTIONS
%----------------------------------------------------------------------------------------

% chapter heading format
\usepackage{lipsum}
\usepackage[explicit]{titlesec}
\titleformat{\chapter}
{\bfseries\huge}
{}
{0pt}
{
\ifnum\value{chapter}>0 \titlerule[3pt] ~\raisebox{-1.5pt}{ \scshape{第}~\thechapter~\scshape{章}} ~\titlerule[3pt] %
\\\vspace{.05cm}\titlerule \\\filcenter #1 \\\vspace{.25cm}\titlerule \fi
\ifnum\value{chapter}=0 #1 \fi
}

% section format
\titleformat{\section}
{\needspace{10\baselineskip}\color{ChadBlue}\normalfont\Large\bfseries}
{\underline{\S\color{ChadBlue}\thesection. #1}}{2em}{}

% memoir: numbering depth and ToC depth settings
%\setsecnumdepth{subsection}
\maxtocdepth{subsection}
\setsecnumdepth{subsubsection}
%\maxtocdepth{subsubsection}

% subsection format
\titleformat{\subsection}
{\needspace{7\baselineskip}\color{ChadBlue}\normalfont\large\bfseries}
{\color{ChadBlue}\thesubsection. #1}{1em}{}

% subsubsection format
\titleformat{\subsubsection}
{\needspace{4\baselineskip}\color{ChadBlue}\normalfont\large\bfseries}
{\color{ChadBlue}\thesubsubsection. #1}{0.6em}{}

%----------------------------------------------------------------------------------------
%	TABLES
%----------------------------------------------------------------------------------------

\usepackage{booktabs} % toprule, midrule, bottomrule
\renewcommand{\arraystretch}{1.25} % row spacing
\usepackage{multirow} % merge multiple rows into one

% horizontal dashed lines for tables
\usepackage{array}
\usepackage{arydshln}
\setlength\dashlinedash{0.2pt}
\setlength\dashlinegap{1.5pt}
\setlength\arrayrulewidth{0.3pt}

%----------------------------------------------------------------------------------------
%	REFERENCES
%----------------------------------------------------------------------------------------
%\usepackage[square,numbers]{natbib}
\bibliographystyle{apalike_fullname}
%%\usepackage[nottoc,notlot,notlof]{tocbibind}
\renewcommand{\bibsection}{\section*{Reference}}

%----------------------------------------------------------------------------------------
%	HYPERLINKS
%----------------------------------------------------------------------------------------
% \usepackage[colorlinks,breaklinks,
% bookmarks=false,
% pdfstartview=Fit, % for fitting entire page; FitW just fits width
% pdfview=Fit, % after traversing a hyperlink
% linkcolor=ChadPurple,
% urlcolor=ChadRed,
% citecolor=ChadBlue,
% hyperfootnotes=false]{hyperref}

\usepackage[pagebackref=true,breaklinks=true,colorlinks,bookmarks=false]{hyperref}
\usepackage[figure,table]{hypcap} % Correct a problem with hyperref
\urlstyle{rm} % so it doesn't use a typewriter font for url's.

% when a link is clicked, show the target in the middle of the page rather than at the very top!
\makeatletter
\newcommand\org@hypertarget{}
\let\org@hypertarget\hypertarget
\renewcommand\hypertarget[2]{%
\Hy@raisedlink{\org@hypertarget{#1}{}}#2%
} \makeatother

%----------------------------------------------------------------------------------------
%	THEOREMS AND EXAMPLES - OLD VERSION
%	[abandoned in favor of tcolorbox so that wrapfigure can be used
%	inside proofs; wrapfigure conflicts with both theorembox and mdframed]
%----------------------------------------------------------------------------------------

% Create a box for theorems
% \newtheoremstyle{theorembox}% Theorem style name
% {0pt}% Space above
% {0pt}% Space below
% {\normalfont}% Body font
% {}% Indent amount
% {\small\bfseries\sffamily\color{ChadBlue}}% Theorem head font
% {\;}% Punctuation after theorem head
% {0.25em}% Space after theorem head
% {\underline{\small\sffamily\color{ChadBlue}\thmname{#1}\nobreakspace\thmnumber{\@ifnotempty{#1}{}\@upn{#2}}% Theorem text (e.g. Theorem 2.1)
% \thmnote{\nobreakspace\the\thm@notefont\sffamily\bfseries\color{ChadBlue}---\nobreakspace#3}}:} % Optional theorem note

% % Assign it, creating the theorem box
% \theoremstyle{theorembox}
% \newtheorem{theoremeT}{Theorem}


% Create a box for examples
% \newtheoremstyle{examplebox}% Example style name
% {0pt}% Space above
% {0pt}% Space below
% {\normalfont}% Body font
% {}% Indent amount
% {\small\bfseries\sffamily\color{ChadRed}}% Theorem head font
% {\;}% Punctuation after theorem head
% {0.25em}% Space after theorem head
% {\underline{\small\sffamily\color{ChadRed}\thmname{#1}\nobreakspace\thmnumber{\@ifnotempty{#1}{}\@upn{#2}}% Theorem text (e.g. Theorem 2.1)
% \thmnote{\nobreakspace\the\thm@notefont\sffamily\bfseries\color{ChadRed}---\nobreakspace#3}}:} % Optional theorem note

% % Assign it, creating the example box
% \theoremstyle{examplebox}
% \newtheorem{exampleT}{Example}


% Theorems
% \newmdenv[skipabove=7pt,
% skipbelow=7pt,
% backgroundcolor=ChadBlue!2,
% linecolor=ChadBlue,
% innerleftmargin=5pt,
% innerrightmargin=5pt,
% innertopmargin=5pt,
% leftmargin=0cm,
% rightmargin=0cm,
% innerbottommargin=5pt]{tBox}

% % assign
% \newenvironment{theorem}{\begin{tBox}\begin{theoremeT}}{\end{theoremeT}\end{tBox}}

% Examples
% \newmdenv[skipabove=7pt,
% skipbelow=7pt,
% backgroundcolor=ChadRed!2,
% linecolor=ChadRed,
% innerleftmargin=5pt,
% innerrightmargin=5pt,
% innertopmargin=5pt,
% leftmargin=0cm,
% rightmargin=0cm,
% innerbottommargin=5pt]{eBox}

% % assign
% \newenvironment{example}{\begin{eBox}\begin{exampleT}}{\end{exampleT}\end{eBox}}

%----------------------------------------------------------------------------------------
%	THEOREMS
%----------------------------------------------------------------------------------------

\makeatletter % fixes some problem
\renewcommand{\qedsymbol}{$\blacksquare$} % end-of-proof symbol

% breakable lets boxes break at page boundaries
% enhanced jigsaw from skins does so cleanly, without stray rules at the break
\usepackage[breakable, skins]{tcolorbox}

% tcolorbox version of the theorem boxes
\def \ifempty#1{\def\temp{#1}\ifx\temp\empty}
\def \theoremmacro#1#2{\ifempty{#2}Theorem #1\else Theorem #1 --- #2\fi}
\newtcolorbox[auto counter]{theoremBox}[2][]{
enhanced jigsaw,
breakable,
colback=ChadBlue!2, % background color
colframe=ChadBlue, % frame color
left=1pt, % padding
right=1pt,
top=1pt,
bottom=1pt,
before skip=10pt plus 2pt,
after skip=10pt plus 2pt,
arc=0mm, % corner rounding
boxrule=0.5pt, % frame thickness
parbox=false, % fixes the paragraph-indentation problem inside the box
detach title, % no separate box for the title
fonttitle=\small\sffamily\bfseries\color{ChadBlue},
title={\bfseries\underline{\theoremmacro{\thetcbcounter}{#2}}: \,},
before upper={\tcbtitle},
#1
}

\newenvironment{theorem}[1][]{\begin{theoremBox}{#1}}{\end{theoremBox}}

\newcommand{\tagqed}{\tag*{$\blacksquare$}}
\newcommand*{\QED}{%
\leavevmode\unskip\penalty9999 \hbox{}\nobreak\hfill
\quad\hbox{$\blacksquare$}%
}
\renewenvironment{proof}[1][Proof]{\par\vspace{2mm}\textit{#1}. }{\QED}
\newcommand*{\beginproof}[1][Proof]{\par\vspace{2mm}\textit{#1}. }

%----------------------------------------------------------------------------------------
%	EXAMPLES
%----------------------------------------------------------------------------------------

% same as for theorems
%\def \examplemacro#1#2{\ifempty{#2}Example #1\else Example #1 --- #2\fi}
\def \examplemacro#1#2{#2}
\newtcolorbox[]{exampleBox}[2][]{
enhanced jigsaw,
breakable,
colback=ChadRed!2, % background color
colframe=ChadRed, % frame color
left=1pt, % padding
right=1pt,
top=1pt,
bottom=1pt,
before skip=10pt plus 2pt,
after skip=10pt plus 2pt,
arc=0mm, % corner rounding
boxrule=0.5pt, % frame thickness
parbox=false, % fixes the paragraph-indentation problem inside the box
detach title, % no separate box for the title
fonttitle=\small\sffamily\bfseries\color{ChadRed},
title={\bfseries{\examplemacro{\thetcbcounter}{#2}}: \,},
before upper={\tcbtitle},
#1
}

\newenvironment{example}[1][]{\begin{exampleBox}{#1}}{\end{exampleBox}}

%----------------------------------------------------------------------------------------
%	PROPOSITIONS AND DEFINITIONS
%----------------------------------------------------------------------------------------

% style for propositions and definitions
\newtheoremstyle{blacknumbox} % Theorem style name
{0pt}% Space above
{0pt}% Space below
{\normalfont}% Body font
{}% Indent amount
{\small\bfseries\sffamily}% Theorem head font
{\;}% Punctuation after theorem head
{0.25em}% Space after theorem head
{\small\sffamily\thmname{#1}\nobreakspace\thmnumber{\@ifnotempty{#1}{}\@upn{#2}% Theorem text (e.g. Theorem 2.1)
\thmnote{\nobreakspace\the\thm@notefont\sffamily\bfseries---\nobreakspace#3}}}% Optional theorem note

% assign
\theoremstyle{blacknumbox}
\newtheorem*{definitionT}{\faIcon{bell}} % starred version: no numbering
\newtheorem*{propositionT}{\faIcon{bookmark}}

% now the boxes themselves
\RequirePackage[framemethod=default]{mdframed}

\newmdenv[skipabove=7pt,
skipbelow=7pt,
rightline=false,
leftline=true,
topline=false,
bottomline=false,
linecolor=Blue,
innerleftmargin=5pt,
innerrightmargin=5pt,
innertopmargin=7pt,
leftmargin=0cm,
rightmargin=0cm,
linewidth=4pt,
innerbottommargin=7pt]{dBox}

\newenvironment{hl}{\begin{pBox}\begin{definitionT}}{\end{definitionT}\end{pBox}}

% proposition box
\newmdenv[skipabove=7pt,
skipbelow=7pt,
rightline=false,
leftline=true,
topline=false,
bottomline=false,
linecolor=ChadPurple,
backgroundcolor=black!5,
innerleftmargin=5pt,
innerrightmargin=5pt,
innertopmargin=7pt,
leftmargin=0cm,
rightmargin=0cm,
linewidth=4pt,
innerbottommargin=7pt]{pBox}

% assign
\newenvironment{note}{\begin{dBox}\begin{propositionT}}{\end{propositionT}\end{dBox}}

%----------------------------------------------------------------------------------------
%	REMARKS AND ALGORITHMS
%----------------------------------------------------------------------------------------

% REMARK ENVIRONMENT
\newenvironment{remark}{\par\vspace{3pt}\small % Vertical white space above the remark and smaller font size
\begin{list}{}{
\leftmargin=35pt % Indentation on the left
\rightmargin=25pt}\item\ignorespaces % Indentation on the right
\makebox[-2.5pt]{\begin{tikzpicture}[overlay]
\node[inner sep=2pt,outer sep=0pt] at (-15pt,0pt){\includegraphics[width=0.75cm]{Images/nut}};\end{tikzpicture}} % Orange R in a circle
\advance\baselineskip -1pt}{\end{list}\vskip5pt} % Tighter line spacing and white space after remark

% ALGORITHM BOX
\newtcolorbox[auto counter]{algorithm}[2][]{
enhanced jigsaw,
breakable=true,
before skip=20pt plus 2pt,after skip=20pt plus 2pt,
colback=ChadRed!5,
colframe=ChadRed,
title={\bfseries Algorithm \thetcbcounter : #2},
#1
}

%----------------------------------------------------------------------------------------
%	MATH SHORTCUTS
%----------------------------------------------------------------------------------------

\def\cdfeq{\mathrel{\stackrel{\mathrm{c.d.f.}}=}}
\def\cdfcoloneqq{\mathrel{\stackrel{\mathrm{c.d.f.}}\coloneqq}}
\newcommand{\E}{\mathbb{E}}
\newcommand*\diff{\mathop{}\!\mathrm{d}}

\newcommand{\St}{\mathcal{S}}
\newcommand{\A}{\mathcal{A}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\Trans}{\mathcal{P}}
\newcommand{\Traj}{\mathcal{T}}
\newcommand{\T}{\mathbb{T}}

% \newcommand{\Z}{\mathcal{Z}}
% \newcommand{\G}{\mathcal{G}}
% \newcommand{\D}{\mathcal{D}}
% \newcommand{\W}{\mathcal{W}}
% \newcommand{\N}{\mathcal{N}}
% \newcommand{\B}{\mathfrak{B}}
% \newcommand{\eps}{\varepsilon}

\newcommand{\argmax}{\mathop{\mathrm{argmax}}}
\newcommand{\Argmax}{\mathop{\mathrm{Argmax}}}
\newcommand{\argmin}{\mathop{\mathrm{argmin}}} 418 | 419 | \newcommand{\const}{\operatorname{const}} 420 | \newcommand{\Loss}{\operatorname{Loss}} 421 | \newcommand{\softmax}{\operatorname*{softmax}} 422 | \newcommand{\KL}{\operatorname{KL}} 423 | \newcommand{\Uniform}{\operatorname{Uniform}} 424 | \newcommand{\entropy}{\mathcal{H}} 425 | 426 | \newcommand{\done}{\mathrm{done}} 427 | \newcommand{\actor}{\mathrm{actor}} 428 | \newcommand{\critic}{\mathrm{critic}} 429 | \newcommand{\old}{\mathrm{old}} 430 | \newcommand{\new}{\mathrm{new}} 431 | \newcommand{\expert}{\mathrm{expert}} 432 | \newcommand{\soft}{\mathrm{soft}} 433 | \newcommand{\intr}{\mathrm{intr}} 434 | \newcommand{\extr}{\mathrm{extr}} 435 | \newcommand{\clip}{\mathrm{clip}} 436 | \newcommand{\Regret}{\operatorname{Regret}} 437 | 438 | \newcommand{\Tr}{\operatorname{Tr}} 439 | \newcommand{\vect}{\operatorname{vec}} 440 | 441 | \usepackage{accents} 442 | \newcommand{\manager}[1]{\accentset{\star}{#1}} 443 | \newcommand{\Pop}{\mathscr{P}} --------------------------------------------------------------------------------