├── LICENSE.md ├── README.md ├── docs ├── Figures │ ├── 1F44D.pdf │ ├── Makefile │ ├── Makefile.fig │ ├── src │ │ ├── zfs_blkptr.tikz │ │ ├── zfs_embedded_blkptr.tikz │ │ ├── zfs_fs.tikz │ │ ├── zfs_indirect.tikz │ │ ├── zfs_label.tikz │ │ ├── zfs_mos.tikz │ │ ├── zfs_ub.tikz │ │ └── zfs_vdev.tikz │ ├── zfs_blkptr.svg │ ├── zfs_embedded_blkptr.svg │ ├── zfs_fs.svg │ ├── zfs_indirect.svg │ ├── zfs_label.svg │ ├── zfs_mos.svg │ ├── zfs_ub.svg │ └── zfs_vdev.svg ├── Makefile ├── Makefile.in ├── README.md ├── styles │ ├── pandoc.css │ ├── tufte-extra.css │ └── tufte.html5 ├── templates │ ├── preamble.tex │ └── template.tex ├── zfs_internals.md ├── zfs_internals.md.html └── zfs_internals.md.pdf ├── ondiskformatfinal.odt └── zwalk └── src ├── Makefile ├── TODO.md └── zdump.c /LICENSE.md: -------------------------------------------------------------------------------- 1 | This is the "Berkeley License", under which Sun licensed the ZFS On-Disk Specification document. Originally available at http://developers.sun.com/berkeley_license.html, retrieved from https://web.archive.org/web/20060112125517/http://developers.sun.com/berkeley_license.html in 2019. 2 | 3 | -------------- >8 -------------------- 4 | 5 | # Code sample 6 | ## License 7 | 8 | **Copyright 1994-2006 Sun Microsystems, Inc. All Rights Reserved.** 9 | 10 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 11 | 12 | * Redistribution of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 13 | 14 | * Redistribution in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 15 | 16 | Neither the name of Sun Microsystems, Inc. 
or the names of contributors may be used to endorse or promote products derived from this software without specific prior written permission. 17 | 18 | This software is provided "AS IS," without a warranty of any kind. ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN") AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 19 | 20 | You acknowledge that this software is not designed, licensed or intended for use in the design, construction, operation or maintenance of any nuclear facility. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zfsondisk 2 | 3 | This repo contains the ZFS On-Disk Specification, from Sun Microsystems, 4 | published 2006. The OpenZFS on-disk format is an extension of this. 
5 | -------------------------------------------------------------------------------- /docs/Figures/1F44D.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahrens/zfsondisk/30ae2531352ab0d9d5ee9af82ce39a2cd45ccd79/docs/Figures/1F44D.pdf -------------------------------------------------------------------------------- /docs/Figures/Makefile: -------------------------------------------------------------------------------- 1 | ifeq ($(DEBUG), y) 2 | OUTPUT=--verbose 3 | else 4 | OUTPUT=>/dev/null 2>&1 5 | endif 6 | 7 | include Makefile.fig 8 | FIGURES_SVG = $(patsubst %.pdf,%.svg,$(FIGURES_PDF)) 9 | 10 | %.pdf: src/%.tikz 11 | @printf "%-12s %s\n" "Tikzing " $< 12 | @tikzfile=$<; texfile=$${tikzfile%%.tikz}.tex; tikzfile=`basename $<`; \ 13 | printf "\documentclass[border=2pt]{standalone}\n\n" >$${texfile}; \ 14 | printf "\input{colors.tex}\n" >>$${texfile}; \ 15 | printf "\input{zfs_tikz.tex}\n" >>$${texfile}; \ 16 | printf "\\\begin{document}\n" >>$${texfile}; \ 17 | printf "\input{$${tikzfile}}\n" >>$${texfile}; \ 18 | printf "\\\end{document}\n\n" >>$${texfile}; \ 19 | TEXINPUTS=$(TEXINPUTS):./src \ 20 | xelatex -interaction=nonstopmode -output-directory=./ $${texfile} $(OUTPUT) 21 | 22 | %.svg: %.pdf 23 | @printf "%-12s %-20s to SVG\n" "Exporting" "$^" 24 | @pdf2svg $< $@ 25 | 26 | figures: $(FIGURES_PDF) $(FIGURES_SVG) 27 | -------------------------------------------------------------------------------- /docs/Figures/Makefile.fig: -------------------------------------------------------------------------------- 1 | FIGURES_PDF = zfs_label.pdf \ 2 | zfs_blkptr.pdf \ 3 | zfs_embedded_blkptr.pdf \ 4 | zfs_vdev.pdf \ 5 | zfs_ub.pdf \ 6 | zfs_mos.pdf \ 7 | zfs_fs.pdf \ 8 | zfs_indirect.pdf 9 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_blkptr.tikz: -------------------------------------------------------------------------------- 1 | 
\def\bit{.225cm} 2 | \begin{tikzpicture}[% 3 | every node/.style = { 4 | rectangle, 5 | draw=blkptr_li_color, fill=blkptr_bg_color, text=blkptr_ru_color, 6 | text width=64*\bit, minimum height=.5cm, 7 | outer sep=0pt, inner sep=0pt, 8 | font=\small\ttfamily, 9 | align=center, 10 | } 11 | ] 12 | \newcounter{prev} 13 | \setcounter{prev}{1} 14 | \foreach \i in {0,...,8} { 15 | \pgfmathtruncatemacro{\l}{(8-\i)*8} 16 | \draw (\i*\bit*8,0) -- node [bit, above, align=center] {\l} ++(0,.15cm); 17 | } 18 | \node at (0,0) (pad1) [label={[bit]left:0}, anchor=north west, text width=8*\bit] {pad}; 19 | \node (vdev1) [right=0cm of pad1, text width=24*\bit] {vdev1}; 20 | \node (grid1) [right=0cm of vdev1, text width=8*\bit] {GRID}; 21 | \node (asize1) [right=0cm of grid1, text width=24*\bit] {ASIZE}; 22 | \node (g1) [below right=0cm and 0cm of pad1.south west, text width=1*\bit,% 23 | label={[bit]left:1}] {\footnotesize{G}}; 24 | \node (offset1) [right=0cm of g1, text width=63*\bit] {offset1}; 25 | 26 | \node (pad2) [below right=0cm and 0cm of g1.south west, text width=8*\bit, % 27 | label={[bit]left:2}] {pad}; 28 | \node (vdev2) [right=0cm of pad2, text width=24*\bit] {vdev2}; 29 | \node (grid2) [right=0cm of vdev2, text width=8*\bit] {GRID}; 30 | \node (asize2) [right=0cm of grid2, text width=24*\bit] {ASIZE}; 31 | \node (g2) [below right=0cm and 0cm of pad2.south west, text width=1*\bit,% 32 | label={[bit]left:3}] {\footnotesize{G}}; 33 | \node (offset2) [right=0cm of g2, text width=63*\bit] {offset2}; 34 | 35 | \node (pad3) [below right=0cm and 0cm of g2.south west, text width=8*\bit, % 36 | label={[bit]left:4}] {pad}; 37 | \node (vdev3) [right=0cm of pad3, text width=24*\bit] {vdev3}; 38 | \node (grid3) [right=0cm of vdev3, text width=8*\bit] {GRID}; 39 | \node (asize3) [right=0cm of grid3, text width=24*\bit] {ASIZE}; 40 | \node (g3) [below right=0cm and 0cm of pad3.south west, text width=1*\bit,% 41 | label={[bit]left:5}] {\footnotesize{G}}; 42 | \node (offset3) [right=0cm
of g3, text width=63*\bit] {offset3}; 43 | 44 | \node (bdx) [below right=0cm and 0cm of g3.south west, % 45 | text width=4*\bit, label={[bit]left:6}] {BDX}; 46 | \node (lvl) [right=0cm of bdx, % 47 | text width=4*\bit] {lvl}; 48 | \node (type) [right=0cm of lvl, % 49 | text width=8*\bit] {type}; 50 | \node (etype) [right=0cm of type, % 51 | text width=8*\bit] {etype}; 52 | \node (E) [right=0cm of etype, % 53 | text width=1*\bit] {\footnotesize{E}}; 54 | \node (comp) [right=0cm of E, % 55 | text width=7*\bit] {comp}; 56 | \node (psize) [right=0cm of comp, % 57 | text width=7*\bit] {PSIZE}; 58 | \node (lsize) [right=0cm of psize, % 59 | text width=25*\bit] {LSIZE}; 60 | 61 | \node (pad4) [below right=0cm and 0cm of bdx.south west,% 62 | label={[bit]left:7}] {padding}; 63 | \node (pad5) [below = 0cm of pad4, label={[bit]left:8}] {padding}; 64 | \node (pbirth) [below = 0cm of pad5, label={[bit]left:9}] {physical birth txg}; 65 | \node (lbirth) [below = 0cm of pbirth, label={[bit]left:a}] {logical birth txg}; 66 | \node (fill) [below = 0cm of lbirth, label={[bit]left:b}] {fill count}; 67 | \node (chk1) [below = 0cm of fill, label={[bit]left:c}] {checksum[0]}; 68 | \node (chk2) [below = 0cm of chk1, label={[bit]left:d}] {checksum[1]}; 69 | \node (chk3) [below = 0cm of chk2, label={[bit]left:e}] {checksum[2]}; 70 | \node (chk4) [below = 0cm of chk3, label={[bit]left:f}] {checksum[3]}; 71 | \end{tikzpicture} 72 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_embedded_blkptr.tikz: -------------------------------------------------------------------------------- 1 | \def\bit{.225cm} 2 | \begin{tikzpicture}[% 3 | every node/.style = { 4 | rectangle, 5 | draw=blkptr_li_color, fill=blkptr_bg_color, text=blkptr_ru_color, 6 | text width=64*\bit, minimum height=.5cm, 7 | outer sep=0pt, inner sep=0pt, 8 | font=\small\ttfamily, 9 | align=center, 10 | } 11 | ] 12 | \newcounter{prev} 13 | \setcounter{prev}{1} 14 | \foreach \i in 
{0,...,8} { 15 | \pgfmathtruncatemacro{\l}{(8-\i)*8} 16 | \draw (\i*\bit*8,0) -- node [bit, above, align=center] {\l} ++(0,.15cm); 17 | } 18 | \node at (0,0) (payload1) [label={[bit]left:0}, anchor=north west] {payload}; 19 | \foreach \i in {2,...,6} { 20 | %\pgfmathtruncatemacro{\l}{8*\i} 21 | \pgfmathtruncatemacro{\l}{\i-1} 22 | \node (payload\i) [below=0cm of {payload\theprev},% 23 | label={[bit]left:\l}] {payload}; 24 | \addtocounter{prev}{1} 25 | } 26 | \node (bdx) [below right=0cm and 0cm of payload6.south west, % 27 | text width=4*\bit, label={[bit]left:6}] {BDX}; 28 | \node (lvl) [right=0cm of bdx, % 29 | text width=4*\bit] {lvl}; 30 | \node (type) [right=0cm of lvl, % 31 | text width=8*\bit] {type}; 32 | \node (etype) [right=0cm of type, % 33 | text width=8*\bit] {etype}; 34 | \node (E) [right=0cm of etype, % 35 | text width=1*\bit] {\footnotesize{E}}; 36 | \node (comp) [right=0cm of E, % 37 | text width=7*\bit] {comp}; 38 | \node (psize) [right=0cm of comp, % 39 | text width=7*\bit] {PSIZE}; 40 | \node (lsize) [right=0cm of psize, % 41 | text width=25*\bit] {LSIZE}; 42 | \node (payload7) [below right=0cm and 0cm of bdx.south west,% 43 | label={[bit]left:7}] {payload}; 44 | \addtocounter{prev}{1} 45 | \foreach \i in {8,9} { 46 | \pgfmathtruncatemacro{\l}{\i} 47 | \node (payload\i) [below=0cm of {payload\theprev},% 48 | label={[bit]left:\l}] {payload}; 49 | \addtocounter{prev}{1} 50 | } 51 | \node (birth) [below=0cm of payload9, label={[bit]left:a}] {logical birth txg}; 52 | \node (payload10) [below=0cm of birth, label={[bit]left:b}] {payload}; 53 | \addtocounter{prev}{1} 54 | \foreach \i in {11,...,14} { 55 | \pgfmathtruncatemacro{\l}{\i+1} 56 | \node (payload\i) [below=0cm of {payload\theprev},% 57 | label={[bit]left:\pgfmathhex{\l}\pgfmathresult}] {payload}; 58 | \addtocounter{prev}{1} 59 | } 60 | \end{tikzpicture} 61 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_fs.tikz: 
-------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[% 2 | every node/.style={font=\small\ttfamily}, 3 | ] 4 | \node (objset) [objset] at (0,0) {% 5 | \struct{% 6 | objset\_phys\_t\lowstrut, 7 | os\_meta\_dnode\lowstrut, 8 | os\_zil\_header\lowstrut, 9 | os\_type=ZPL\lowstrut, 10 | os\_pad[376]\lowstrut 11 | } 12 | }; 13 | \node (dva0) [dva, outer sep=0pt, below right=.5cm and 0 of objset.south west] {D\\V\\A\\0}; 14 | \node (dva1) [dva, outer sep=0pt, right=0cm of dva0.east] {D\\V\\A\\1}; 15 | \node (dva2) [dva, outer sep=0pt, right=0cm of dva1.east] {D\\V\\A\\2}; 16 | \draw [-, dnode_li_color] (dva2.north east) -- ++(1.25cm,0); 17 | \draw [-, dnode_li_color] (dva2.south east) -- ++(1.25cm,0); 18 | \node [right=.25cm of dva2.east, text=dnode_ru_color] {....}; 19 | 20 | \objects{blkptr0}{% 21 | blkptr, below=1cm of dva0.south}{blk\_ptr} 22 | \node (blkptr02) [% 23 | blkptr, 24 | fit={(blkptr0)}, 25 | draw, below=.5cm of dva2.south, inner sep=0pt, outer sep=0pt, 26 | fill=blkptr_bg_color!50] {}; 27 | \node (blkptr01) [% 28 | blkptr, 29 | fit={(blkptr0)}, 30 | draw, below=.75cm of dva1.south, inner sep=0pt, outer sep=0pt, 31 | fill=blkptr_bg_color!50] {}; 32 | \objects{blkptr0}{% 33 | blkptr, below=1cm of dva0.south}{blk\_ptr} 34 | 35 | \begin{scope}[local bounding box=dnode_array0, name prefix=dnode_array0-] 36 | \node (dnode_31) [dnode_array, below right=.5cm and 1.5cm of blkptr0.south west] {}; 37 | \node (dnode_dots) [dnode_array, left=0cm of dnode_31.west, % 38 | align=center, minimum width=1.2cm, text width=1.2cm] {....}; 39 | \node (dnode_usr) [dnode_array, left=0cm of dnode_dots.west] % 40 | {% 41 | \setlength{\tabcolsep}{0pt} 42 | \renewcommand{\arraystretch}{.75} 43 | \hspace{-1cm} 44 | \begin{tabular}{c} 45 | \footnotesize{u}\\ 46 | \footnotesize{s}\\ 47 | \footnotesize{r} 48 | \end{tabular} 49 | }; 50 | \node (dnode_var) [dnode_array, left=0cm of dnode_usr.west] % 51 | {% 52 | \setlength{\tabcolsep}{0pt} 
53 | \renewcommand{\arraystretch}{.75} 54 | \hspace{-1cm} 55 | \begin{tabular}{c} 56 | \footnotesize{v}\\ 57 | \footnotesize{a}\\ 58 | \footnotesize{r} 59 | \end{tabular} 60 | }; 61 | \node (dnode_sbin) [dnode_array, left=0cm of dnode_var.west] % 62 | {% 63 | \setlength{\tabcolsep}{0pt} 64 | \renewcommand{\arraystretch}{.75} 65 | \hspace{-1cm} 66 | \begin{tabular}{c} 67 | \footnotesize{s}\\ 68 | \footnotesize{b}\\ 69 | \footnotesize{i}\\ 70 | \footnotesize{n} 71 | \end{tabular} 72 | }; 73 | \node (dnode_1) [dnode_array, left=0cm of dnode_sbin.west] {}; 74 | \node (dnode_0) [dnode_array, left=0cm of dnode_1.west] {}; 75 | \end{scope} 76 | 77 | \begin{scope}[local bounding box=dnode_array1, name prefix=dnode_array1-] 78 | \node (dnode_32) [dnode_array, right=.6cm of dnode_array0-dnode_31.east] {}; 79 | \node (dnode_33) [dnode_array, right=0cm of dnode_32.east] {}; 80 | \node (dnode_root) [dnode_array, right=0cm of dnode_33.east] % 81 | {% 82 | \setlength{\tabcolsep}{0pt} 83 | \renewcommand{\arraystretch}{.75} 84 | \hspace{-1cm} 85 | \begin{tabular}{c} 86 | \footnotesize{R}\\ 87 | \footnotesize{O}\\ 88 | \footnotesize{O}\\ 89 | \footnotesize{T} 90 | \end{tabular} 91 | }; 92 | \node (dnode_dots) [dnode_array, right=0cm of dnode_root.east, % 93 | align=center, minimum width=2.25cm, text width=2.25cm] {....}; 94 | \node (dnode_63) [dnode_array, right=0cm of dnode_dots.east] {}; 95 | \end{scope} 96 | \node (dots) [right=.25cm of dnode_array1] {....}; 97 | \begin{scope}[local bounding box=dnode_array2, name prefix=dnode_array2-] 98 | \node (dnode_zdump) [dnode_array, right=.25cm of dots.east] % 99 | {% 100 | \setlength{\tabcolsep}{0pt} 101 | \renewcommand{\arraystretch}{.5} 102 | \hspace{-1cm} 103 | \begin{tabular}{c} 104 | \footnotesize{z}\\ 105 | \footnotesize{d}\\ 106 | \footnotesize{u}\\ 107 | \footnotesize{m}\\ 108 | \footnotesize{p}\\ 109 | \end{tabular} 110 | }; 111 | \node (dnode_dots) [dnode_array, right=0cm of dnode_zdump.east, % 112 | align=center, minimum 
width=3.25cm, text width=3.25cm] {....}; 113 | \end{scope} 114 | 115 | % \node (dnode_zdb) [dnode, below right=.75cm and -.75cm of dnode_array1-dnode_zdb.south, % 116 | % minimum width=4.5cm, text width=4.5cm] {% 117 | % \struct{% 118 | % dnode\_phys\_t\lowstrut, 119 | % dn\_type = \\ \footnotesize{DMU\_OT\_PLAIN\_FILE\_CONTENTS}, 120 | % ....\highstrut, 121 | % dn\_level = 3\lowstrut, 122 | % dn\_blkptr[]\lowstrut, 123 | % dn\_bonus[]\lowstrut, 124 | % dn\_spill\lowstrut, 125 | % } 126 | % }; 127 | 128 | \node (dnode_rootdir) [dnode={7}, above right =.75cm and .5cm of dnode_array1-dnode_root.north, % 129 | xshift=1cm, minimum width=4.5cm, text width=4.5cm] {% 130 | \struct{% 131 | dnode\_phys\_t\lowstrut, 132 | dn\_type = \\ \footnotesize{DMU\_OT\_DIRECTORY\_CONTENTS}, 133 | ....\highstrut, 134 | dn\_level\lowstrut, 135 | dn\_blkptr[]\lowstrut, 136 | dn\_bonus[]\lowstrut, 137 | dn\_spill\lowstrut, 138 | } 139 | }; 140 | 141 | \node [above=0cm of dnode_array0-dnode_0, array_ru_color] {0}; 142 | \node [above=0cm of dnode_array0-dnode_1, array_ru_color] {1}; 143 | \node [above=0cm of dnode_array0-dnode_sbin, array_ru_color] {2}; 144 | \node [above=0cm of dnode_array0-dnode_var, array_ru_color] {3}; 145 | \node [above=0cm of dnode_array0-dnode_usr, array_ru_color] {4}; 146 | \node [above=0cm of dnode_array0-dnode_31, array_ru_color] {1f}; 147 | \node [below=0cm of dnode_array1-dnode_32, array_ru_color] {20}; 148 | \node [below=0cm of dnode_array1-dnode_root, array_ru_color] {22}; 149 | \node [below=0cm of dnode_array1-dnode_63, array_ru_color] {3f}; 150 | \node [above=0cm of dnode_array2-dnode_zdump, array_ru_color] {80}; 151 | \node (zap_rootdir) [zap, right=.5cm of dnode_rootdir.five east, minimum width=2cm, text width=2cm] {% 152 | sbin=2\\ 153 | var=3\\ 154 | usr=4 155 | }; 156 | \node (zap) [below=0 of zap_rootdir, data_ru_color, text width=4cm, align=center] % 157 | {Micro ZAP\\(embedded blkptr)}; 158 | 159 | \node (dnode_meta) [dnode={7}, above left=-.5cm and 
1cm of dva0.south west, % 160 | minimum width=4.5cm, text width=4.5cm] {% 161 | \struct{% 162 | dnode\_phys\_t\lowstrut, 163 | dn\_type = DMU\_OT\_DNODE\lowstrut, 164 | ....\highstrut, 165 | dn\_level = 6\lowstrut, 166 | dn\_blkptr[]\lowstrut, 167 | dn\_bonus[]\lowstrut, 168 | dn\_spill\lowstrut, 169 | } 170 | }; 171 | 172 | \node (dnode_sbin) [dnode={7}, below right=.75cm and 1cm of dnode_array0-dnode_sbin.south east, % 173 | minimum width=4.5cm, text width=4.5cm] {% 174 | \struct{% 175 | dnode\_phys\_t\lowstrut, 176 | dn\_type = \\ \footnotesize{DMU\_OT\_DIRECTORY\_CONTENTS}, 177 | ....\highstrut, 178 | dn\_level\lowstrut, 179 | dn\_blkptr[]\lowstrut, 180 | dn\_bonus[]\lowstrut, 181 | dn\_spill\lowstrut, 182 | } 183 | }; 184 | \node (zap_sbin) [zap, above right=-.1cm and .6cm of dnode_sbin.five east] {% 185 | zdump=128 186 | }; 187 | \node (zdump_mzap) [below=0 of zap_sbin, data_ru_color, text width=4cm, align=center]% 188 | {Micro ZAP\\(embedded blkptr)}; 189 | 190 | \node (dnode_master) [dnode={7}, below left=.75cm and -1.5cm of dnode_array0.south west,% 191 | minimum width=4.5cm, text width=4.5cm] {% 192 | \struct{% 193 | dnode\_phys\_t\lowstrut, 194 | dn\_type = \\ \footnotesize{DMU\_OT\_MASTER\_NODE}, 195 | ....\highstrut, 196 | dn\_level\lowstrut, 197 | dn\_blkptr[]\lowstrut, 198 | dn\_bonus[]\lowstrut, 199 | dn\_spill\lowstrut, 200 | } 201 | }; 202 | \node (zap_master) [zap, above right=1cm and -.75cm of dnode_master.north west,% 203 | minimum width=2.75cm, text width=2.75cm] {% 204 | VERSION=5\\ 205 | SA\_ATTRS=32\\ 206 | DELETE\_QUEUE=33\\ 207 | ROOT=34 208 | }; 209 | \node (zap) [above=0 of zap_master, data_ru_color] {Micro ZAP}; 210 | 211 | \node (dnode_zdump) [dnode={7}, below right=.75cm and -.3cm of dnode_array2-dnode_zdump.south east, % 212 | minimum width=4.5cm, text width=4.5cm] {% 213 | \struct{% 214 | dnode\_phys\_t\lowstrut, 215 | dn\_type = \\ \footnotesize{DMU\_OT\_PLAIN\_FILE\_CONTENTS}, 216 | ....\highstrut, 217 | dn\_level\lowstrut, 218 | 
dn\_blkptr[]\lowstrut, 219 | dn\_bonus[]\lowstrut, 220 | dn\_spill\lowstrut, 221 | } 222 | }; 223 | 224 | \draw [-, densely dashed] (objset.one split west) -- (dnode_meta.north west); 225 | \draw [-, densely dashed] (objset.two split west) -- (dnode_meta.north east); 226 | \draw [-, densely dashed] (dnode_meta.four split east) -- (dva0.north west); 227 | \draw [-, densely dashed] (dnode_meta.five split east) -- (dva0.south west); 228 | \draw [-, densely dashed] (dnode_array1-dnode_root.north west) -- (dnode_rootdir.south west); 229 | \draw [-, densely dashed] (dnode_array1-dnode_root.north east) -- (dnode_rootdir.south east); 230 | \draw [-, densely dashed] (dnode_array0-dnode_1.south west) -- (dnode_master.north west); 231 | \draw [-, densely dashed] (dnode_array0-dnode_1.south east) -- (dnode_master.north east); 232 | \draw [-, densely dashed] (dnode_array0-dnode_sbin.south west) -- (dnode_sbin.north west); 233 | \draw [-, densely dashed] (dnode_array0-dnode_sbin.south east) -- (dnode_sbin.north east); 234 | \draw [-, densely dashed] (dnode_rootdir.four split east) -- (zap_rootdir.north west); 235 | \draw [-, densely dashed] (dnode_rootdir.five split east) -- (zap_rootdir.south west); 236 | \draw [-, densely dashed] (dnode_sbin.four split east) -- (zap_sbin.north west); 237 | \draw [-, densely dashed] (dnode_sbin.five split east) -- (zap_sbin.south west); 238 | \draw [-, densely dashed] (dnode_array2-dnode_zdump.south west) -- (dnode_zdump.north west); 239 | \draw [-, densely dashed] (dnode_array2-dnode_zdump.south east) -- (dnode_zdump.north east); 240 | 241 | \draw [<-] (objset.one west) -- ++(-1cm, 0) % 242 | node [left, text width=3.5cm, align=center, text=ds_ru_color, xshift=.5cm] % 243 | {ds\_dp\\of head data set}; 244 | \draw [->, densely dotted, thick] (blkptr0.one west) -| (dnode_array0.north west) % 245 | node [midway,rotate=-90, xshift=1cm, text width=1.5cm, text=blkptr_ru_color] {5 more\\levels}; 246 | \draw [->] (dva0.south) -- (blkptr0.north); 247 | 
\draw [->] (dva1.south) -- (blkptr01.north); 248 | \draw [->] (dva2.south) -- (blkptr02.north); 249 | \draw [->] (dnode_master.five west) -- ++(-.5cm,0) coordinate(tmp) -- (tmp |- zap_master.south); 250 | \draw [->] (dnode_zdump.five east) -- ++(.75cm, 0) -- ++(0,-.75cm) % 251 | node [below, fill=data_bg_color, draw=data_li_color, text=data_ru_color] {DATA}; 252 | \end{tikzpicture} 253 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_indirect.tikz: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[% 2 | every text node part/.style={align=left}, 3 | ] 4 | \node (dnode) [dnode={7}] at (0,0) {% 5 | \struct{% 6 | dnode\_phys\_t\lowstrut, 7 | dn\_type\lowstrut, 8 | ....\highstrut, 9 | dn\_blkptr[1]\lowstrut, 10 | dn\_bonus[]\lowstrut, 11 | dn\_spill\lowstrut 12 | } 13 | }; 14 | \node (dva0) [dva, outer sep=0pt, right=1.5cm of dnode.four east, % 15 | minimum width = .2cm, text width=.2cm, minimum height=2.5cm] {D\\V\\A\\0}; 16 | \node (dva1) [dva, outer sep=0pt, right=0cm of dva0.east, % 17 | minimum width = .2cm, text width=.2cm, minimum height=2.5cm] {D\\V\\A\\1}; 18 | \node (dva2) [dva, outer sep=0pt, right=0cm of dva1.east, % 19 | minimum width = .2cm, text width=.2cm, minimum height=2.5cm] {D\\V\\A\\2}; 20 | 21 | \begin{scope}[local bounding box=blkptr0, name prefix=blkptr0-] 22 | \objects{blkptr00}{% 23 | blkptr, below=.8cm of dva0.south}{blk\_ptr} 24 | \node (blkptr02) [% 25 | blkptr, fit={(blkptr00)}, 26 | draw, below=.4cm of dva2.south, inner sep=0pt, outer sep=0pt, 27 | fill=blkptr_bg_color!50] {}; 28 | \node (blkptr01) [% 29 | blkptr, fit={(blkptr00)}, 30 | draw, below=.6cm of dva1.south, inner sep=0pt, outer sep=0pt, 31 | fill=blkptr_bg_color!50] {}; 32 | \objects{blkptr00}{% 33 | blkptr, below=.8cm of dva0.south}{blk\_ptr} 34 | \end{scope} 35 | 36 | \begin{scope}[local bounding box=blkptr2, name prefix=blkptr2-] 37 | \objects{blkptr20}{blkptr, below 
left=.5cm and .75cm of blkptr0.south}{blk\_ptr} 38 | \node (blkptr22) [% 39 | blkptr, fit={(blkptr20)}, 40 | draw, above right=.25cm and .12cm of blkptr20.south west, inner sep=0pt, outer sep=0pt, 41 | fill=blkptr_bg_color!50] {}; 42 | \node (blkptr21) [% 43 | blkptr, fit={(blkptr20)}, 44 | draw, above right=.125cm and .06cm of blkptr20.south west, inner sep=0pt, outer sep=0pt, 45 | fill=blkptr_bg_color!50] {}; 46 | \objects{blkptr20}{blkptr, below left=.5cm and .75cm of blkptr0.south}{blk\_ptr} 47 | \end{scope} 48 | 49 | \begin{scope}[local bounding box=blkptr1, name prefix=blkptr1-] 50 | \objects{blkptr10}{blkptr, left= .5cm of blkptr2-blkptr20.west}{blk\_ptr} 51 | \node (blkptr12) [% 52 | blkptr, fit={(blkptr10)}, 53 | draw, above right=.25cm and .12cm of blkptr10.south west, inner sep=0pt, outer sep=0pt, 54 | fill=blkptr_bg_color!50] {}; 55 | \node (blkptr11) [% 56 | blkptr, fit={(blkptr10)}, 57 | draw, above right=.125cm and .06cm of blkptr10.south west, inner sep=0pt, outer sep=0pt, 58 | fill=blkptr_bg_color!50] {}; 59 | \objects{blkptr10}{blkptr, left= .5cm of blkptr2-blkptr20.west}{blk\_ptr} 60 | \end{scope} 61 | 62 | \node (dots) [right = .5cm of blkptr2-blkptr20, text=blkptr_bg_color] {....}; 63 | \begin{scope}[local bounding box=blkptr3, name prefix=blkptr3-] 64 | \objects{blkptr30}{blkptr, right= .5cm of dots.east}{blk\_ptr} 65 | \node (blkptr32) [% 66 | blkptr, fit={(blkptr30)}, 67 | draw, above right=.25cm and .12cm of blkptr30.south west, inner sep=0pt, outer sep=0pt, 68 | fill=blkptr_bg_color!50] {}; 69 | \node (blkptr31) [% 70 | blkptr, fit={(blkptr30)}, 71 | draw, above right=.125cm and .06cm of blkptr30.south west, inner sep=0pt, outer sep=0pt, 72 | fill=blkptr_bg_color!50] {}; 73 | \objects{blkptr30}{blkptr, right= .5cm of dots.east}{blk\_ptr} 74 | \end{scope} 75 | 76 | \node (data1) [data, below left=.5cm and .75cm of blkptr1.south, % 77 | minimum height = 1cm, minimum width = 2cm] {Data}; 78 | \node (dots) [right = .125cm of data1.east,
text=data_ru_color] {...}; 79 | \node (data2) [data, right=1.125cm of data1.east, % 80 | minimum height = 1cm, minimum width = 2cm] {Data}; 81 | \node (dots) [right = .125cm of data2.east, text=data_ru_color] {...}; 82 | \node (data3) [data, right=1.35cm of data2.east, % 83 | minimum height = 1cm, minimum width = 2cm] {Data}; 84 | \node (dots) [right = .05cm of data3.east, text=data_ru_color] {...}; 85 | \node (data4) [data, below=.5cm of blkptr3-blkptr30.south, xshift=1.5cm, % 86 | minimum height = 1cm, minimum width = 2cm] {Data}; 87 | 88 | \draw [->] (blkptr0-blkptr00.one west) -| (blkptr1.north); 89 | \draw [->] (blkptr0-blkptr00.two west) -| (blkptr2.north); 90 | \draw [->] (blkptr0-blkptr00.four east) -| (blkptr3.north); 91 | \draw [->] (blkptr1-blkptr10.one west) -| (data1.north); 92 | \draw [->] (blkptr1-blkptr10.two east) -| (data2.north); 93 | \draw [->] (blkptr2-blkptr20.one east) -| (data3.north); 94 | \draw [->] (blkptr3-blkptr30.four east) -| (data4.north); 95 | \draw [->] (dva0.south) -- (blkptr0-blkptr00.north); 96 | \draw [->] (dva1.south) -- (blkptr0-blkptr01.north); 97 | \draw [->] (dva2.south) -- (blkptr0-blkptr02.north); 98 | \draw [-, densely dashed, dnode_li_color, thick] (dnode.three split east) -- (dva0.north west); 99 | \draw [-, densely dashed, dnode_li_color, thick] (dnode.four split east) -- (dva0.south west); 100 | \draw [-, dnode_li_color] (dva2.north east) -- ++(1.25cm,0); 101 | \draw [-, dnode_li_color] (dva2.south east) -- ++(1.25cm,0); 102 | \node [right=.25cm of dva2.east, text=dnode_ru_color] {....}; 103 | \end{tikzpicture} 104 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_label.tikz: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture} 2 | \path (0,0) pic [local bounding box=disk] (disk) {disk}; 3 | \node at (disk.center) [anchor=center, text=gray!80, text width=2.5cm, align=center] % 4 | {\sffamily{Physical\\Vdev}}; 5 
| \node (label0) [vdev, right=1cm of disk.east] {Label 0}; 6 | \node (label1) [vdev, right=0cm of label0.east] {Label 1}; 7 | \node (dots) [vdev, right=0cm of label1.east, minimum width=4cm] {....}; 8 | \node (label2) [vdev, right=0cm of dots.east] {Label 2}; 9 | \node (label3) [vdev, right=0cm of label2.east] {Label 3}; 10 | 11 | \node (pad) [labelitem,below right=1cm and -2cm of disk.south] {Padding}; 12 | \node (be) [labelitem,right=0cm of pad.east] {Boot Env}; 13 | \node (config) [labelitem,right=0cm of be.east, minimum width=5cm, text width=5cm] {nvpair list\\e.g., "type"="disk"}; 14 | \begin{scope} [local bounding box=ubs] 15 | \node (ub0) [ubcell,right=0cm of config.east, minimum width=.5cm, text width=.5cm] {}; 16 | \node (ub1) [ubcell,right=0cm of ub0.east] {}; 17 | \node (ub2) [ubcell,right=0cm of ub1.east] {}; 18 | \node (ub3) [ubcell,right=0cm of ub2.east] {}; 19 | \node (ub4) [ubcell,right=0cm of ub3.east] {}; 20 | \node (ub5) [ubcell,right=0cm of ub4.east] {}; 21 | \node (ub6) [ubcell,right=0cm of ub5.east] {}; 22 | \node (ub7) [ubcell,right=0cm of ub6.east] {}; 23 | \node (ub8) [ubcell,right=0cm of ub7.east] {}; 24 | \node (ub126) [ubcell,right=0cm of ub8.east] {}; 25 | \node (ub127) [ubcell,right=0cm of ub126.east] {}; 26 | \end{scope} 27 | \node (ubarray) at (ubs.center) [anchor=center, fill=white, minimum height=.8cm, % 28 | text=label_ru_color, fill=label_bg_color] {Uber block array}; 29 | \node (zero) [below=0 of pad.south west] {0}; 30 | \node (labelpad) [below=0 of pad.south east] {8K}; 31 | \node (labelbe) [below=0 of be.south east] {16K}; 32 | \node (ubbegin) [below=0 of ub0.south west, xshift=1em] {128K}; 33 | \node (ubend) [below=0 of ub127.south east, xshift=1em] {256K}; 34 | 35 | \draw [-, densely dashed] ($(disk.north east) - (0, .6cm)$) -- (label0.north west); 36 | \draw [-, densely dashed] ($(disk.south east) + (0, .6cm)$) -- (label0.south west); 37 | \draw [-, densely dashed] (label0.south west) -- (pad.north west); 38 | \draw [-, 
densely dashed] (label0.south east) -- (ub127.north east); 39 | \end{tikzpicture} 40 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_mos.tikz: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[% 2 | every node/.style={font=\small\ttfamily} 3 | ] 4 | \begin{scope}[local bounding box=mos_part0, name prefix=mos_part0-] 5 | \mositem{empty} {} {}; 6 | \mositem{objdir}{right=0cm of empty.east}{\Longstack{O b j ~ D i r}} 7 | \mositem{others} {right=0cm of objdir.east, % 8 | minimum width=4cm, text width=4cm}{....} 9 | \mositem{others} {right=0cm of others.east}{} 10 | \end{scope} 11 | \begin{scope}[local bounding box=mos_part1, name prefix=mos_part1-] 12 | \mositem{rootds} {right=1.5cm of mos_part0-others.east} {\Longstack{R o o t ~ D S}}; 13 | \mositem{empty} {right=0cm of rootds.east} {}; 14 | \mositem{child} {right=0cm of empty.east} {\scriptsize{\Longstack{C h i l d ~ M a p}}}; 15 | \mositem{others} {right=0cm of child.east, % 16 | minimum width=1cm, text width=1cm}{....}; 17 | \mositem{headds} {right=0cm of others.east} {\Longstack{H e a d ~ D S}}; 18 | \mositem{others} {right=0cm of headds.east, % 19 | minimum width=1cm, text width=1cm}{....}; 20 | \mositem{config} {right=0cm of others.east} {\Longstack{c o n f i g}}; 21 | \mositem{empty} {right=0cm of config.east} {}; 22 | \mositem{empty} {right=0cm of empty.east} {}; 23 | \end{scope} 24 | \node (mos) [array_ru_color, left=0cm of mos_part0] {\sffamily\Large{MOS}}; 25 | 26 | \node (dnode_objdir) [dnode={7}, above=.75cm of mos_part0-objdir,% 27 | minimum width=4cm, text width=4cm] {% 28 | \struct{% 29 | dnode\_phys\_t\lowstrut, 30 | dn\_type = \footnotesize{DMU\_OT\_OBJECT\_DIRECTORY}, 31 | ....\highstrut, 32 | dn\_level\lowstrut, 33 | dn\_blkptr[]\lowstrut, 34 | dn\_bonus[]\lowstrut, 35 | dn\_spill\lowstrut 36 | } 37 | }; 38 | 39 | \node (zap_objdir) [zap, below right=4.5cm and 0cm of dnode_objdir.south 
west, % 40 | text width = 3cm, minimum width=3cm] {% 41 | root\_dataset=32\\ 42 | config=61\\ 43 | .... 44 | }; 45 | \node (fzap) [below=0 of zap_objdir, zap_ru_color] {Fat ZAP}; 46 | 47 | \node (dnode_rootds) [dnode={7}, above=.75cm of mos_part1-rootds, xshift=-.25cm] {% 48 | \struct{% 49 | dnode\_phys\_t\lowstrut, 50 | dn\_type = \footnotesize{DMU\_OT\_DSL\_DIR}, 51 | ....\highstrut, 52 | dn\_level\lowstrut, 53 | dn\_blkptr[]\lowstrut, 54 | dn\_bonus[]\lowstrut, 55 | dn\_spill\lowstrut 56 | } 57 | }; 58 | 59 | \node (dsl_rootds) [dsl, above right=0cm and 1cm of dnode_rootds.south east,% 60 | text width=3.75cm, minimum width=3.75cm] {% 61 | \struct{% 62 | dsl\_dir\_phys\_t\lowstrut, 63 | dd\_creation\_time, 64 | dd\_child\_dir\_zapobj\\=34, 65 | dd\_head\_dataset\_obj\\=54 66 | } 67 | }; 68 | 69 | \node (dnode_childmap) [dnode={7}, below left=.75cm and -.5cm of mos_part1-child.south,% 70 | minimum width=4cm, text width=4cm] {% 71 | \struct{% 72 | dnode\_phys\_t\lowstrut, 73 | dn\_type = \footnotesize{DMU\_OT\_DSL\_DIR\_CHILD\_MAP}, 74 | ....\highstrut, 75 | dn\_level\lowstrut, 76 | dn\_blkptr[]\lowstrut, 77 | dn\_bonus[]\lowstrut, 78 | dn\_spill\lowstrut 79 | } 80 | }; 81 | 82 | \node (dnode_headds) [dnode={7}, below right=.75cm and -.5cm of mos_part1-headds.south,% 83 | minimum width=4cm, text width=4cm] {% 84 | \struct{% 85 | dnode\_phys\_t\lowstrut, 86 | dn\_bonus\_type = \footnotesize{DMU\_OT\_DSL\_DATASET}, 87 | ....\highstrut, 88 | dn\_level\lowstrut, 89 | dn\_blkptr[]\lowstrut, 90 | dn\_bonus[]\lowstrut, 91 | dn\_spill\lowstrut 92 | } 93 | }; 94 | 95 | \node (childmap_mzap) [zap, above left=.5cm and 1cm of dnode_childmap.south west] {% 96 | \$MOS=35\\ 97 | \$FREE=38\\ 98 | \$ORIGIN=42 99 | }; 100 | \node (emzap) [below=0cm of childmap_mzap, zap_ru_color, text width=4cm, align=center]% 101 | {Micro ZAP\\(Embedded Blkptr)}; 102 | 103 | \node (ds_headds) [dsl, above right=1cm and .75cm of dnode_headds.south east,% 104 | text width=3.75cm, minimum width=3.75cm] 
{% 105 | \struct{% 106 | dsl\_dataset\_phys\_t\lowstrut, 107 | ds\_creation\_time, 108 | ds\_creation\_txg=1\lowstrut, 109 | ds\_bp 110 | } 111 | }; 112 | \node (dataset) [below=.5cm of ds_headds, text width=3cm, text=ds_ru_color, align=center]% 113 | {Data Set\\(file system)}; 114 | 115 | \node [above=0cm of mos_part0-objdir, array_ru_color] {1}; 116 | \node [above=0cm of mos_part1-rootds, array_ru_color] {32}; 117 | \node [above=0cm of mos_part1-child, array_ru_color] {34}; 118 | \node [above=0cm of mos_part1-headds, array_ru_color] {54}; 119 | \node [above=0cm of mos_part1-config, array_ru_color] {61}; 120 | 121 | \draw [-, densely dashed] (mos_part0-objdir.north west) -- (dnode_objdir.south west); 122 | \draw [-, densely dashed] (mos_part0-objdir.north east) -- (dnode_objdir.south east); 123 | \draw [-, densely dashed] (mos_part1-rootds.north west) -- (dnode_rootds.south west); 124 | \draw [-, densely dashed] (mos_part1-rootds.north east) -- (dnode_rootds.south east); 125 | \draw [-, densely dashed] (dnode_rootds.five split east) -- (dsl_rootds.north west); 126 | \draw [-, densely dashed] (dnode_rootds.six split east) -- (dsl_rootds.south west); 127 | \draw [->] (dnode_objdir.five west) -- ++(-.25cm,0) |- (zap_objdir.west); 128 | \draw [-, densely dashed] (mos_part1-child.south west) -- (dnode_childmap.north west); 129 | \draw [-, densely dashed] (mos_part1-child.south east) -- (dnode_childmap.north east); 130 | \draw [-, densely dashed] (mos_part1-headds.south west) -- (dnode_headds.north west); 131 | \draw [-, densely dashed] (mos_part1-headds.south east) -- (dnode_headds.north east); 132 | \draw [-, densely dashed] (dnode_childmap.four split west) -- (childmap_mzap.north east); 133 | \draw [-, densely dashed] (dnode_childmap.five split west) -- (childmap_mzap.south east); 134 | \draw [->] (dnode_headds.five east) -- ++(.25cm,0) |- (ds_headds.one west); 135 | \draw [->] (ds_headds.four south) -- (dataset.north); 136 | \end{tikzpicture} 137 | 
-------------------------------------------------------------------------------- /docs/Figures/src/zfs_ub.tikz: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture} 2 | \node (ub) [dnode={7}] {% 3 | \struct{% 4 | uberblock\_t\lowstrut, 5 | ub\_magic, 6 | ub\_version, 7 | ub\_txg\lowstrut, 8 | ....\highstrut, 9 | ub\_rootbp\lowstrut, 10 | ....\highstrut 11 | } 12 | }; 13 | \node (os) [dnode={5}, below left = 0cm and 1cm of ub.north west] {% 14 | \struct{% 15 | objset\_phys\_t\lowstrut, 16 | os\_meta\_dnode, 17 | os\_zil\_header, 18 | os\_type = \\\footnotesize{DMU\_OST\_META}, 19 | ....\highstrut 20 | } 21 | }; 22 | \node (meta_dnode) [dnode={7}, below left = 0cm and 1cm of os.north west] {% 23 | \struct {% 24 | dnode\_phys\_t\lowstrut, 25 | dn\_type = DMU\_OT\_DNODE, 26 | ....\highstrut, 27 | dn\_level = 2\lowstrut, 28 | dn\_blkptr[]\lowstrut, 29 | dn\_bonus[]\lowstrut, 30 | dn\_spill\lowstrut, 31 | } 32 | }; 33 | \node (blkptr) [dnode={3}, below left = 0cm and 1cm of meta_dnode.north west] {% 34 | \struct {% 35 | blkptr\_t\lowstrut, 36 | blk\_dva[3], 37 | ....\highstrut 38 | } 39 | }; 40 | 41 | \objects{blkptr0}{% 42 | blkptr, draw=none, fill=none, below left=1.25cm and 1.5cm of blkptr.north west}{blk\_ptr} 43 | \node (blkptr02) [% 44 | blkptr, 45 | fit={(blkptr0)}, 46 | draw, below left=1.25cm and 1cm of blkptr.north west, inner sep=0pt, 47 | fill=blkptr_bg_color!50] {}; 48 | \node (blkptr01) [% 49 | blkptr, 50 | fit={(blkptr0)}, 51 | draw, below left=.25cm and .25cm of blkptr02.north east, inner sep=0pt, 52 | fill=blkptr_bg_color!50] {}; 53 | \objects{blkptr0}{% 54 | blkptr, below left = .25cm and .25cm of blkptr01.north east}{blk\_ptr} 55 | 56 | \begin{scope}[local bounding box=mos_part0, name prefix=mos_part0-] 57 | \mositem{empty} {below right=.75cm and -.5cm of blkptr0.south west} {}; 58 | \mositem{objdir} {right=0cm of empty.east} {\Longstack{O b j ~ D i r}}; 59 | \mositem{others} {right=0cm of 
objdir.east, % 60 | minimum width=4cm, text width=4cm}{....} 61 | \mositem{others} {right=0cm of others.east}{} 62 | \end{scope} 63 | \begin{scope}[local bounding box=mos_part1, name prefix=mos_part1-] 64 | \mositem{rootds} {right=1cm of mos_part0-others.east} {\Longstack{R o o t ~ D S}}; 65 | \mositem{empty} {right=0cm of rootds.east} {}; 66 | \mositem{child} {right=0cm of empty.east} {\scriptsize{\Longstack{C h i l d ~ M a p}}}; 67 | \mositem{others} {right=0cm of child.east, % 68 | minimum width=1cm, text width=1cm}{....}; 69 | \mositem{headds} {right=0cm of others.east} {\Longstack{H e a d ~ D S}}; 70 | \mositem{others} {right=0cm of headds.east, % 71 | minimum width=1cm, text width=1cm}{....}; 72 | \mositem{config} {right=0cm of others.east} {\Longstack{c o n f i g}}; 73 | \mositem{empty} {right=0cm of config.east} {}; 74 | \mositem{empty} {right=0cm of empty.east} {}; 75 | \end{scope} 76 | 77 | \node [below=0cm of mos_part0-empty, array_ru_color] {0}; 78 | \node [below=0cm of mos_part0-objdir, array_ru_color, text width=1.5cm, align=center] {1\\(fixed)}; 79 | \node [below=0cm of mos_part0-others, array_ru_color] {31}; 80 | \node [below=0cm of mos_part1-rootds, array_ru_color] {32}; 81 | \node [below=0cm of mos_part1-child, array_ru_color] {34}; 82 | \node [below=0cm of mos_part1-headds, array_ru_color] {54}; 83 | \node [below=0cm of mos_part1-config, array_ru_color] {61}; 84 | \node [below=0cm of mos_part1-empty, array_ru_color] {63}; 85 | \node [right=0cm of mos_part1-empty, xshift=.1cm, % 86 | array_ru_color, text width=3.5cm, align=center] {\Large\textbf{MOS}\\(dnode array)}; 87 | 88 | \draw [->] (ub.six west) -- ++(-.5cm,0) |- (os.one east); 89 | \draw [-, densely dashed] (os.one split west) -- (meta_dnode.north east); 90 | \draw [-, densely dashed] (os.two split west) -- (meta_dnode.south east); 91 | \draw [-, densely dashed] (meta_dnode.four split west) -- (blkptr.north east); 92 | \draw [-, densely dashed] (meta_dnode.five split west) -- (blkptr.south 
east); 93 | \draw [->] (blkptr.two west) -| (blkptr02.north); 94 | %\draw [->] (blkptr0.one west) -- ++(-.5cm,0) |- (mos_part0.west); 95 | \draw [->] (blkptr0.one west) -| (mos_part0.north west); 96 | %\draw [->] (blkptr0.two east) -- ++(1cm,0) -- ++(0,-3cm) -- ++(3cm,0) |- (mos_part1.west); 97 | \draw [->] (blkptr0.two east) -| (mos_part1.north west); 98 | \end{tikzpicture} 99 | -------------------------------------------------------------------------------- /docs/Figures/src/zfs_vdev.tikz: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[% 2 | pics/.cd, 3 | every node/.style = { 4 | outer sep=0pt, inner sep = 0pt 5 | }, 6 | disk/.style = { 7 | code = { 8 | \fill [white] ellipse [x radius = 1, y radius = 1/3]; 9 | \path [left color = black!50, right color = black!50, 10 | middle color = black!25] 11 | (-1+.05,-.55) arc (180:360:1-.05 and 1/3-.05*1/3) -- cycle; 12 | \path [top color = black!25, bottom color = white] 13 | (0,.05*1/3) ellipse [x radius = 1-.05, y radius = 1/3-.05*1/3]; 14 | \path [left color = black!25, right color = black!25, 15 | middle color = white] (-1,0) -- (-1,-1) arc (180:360:1 and 1/3) 16 | -- (1,0) arc (360:180:1 and 1/3); 17 | \foreach \r in {225,315} 18 | \foreach \i [evaluate = {\s=30;}] in {0,2,...,30} 19 | \fill [black, fill opacity = 1/50] 20 | (0,0) -- (\r+\s-\i:1 and 1/3) -- ++(0,-1) 21 | arc (\r+\s-\i:\r-\s+\i:1 and 1/3) -- ++(0,1) -- cycle; 22 | \foreach \r in {45,135} 23 | \foreach \i [evaluate = {\s=30;}] in {0,2,...,30} 24 | \fill [black, fill opacity = 1/50] 25 | (0,0) -- (\r+\s-\i:1 and 1/3) 26 | arc (\r+\s-\i:\r-\s+\i:1 and 1/3) -- cycle; 27 | } 28 | } 29 | ] 30 | \node (root) [draw, minimum width=2.5cm, minimum height=1.25cm] {root vdev}; 31 | \node (mirror1) [draw, minimum width=3cm, minimum height=1.5cm, text width = 3cm, align=center,% 32 | below left=.75cm and 1cm of root.south] {top level vdev\\(M1 D1/D2)}; 33 | \node (mirror2) [draw, minimum width=3cm, minimum 
height=1.5cm, text width = 3cm, align=center,% 34 | below right=.75 and 1cm of root.south] {top level vdev\\(M2 D3/D4)}; 35 | \path ($(mirror1.south) - (1.25cm,1.75cm)$) pic [local bounding box=D1] (d1) {disk}; 36 | \path ($(mirror1.south) - (-1.25cm,1.75cm)$) pic [local bounding box=D2] (d2) {disk}; 37 | \path ($(mirror2.south) - (1.25cm,1.75cm)$) pic [local bounding box=D3] (d3) {disk}; 38 | \path ($(mirror2.south) - (-1.25cm,1.75cm)$) pic [local bounding box=D4] (d4) {disk}; 39 | \node (logical) [draw, densely dashed, fit=(root)(mirror1)(mirror2),inner sep=7.5pt] {}; 40 | \node (related) [draw, thick, densely dotted, fit=(mirror1)(D1)(D2),inner sep=7.5pt] {}; 41 | \node (physical) [draw, thick, dashed, fit=(D1)(D2)(D3)(D4), yshift=-5pt, inner sep=15pt] {}; 42 | \node [below left = 0cm and 0cm of logical.north east] {Logical Vdevs}; 43 | \node [above right=0cm and 0cm of related.north west, rotate=-90] {Related Vdevs}; 44 | \node [above left=0cm and 0cm of physical.south east] {Physical Vdevs}; 45 | \node at (D1) [text=gray!80,yshift=-5pt]{D1}; 46 | \node at (D2) [text=gray!80,yshift=-5pt]{D2}; 47 | \node at (D3) [text=gray!80,yshift=-5pt]{D3}; 48 | \node at (D4) [text=gray!80,yshift=-5pt]{D4}; 49 | 50 | \draw ($(root.south west)!1/5!(root.south east)$) -- ($(mirror1.north east)!1/5!(mirror1.north west)$); 51 | \draw ($(root.south east)!1/5!(root.south west)$) -- ($(mirror2.north west)!1/5!(mirror2.north east)$); 52 | \draw ($(mirror1.south west)!1/4!(mirror1.south east)$) -- (D1); 53 | \draw ($(mirror1.south east)!1/4!(mirror1.south west)$) -- (D2); 54 | \draw ($(mirror2.south west)!1/4!(mirror2.south east)$) -- (D3); 55 | \draw ($(mirror2.south east)!1/4!(mirror2.south west)$) -- (D4); 56 | \end{tikzpicture} 57 | -------------------------------------------------------------------------------- /docs/Figures/zfs_embedded_blkptr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 
13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 
| 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | PUBDIR = . 2 | HTMLSTYLEDIR = styles 3 | HTMLSTYLES = $(wildcard $(HTMLSTYLEDIR)/*.css) 4 | HTMLCMDCSS = $(foreach css, $(HTMLSTYLES), --css $(css)) 5 | HTML5TEMPLATE = $(HTMLSTYLEDIR)/tufte.html5 6 | FIGURESDIR = Figures 7 | include $(FIGURESDIR)/Makefile.fig 8 | FIGURESRC = $(foreach fig, $(FIGURES_PDF), $(FIGURESDIR)/src/$(fig:.pdf=.tikz)) 9 | 10 | PANDOCPATH=$(HOME)/.local/bin 11 | FILTERDIR ?= $(HOME)/Projects/Luke/pandoc-filter 12 | PANDOCFILTERS = $(FILTERDIR)/codeblock/codeblock-filter \ 13 | $(FILTERDIR)/image/image-filter \ 14 | $(PANDOCPATH)/pandoc-crossref $(PANDOCPATH)/pandoc-citeproc 15 | EXTRAARGS = $(foreach filter, $(PANDOCFILTERS), -F $(filter)) \ 16 | --listings 17 | 18 | ifeq ($(DEBUG), y) 19 | OUTPUT=--verbose 20 | else 21 | OUTPUT=>/dev/null 2>&1 22 | endif 23 | 24 | .SECONDEXPANSION: 25 | 26 | zfs_internals_mdparts = zfs_internals.md 27 | MDPDFTARGETS = zfs_internals.md.pdf 28 | MDHTMLTARGETS = zfs_internals.md.html 29 | 30 | include Makefile.in 31 | -------------------------------------------------------------------------------- /docs/Makefile.in: -------------------------------------------------------------------------------- 1 | ifeq ($(DEBUG), y) 2 | OUTPUT=--verbose 3 | else 4 | OUTPUT=>/dev/null 2>&1 5 | endif 6 | 7 | PUBDIR = . 
8 | all: html pdf 9 | html: $(foreach target, $(MDHTMLTARGETS), $(PUBDIR)/$(target)) 10 | pdf: $(foreach target, $(MDPDFTARGETS), $(PUBDIR)/$(target)) 11 | .PHONY: all pdf html 12 | 13 | # Build NAME.md.html from the Markdown parts listed in $(NAME_mdparts). 14 | # Figures are regenerated first through $(MAKE) so make's flags and jobserver reach the sub-make. 15 | # pandoc keeps only the last --from it is given, so every reader extension goes in one format string. 16 | $(PUBDIR)/%.md.html: $$($$*_mdparts) $$(LISTINGS) $(HTMLSTYLES) $(FIGURESRC) 17 | @if [ -d $(FIGURESDIR) -a -f $(FIGURESDIR)/Makefile ]; \ 18 | then \ 19 | printf "%-12s %s\n" "Generating" "figures"; \ 20 | $(MAKE) -C $(FIGURESDIR) figures \ 21 | DEBUG=$(DEBUG) \ 22 | --no-print-directory; \ 23 | fi 24 | @printf "%-12s %s\n" "Generating" $@ 25 | @$(PANDOCPATH)/pandoc $(EXTRAARGS) \ 26 | --katex=https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/ \ 27 | --toc --toc-depth=2 \ 28 | --wrap=preserve \ 29 | --listings \ 30 | --section-divs \ 31 | --from markdown+fenced_divs+pipe_tables+table_captions+tex_math_single_backslash+definition_lists \ 32 | --to html5 --template=$(HTML5TEMPLATE) \ 33 | $(HTMLCMDCSS) \ 34 | --metadata date="`date`" \ 35 | --output $@ $($*_mdparts) $(OUTPUT) 36 | 37 | TEXTEMPLATE = templates/template.tex 38 | 39 | # Build NAME.md.pdf with xelatex, using the same pandoc binary as the HTML rule so it matches the filters. 40 | # As above, the reader extensions are combined into a single --from. 41 | $(PUBDIR)/%.md.pdf: $$($$*_mdparts) $(TEXTEMPLATE) 42 | @if [ -d $(FIGURESDIR) -a -f $(FIGURESDIR)/Makefile ]; \ 43 | then \ 44 | printf "%-12s %s\n" "Generating" "figures"; \ 45 | $(MAKE) -C $(FIGURESDIR) figures \ 46 | TEXINPUTS="$(TEXINPUTS):$(STYLEDIR)" \ 47 | DEBUG=$(DEBUG) \ 48 | DOCTOOLS=$(DOCTOOLSPATH) \ 49 | --no-print-directory; \ 50 | fi 51 | @printf "%-12s %s\n" "Generating" $@ 52 | @TEXINPUTS="$(TEXINPUTS):./templates" \ 53 | $(PANDOCPATH)/pandoc $(EXTRAARGS) \ 54 | --pdf-engine=xelatex \ 55 | --pdf-engine-opt="-shell-escape" \ 56 | --from markdown+tex_math_single_backslash-markdown_in_html_blocks-native_divs+pipe_tables+table_captions \ 57 | --template=$(TEXTEMPLATE) $($*_mdparts) -o $@ $(OUTPUT) 58 | -------------------------------------------------------------------------------- /docs/README.md: 
-------------------------------------------------------------------------------- 1 | This directory contains all files needed 2 | to build the documents. 3 | To generate the final documentation on Linux, 4 | you will have to install [Pandoc](https://pandoc.org/) 5 | and some filters, 6 | [TeX Live](https://tug.org/texlive/) and some packages. 7 | Additionally, 8 | you will have to compile two Pandoc filters written by me. 9 | 10 | # Pandoc 11 | It's pretty easy to install Pandoc on Fedora: 12 | 13 | ```bash 14 | $ sudo dnf install haskell-platform 15 | $ cabal update 16 | $ cabal install cabal-install \ 17 | && /usr/local/bin/cabal --version \ 18 | && /usr/local/bin/cabal v2-update \ 19 | && /usr/local/bin/cabal v2-install pandoc pandoc-citeproc pandoc-crossref 20 | ``` 21 | 22 | After installation, 23 | you can remove the Haskell Platform to free about 6-7 GB of disk space. 24 | 25 | ```bash 26 | $ sudo dnf autoremove -y haskell-platform 27 | ``` 28 | 29 | On Ubuntu, 30 | you can use [stack](https://www.haskellstack.org) 31 | to install Pandoc and the needed filters. 32 | 33 | ```bash 34 | $ stack install pandoc pandoc-citeproc pandoc-crossref 35 | ``` 36 | 37 | It will likely fail, 38 | but the solution is in the error message. 39 | 40 | Install [Go](https://golang.org), 41 | clone pandoc-filter from 42 | https://github.com/vupiggy/pandoc-filter.git, 43 | then 44 | 45 | ```bash 46 | $ cd codeblock 47 | $ go build -o codeblock-filter main.go 48 | $ cd ../image 49 | $ go build -o image-filter main.go 50 | ``` 51 | 52 | # TeX Live 53 | To install TeX Live, 54 | you can use `apt`, `dnf`, 55 | or the install script from CTAN. 
56 | 57 | ```bash 58 | $ wget -c http://mirror.ctan.org/systems/texlive/tlnet/install-tl-unx.tar.gz \ 59 | && mkdir ./install-tl \ 60 | && tar --strip-components 1 -zvxf install-tl-unx.tar.gz -C "./install-tl" \ 61 | && ./install-tl/install-tl --profile=./texlive.profile 62 | ``` 63 | 64 | The contents of `texlive.profile`: 65 | 66 | ``` 67 | selected_scheme scheme-small 68 | TEXDIR /usr/local/texlive/2021 69 | TEXMFCONFIG ~/.texlive2021/texmf-config 70 | TEXMFHOME ~/.texmf 71 | TEXMFLOCAL /usr/local/texlive/texmf-local 72 | TEXMFSYSCONFIG /usr/local/texlive/2021/texmf-config 73 | TEXMFSYSVAR /usr/local/texlive/2021/texmf-var 74 | TEXMFVAR ~/.texlive2021/texmf-var 75 | binary_x86_64-linux 1 76 | instopt_adjustpath 1 77 | instopt_adjustrepo 1 78 | instopt_letter 1 79 | instopt_portable 0 80 | instopt_write18_restricted 1 81 | tlpdbopt_autobackup 1 82 | tlpdbopt_backupdir tlpkg/backups 83 | tlpdbopt_create_formats 1 84 | tlpdbopt_desktop_integration 1 85 | tlpdbopt_file_assocs 1 86 | tlpdbopt_generate_updmap 0 87 | tlpdbopt_install_docfiles 1 88 | tlpdbopt_install_srcfiles 1 89 | tlpdbopt_post_code 1 90 | tlpdbopt_sys_bin /usr/bin 91 | tlpdbopt_sys_info /usr/share/info 92 | tlpdbopt_sys_man /usr/share/man 93 | tlpdbopt_w32_multi_user 0 94 | 95 | ``` 96 | 97 | Then install some needed LaTeX packages: 98 | 99 | ```bash 100 | $ sudo tlmgr install \ 101 | standalone \ 102 | luatex85 \ 103 | capt-of \ 104 | tkz-base \ 105 | tkz-euclide \ 106 | numprint \ 107 | xstring \ 108 | pgfopts \ 109 | flowchart \ 110 | makeshape \ 111 | IEEEtran \ 112 | anyfontsize \ 113 | xwatermark \ 114 | framed \ 115 | tocloft \ 116 | catoptions \ 117 | ltxkeys \ 118 | rsfs \ 119 | titlesec \ 120 | diagbox \ 121 | appendix \ 122 | pict2e \ 123 | was \ 124 | fourier \ 125 | utopia \ 126 | listofitems \ 127 | readarray \ 128 | verbatimbox \ 129 | ctex \ 130 | pgfplots \ 131 | enumitem \ 132 | harmony \ 133 | musixtex-fonts \ 134 | adjustbox \ 135 | collectbox \ 136 | siunitx \ 137 | 
collection-fontsrecommended \ 138 | chngcntr \ 139 | stackengine \ 140 | tasks \ 141 | exam \ 142 | exercise \ 143 | xsim \ 144 | scalerel \ 145 | newpx \ 146 | fontaxes \ 147 | kastrup \ 148 | newtx \ 149 | esvect \ 150 | stix \ 151 | zref \ 152 | tkz-doc \ 153 | mdframed \ 154 | datetime2 \ 155 | tracklang \ 156 | marginnote \ 157 | soulpos \ 158 | soulutf8 \ 159 | needspace \ 160 | footmisc \ 161 | xpatch \ 162 | etoc \ 163 | cancel \ 164 | tikz-3dplot \ 165 | pgf-blur \ 166 | lstaddons \ 167 | xint 168 | ``` 169 | 170 | # Fonts 171 | Run the `fc-list` command to check whether the fonts 172 | "DejaVu Sans" and "Ubuntu Mono" are installed; 173 | if not, install them with `apt` or `dnf`. 174 | 175 | # Make 176 | 177 | Modify the Makefile, setting `FILTERDIR` to the cloned and built pandoc-filter directory, 178 | then run `make` in this directory. 179 | Alternatively, run `make FILTERDIR=/path/to/pandoc-filter` to avoid modifying the Makefile. 180 | -------------------------------------------------------------------------------- /docs/styles/pandoc.css: -------------------------------------------------------------------------------- 1 | /* 2 | * I add this to html files generated with pandoc. 
3 | */ 4 | 5 | html { 6 | font-size: 100%; 7 | overflow-y: scroll; 8 | -webkit-text-size-adjust: 100%; 9 | -ms-text-size-adjust: 100%; 10 | } 11 | 12 | .author, .date { 13 | font-size: 1.2em; 14 | } 15 | 16 | body { 17 | color: #111; 18 | font-family: Palatino, 'Palatino Linotype', serif, 'Hiragino Sans GB'; 19 | font-size: 12px; 20 | line-height: 1.5; 21 | margin-left: 10em; 22 | margin-right: auto; 23 | padding: 1em; 24 | max-width: 48em; 25 | background: #fefefe; 26 | text-align: justify; 27 | } 28 | 29 | /* 30 | @font-face { 31 | font-family: 'Hiragino Sans GB', "Kaiti SC"; 32 | unicode-range: U+00-FF, U+980-9FF; 33 | } 34 | */ 35 | 36 | a { 37 | color: #06e; 38 | /* color: #0645ad; */ 39 | text-decoration: none; 40 | } 41 | 42 | a:visited { 43 | /* color: #0b0080; */ 44 | color: #06e; 45 | } 46 | 47 | a:hover { 48 | color: #06e; 49 | } 50 | 51 | a:active { 52 | color: #06e; 53 | /* color: #faa700; */ 54 | } 55 | 56 | a:focus { 57 | outline: thin dotted; 58 | } 59 | 60 | *::-moz-selection { 61 | background: rgba(255, 255, 0, 0.3); 62 | color: #000; 63 | } 64 | 65 | *::selection { 66 | background: rgba(255, 255, 0, 0.3); 67 | color: #000; 68 | } 69 | 70 | a::-moz-selection { 71 | background: rgba(255, 255, 0, 0.3); 72 | color: #0645ad; 73 | } 74 | 75 | a::selection { 76 | background: rgba(255, 255, 0, 0.3); 77 | color: #0645ad; 78 | } 79 | 80 | p { 81 | margin: 1em 0; 82 | } 83 | 84 | em:lang(en) { 85 | font-style: italic; 86 | } 87 | 88 | em:lang(zh) { 89 | font-style: normal; 90 | font-family: "Kaiti SC"; 91 | font-size: 1.1em; 92 | } 93 | 94 | img { 95 | max-width: 100%; 96 | } 97 | 98 | h1, h2, h3, h4, h5, h6 { 99 | color: #111; 100 | font-family: 'Helvetica Neue', "Kaiti SC"; 101 | line-height: 125%; 102 | margin-top: 2em; 103 | font-weight: normal; 104 | } 105 | 106 | h4, h5, h6 { 107 | font-weight: bold; 108 | } 109 | 110 | h1 { 111 | font-size: 2.2em; 112 | } 113 | 114 | h1:after 115 | { 116 | content:' '; 117 | display:block; 118 | border:1px solid 
gray; 119 | } 120 | 121 | h2 { 122 | font-size: 1.8em; 123 | } 124 | 125 | h3 { 126 | font-size: 1.4em; 127 | } 128 | 129 | /* We don't have any level deeper than 3 130 | h4 { 131 | font-size: 1.2em; 132 | } 133 | 134 | h5 { 135 | font-size: 1em; 136 | } 137 | 138 | h6 { 139 | font-size: 0.9em; 140 | } 141 | */ 142 | 143 | blockquote { 144 | color: #666666; 145 | margin: 0; 146 | padding-left: 3em; 147 | border-left: 0.5em #EEE solid; 148 | } 149 | 150 | hr { 151 | display: block; 152 | height: 2px; 153 | border: 0; 154 | border-top: 1px solid #aaa; 155 | border-bottom: 1px solid #eee; 156 | margin: 1em 0; 157 | padding: 0; 158 | } 159 | 160 | div.sourceCode { 161 | padding-left: 1em; 162 | padding-top: 0.5ex; 163 | padding-bottom: 0.5ex; 164 | border: 1px solid #ddd; 165 | width:105%; 166 | } 167 | 168 | pre, code, kbd, samp { 169 | /* color: #000; */ 170 | /* border: 1px solid #ddd; */ 171 | font-family: monospace, monospace; 172 | _font-family: 'Cascadia', 'courier new', monospace; 173 | font-size: 0.925em; 174 | } 175 | 176 | .bash { 177 | } 178 | 179 | pre { 180 | white-space: pre; 181 | white-space: pre-wrap; 182 | word-wrap: break-word; 183 | max-width:120%; 184 | } 185 | 186 | b, strong { 187 | font-weight: bold; 188 | } 189 | 190 | dfn { 191 | font-style: italic; 192 | } 193 | 194 | ins { 195 | background: #ff9; 196 | color: #000; 197 | text-decoration: none; 198 | } 199 | 200 | mark { 201 | background: #ff0; 202 | color: #000; 203 | font-style: italic; 204 | font-weight: bold; 205 | } 206 | 207 | sub, sup { 208 | font-size: 75%; 209 | line-height: 0; 210 | position: relative; 211 | vertical-align: baseline; 212 | } 213 | 214 | sup { 215 | top: -0.5em; 216 | } 217 | 218 | sub { 219 | bottom: -0.25em; 220 | } 221 | 222 | ul, ol { 223 | margin: 1em 0; 224 | padding: 0 0 0 2em; 225 | } 226 | 227 | li p:last-child { 228 | margin-bottom: 0; 229 | } 230 | 231 | ul ul, ol ol { 232 | margin: .3em 0; 233 | } 234 | 235 | dl { 236 | margin-bottom: 1em; 237 | } 238 | 
239 | dt { 240 | font-weight: bold; 241 | margin-bottom: .8em; 242 | } 243 | 244 | dd { 245 | margin: 0 0 .8em 2em; 246 | } 247 | 248 | dd:last-child { 249 | margin-bottom: 0; 250 | } 251 | 252 | img { 253 | border: 0; 254 | -ms-interpolation-mode: bicubic; 255 | vertical-align: middle; 256 | } 257 | 258 | figure { 259 | display: block; 260 | text-align: center; 261 | margin: 1em 0; 262 | } 263 | 264 | figure img { 265 | border: none; 266 | margin: 0 auto; 267 | } 268 | 269 | figcaption { 270 | font-size: 0.8em; 271 | /* font-style: italic; */ 272 | /* float: right; so ugly */ 273 | clear: right; 274 | margin-top: 0; 275 | margin-bottom: 0; 276 | } 277 | 278 | table { 279 | margin-bottom: 2em; 280 | border-bottom: 1px solid #ddd; 281 | border-right: 1px solid #ddd; 282 | border-spacing: 0; 283 | border-collapse: collapse; 284 | } 285 | 286 | table th { 287 | padding: .2em 1em; 288 | background-color: #eee; 289 | border-top: 1px solid #ddd; 290 | border-left: 1px solid #ddd; 291 | } 292 | 293 | table td { 294 | padding: .2em 1em; 295 | border-top: 1px solid #ddd; 296 | border-left: 1px solid #ddd; 297 | vertical-align: top; 298 | } 299 | 300 | @media only screen and (min-width: 480px) { 301 | body { 302 | font-size: 14px; 303 | } 304 | } 305 | @media only screen and (min-width: 768px) { 306 | body { 307 | font-size: 16px; 308 | } 309 | } 310 | @media print { 311 | * { 312 | background: transparent !important; 313 | color: black !important; 314 | filter: none !important; 315 | -ms-filter: none !important; 316 | } 317 | 318 | body { 319 | font-size: 12pt; 320 | max-width: 100%; 321 | } 322 | 323 | a, a:visited { 324 | text-decoration: underline; 325 | } 326 | 327 | hr { 328 | height: 1px; 329 | border: 0; 330 | border-bottom: 1px solid black; 331 | } 332 | 333 | a[href]:after { 334 | content: " (" attr(href) ")"; 335 | } 336 | 337 | abbr[title]:after { 338 | content: " (" attr(title) ")"; 339 | } 340 | 341 | .ir a:after, a[href^="javascript:"]:after, 
a[href^="#"]:after { 342 | content: ""; 343 | } 344 | 345 | pre, blockquote { 346 | border: 1px solid #999; 347 | padding-right: 1em; 348 | page-break-inside: avoid; 349 | } 350 | 351 | tr, img { 352 | page-break-inside: avoid; 353 | } 354 | 355 | img { 356 | max-width: 100% !important; 357 | } 358 | 359 | @page :left { 360 | margin: 15mm 20mm 15mm 10mm; 361 | } 362 | 363 | @page :right { 364 | margin: 15mm 10mm 15mm 20mm; 365 | } 366 | 367 | p, h2, h3 { 368 | orphans: 3; 369 | widows: 3; 370 | } 371 | 372 | h2, h3 { 373 | page-break-after: avoid; 374 | } 375 | } 376 | 377 | /* Luke's hack */ 378 | 379 | #TOC { 380 | position: fixed; 381 | right: 5em; 382 | top: 10em; 383 | border: 1px solid #aaa; 384 | padding: 20px; 385 | } 386 | -------------------------------------------------------------------------------- /docs/styles/tufte-extra.css: -------------------------------------------------------------------------------- 1 | /* The default x-height for code is slightly too large in side notes */ 2 | .marginnote code, .sidenote code { font-size: 0.95rem; } 3 | -------------------------------------------------------------------------------- /docs/styles/tufte.html5: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | $for(author-meta)$ 8 | 9 | $endfor$ 10 | $if(date-meta)$ 11 | 12 | $endif$ 13 | $if(keywords)$ 14 | 15 | $endif$ 16 | $if(title-prefix)$$title-prefix$ – $endif$$pagetitle$ 17 | 18 | $if(quotes)$ 19 | 20 | $endif$ 21 | $if(highlighting-css)$ 22 | 25 | $endif$ 26 | $for(css)$ 27 | 28 | $endfor$ 29 | $if(math)$ 30 | $math$ 31 | $endif$ 32 | 123 | 124 | 127 | 128 | $for(header-includes)$ 129 | $header-includes$ 130 | $endfor$ 131 | 132 | 133 | $for(include-before)$ 134 | $include-before$ 135 | $endfor$ 136 |
137 | $if(title)$ 138 |
139 |

$title$

140 | $if(subtitle)$ 141 |

$subtitle$

142 | $endif$ 143 | $if(date)$ 144 | 147 | 148 | 149 | $else$ 150 | $if(author)$ 151 | 152 | $endif$ 153 | $endif$ 154 |
155 | $endif$ 156 | $if(toc)$ 157 | 160 | $endif$ 161 | $body$ 162 |
163 | $for(include-after)$ 164 | $include-after$ 165 | $endfor$ 166 | 167 | 168 | -------------------------------------------------------------------------------- /docs/templates/preamble.tex: -------------------------------------------------------------------------------- 1 | \usepackage{calc} 2 | \usepackage[dvipsnames]{xcolor} 3 | 4 | % Hyperlinks: every option must end with a comma, because the trailing % joins the lines 5 | \usepackage[% 6 | breaklinks=true,% 7 | colorlinks,% 8 | citecolor=blue,% 9 | filecolor=blue,% 10 | linkcolor=blue,% 11 | urlcolor=blue,% 12 | bookmarksopen=true,% 13 | pdfstartview=FitW% 14 | ]{hyperref} 15 | 16 | \usepackage{titlesec} 17 | 18 | % Copyright symbol 19 | \usepackage{textcomp} 20 | 21 | % header/footer 22 | \usepackage{fancyhdr} % exam class has already used fancyhdr 23 | 24 | % fancy stuff 25 | \usepackage{longtable} 26 | \usepackage{booktabs} 27 | \usepackage{framed} 28 | 29 | \usepackage{verbatimbox} 30 | \usepackage[inline]{enumitem} 31 | 32 | \usepackage[export]{adjustbox} 33 | \usepackage{capt-of} 34 | 35 | \usepackage{verbatim} % For comment environment and something else 36 | 37 | \usepackage{tikz} 38 | \usetikzlibrary{% 39 | arrows.meta, 40 | positioning, 41 | % shadows.blur, 42 | shapes, 43 | calc, 44 | intersections, 45 | quotes, 46 | angles, 47 | } 48 | \usepackage{tkz-euclide} 49 | % \usetkzobj{all} texlive2019 use tkz-obj-eu-angles ... 50 | 51 | \usepackage{anyfontsize} 52 | \newsavebox\wmbox 53 | \savebox\wmbox{\tikz[color=gray,opacity=0.15]% 54 | \node{\shortstack{\tiny Luke Huang\\{\fontsize{3}{3}\selectfont bwonder475@tutanota.com}}};} 55 | 56 | \usepackage{graphicx,tipa,caption} 57 | \usepackage{flowchart} 58 | \newcommand*\circled[1]{\tikz[baseline=(char.base)]{ 59 | \node[shape=circle,draw,inner sep=1pt] (char) {#1};}} 60 | 61 | % my personal bad taste ;), feel free to remove if you don't like it. 
62 | \linespread{1.1} 63 | \addtolength{\parskip}{0.25ex} 64 | \usepackage{xltxtra} % this package loads fontspec 65 | \defaultfontfeatures{Mapping=tex-text} 66 | 67 | % Palatino for serif 68 | %\usepackage[sc,osf]{mathpazo} 69 | \usepackage[sc]{mathpazo} 70 | % Helvetica for sans serif 71 | \usepackage[scaled]{helvet} 72 | \usepackage[T1]{fontenc} 73 | \setmonofont{Ubuntu Mono} 74 | 75 | \makeatletter 76 | \@ifclassloaded{memoir}{% 77 | \usepackage{parskip} 78 | \setlength{\marginparsep}{0pt} 79 | \addtolength{\textwidth}{100pt} 80 | \addtolength{\textheight}{6em} 81 | \addtolength{\topmargin}{2em} 82 | \addtolength{\headheight}{2em} 83 | \setlength{\marginparwidth}{0pt} 84 | \setulmargins{4cm}{*}{*} 85 | \setlrmargins{1in}{*}{*} 86 | \addtolength{\cftbeforesectionskip}{2pt} 87 | \checkandfixthelayout 88 | }{% 89 | \usepackage[twoside, inner=2.4cm, outer=4.4cm]{geometry} 90 | \usepackage{tocloft} 91 | } 92 | \makeatother 93 | \setcounter{tocdepth}{2} 94 | 95 | % Colors for listings 96 | \definecolor{lstkeyword} {RGB}{0, 112, 32} 97 | \definecolor{lstkeywordG} {RGB}{153, 168, 87} 98 | \definecolor{lstkeywordR} {RGB}{144, 32, 0} 99 | \definecolor{lstkeywordO} {RGB}{255,165,0} 100 | \definecolor{lstcomment} {RGB}{96, 160, 176} 101 | \definecolor{lstnumber} {RGB}{188, 122, 0} 102 | \definecolor{lststring} {rgb}{0.58,0,0.82} 103 | \definecolor{lstbg} {RGB}{240,240,240} 104 | 105 | \usepackage{harmony} 106 | \setlist[itemize,1]{labelsep=1.2em,label=\large\bfseries{$\Vier$}} 107 | \setlist[itemize,2]{labelsep=1.2em,label=\large\bfseries{$\AAcht$}} 108 | \setlist[itemize,3]{labelsep=1.2em,label=\large\bfseries{$\sharp$}} 109 | 110 | \usepackage{listings} 111 | \newcommand{\passthrough}[1]{#1} 112 | \lstset{columns=flexible} 113 | 114 | \makeatletter 115 | \lstset{language=C++, 116 | basicstyle=\lst@ifdisplaystyle\ttfamily\footnotesize\else\color{Bittersweet}\ttfamily\footnotesize\fi, 117 | backgroundcolor=\color{lstbg}, 118 | keywordstyle=\color{lstkeyword}, 119 | 
%emph={int,char,double,float,unsigned,void,bool}, 120 | %emphstyle=\color{lstkeywordR}, 121 | % classoffset=1, 122 | % otherkeywords={>,<,.,;,-,!,=,~}, 123 | % morekeywords={>,<,.,;,-,!,=,~}, 124 | % keywordstyle=\color{lstkeywordO}, 125 | % classoffset=0, 126 | stringstyle=\color{lststring}, 127 | commentstyle=\ttfamily\color{lstcomment}, 128 | captionpos=b, 129 | numberstyle=\color{lstnumber}, 130 | frame=single, 131 | framesep=4pt, 132 | framerule=0pt, 133 | xleftmargin=1em, 134 | tabsize=2, 135 | columns=flexible, 136 | keepspaces=true, 137 | breaklines=true, 138 | } 139 | \makeatother 140 | %\lstset{style=mystyle} 141 | 142 | %%% Extra stuff 143 | \lstdefinelanguage{C++}{ 144 | morekeywords={void} 145 | } 146 | \lstdefinelanguage{mybash}{ 147 | morekeywords={sudo} 148 | } 149 | \lstdefinelanguage{myelisp}{ 150 | language=elisp, 151 | morekeywords={setcar, setcdr, nthcdr}, 152 | } 153 | 154 | % \lstset{ 155 | % language = Bash, 156 | % literate = {\$\#}{{{\#}}}2, 157 | % columns = fullflexible, 158 | % keepspaces, 159 | % } 160 | -------------------------------------------------------------------------------- /docs/templates/template.tex: -------------------------------------------------------------------------------- 1 | \documentclass[ 2 | $if(papersize)$ 3 | $papersize$paper, 4 | $endif$ 5 | $if(extraoptions)$ 6 | $extraoptions$ 7 | $endif$ 8 | ]{$documentclass$} 9 | 10 | $if(fontpath)$ 11 | \def\fontpath{$fontpath$} 12 | $endif$ 13 | 14 | $if(lang)$ 15 | \def\$lang${true} 16 | $endif$ 17 | 18 | \input{preamble} 19 | 20 | $if(figcaption)$ 21 | \captionsetup{justification=$figcaption$, singlelinecheck=false} 22 | $endif$ 23 | 24 | $if(highlighting-macros)$ 25 | $highlighting-macros$ 26 | $endif$ 27 | \providecommand{\tightlist}{\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 28 | 29 | \begin{document} 30 | 31 | \newcounter{groupdef} 32 | \pagestyle{empty} 33 | \tableofcontents 34 | \newpage 35 | 36 | $if(watermark)$ 37 | 
\newwatermark*[oddpages,color=red!5,angle=60,scale=12,xpos=-46,ypos=36]{\usebox\mybox} 38 | \newwatermark*[evenpages,color=red!5,angle=60,scale=12,xpos=-59,ypos=36]{\usebox\mybox} 39 | $endif$ 40 | 41 | \setlength{\headheight}{15pt} % fancyhead should be at least 14.03358pt 42 | \pagestyle{fancy} 43 | 44 | $body$ 45 | 46 | \end{document} 47 | -------------------------------------------------------------------------------- /docs/zfs_internals.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: ZFS On-disk Format 3 | author: Luke Huang \ 4 | documentclass: memoir 5 | lang: en-US 6 | secPrefix: Section. 7 | tblPrefix: Table. 8 | figPrefix: Figure. 9 | --- 10 | 11 | `

Click here for a printer friendly PDF version.

`{=html} 12 | 13 | # On-disk Format 14 | 15 | ## Vdev 16 | 17 | ZFS storage pools are essentially a collection of _virtual devices_, or _vdevs_, 18 | which are arranged in a tree 19 | with physical vdevs sitting at leaves. 20 | @fig:vdev illustrates such a tree representing 21 | a sample pool configuration containing two mirrors, 22 | each of which has two disks. 23 | 24 | ```{.figure} 25 | { 26 | "path" : "Figures/zfs_vdev", 27 | "caption" : "Vdev Tree", 28 | "label" : "vdev", 29 | "options" : "scale=1,center", 30 | "place" : "ht" 31 | } 32 | ``` 33 | 34 | ## Label and Uberblock 35 | 36 | Each physical vdev within a storage pool contains four identical copies of 256KB structure 37 | called a vdev _label_ (`vdev_label_t`). 38 | The vdev label 39 | contains information describing this physical vdev 40 | and all other vdevs which share a common top-level vdev as an ancestor. 41 | For example, 42 | in @fig:vdev, 43 | the vdev label on D3 44 | would contain information describing D3, D4, and M2. 45 | Any copy of the labels can be used to access and verify the pool. 46 | There are two labels at the front of the device 47 | and two labels at the back. 48 | @fig:label illustrates the physical layout of vdev labels. 49 | 50 | ```{.figure} 51 | { 52 | "path" : "Figures/zfs_label", 53 | "caption" : "From label to MOS", 54 | "label" : "label", 55 | "options" : "width=\\textwidth", 56 | "place" : "ht" 57 | } 58 | ``` 59 | 60 | An _uberblock_ (`uberblock_t`) contains information 61 | necessary to access the contents of the pool. 62 | Only one uberblock in the pool is _active_ at any time. 63 | The uberblock with the highest transaction group number 64 | and valid checksum is the active uberblock. 65 | @fig:ub illustrates the layout of an active uberblock, 66 | and how accessing the storage pool can start from it. 
67 | 68 | ```{.figure} 69 | { 70 | "path" : "Figures/zfs_ub", 71 | "caption" : "From Uberblock to MOS", 72 | "label" : "ub", 73 | "options" : "width=\\textwidth", 74 | "place" : "ht" 75 | } 76 | ``` 77 | 78 | ## Block Pointer 79 | 80 | Data is transferred between disk and main memory in units called blocks. 81 | A block pointer (`blkptr_t`) is a 128 byte ZFS structure 82 | used to physically locate, verify, and describe blocks of data on disk. 83 | The layout of a normal blkptr is shown in @fig:blkptr. 84 | Note that 85 | the size of a block is described by three different fields: 86 | _psize_, _lsize_, and _asize_. 87 | 88 | ```{.figure} 89 | { 90 | "path" : "Figures/zfs_blkptr", 91 | "caption" : "Block Pointer Layout", 92 | "label" : "blkptr", 93 | "options" : "scale=.8,center", 94 | "place" : "ht" 95 | } 96 | ``` 97 | 98 | * _lsize_: Logical size. 99 | The size of the data without compression, raidz or gang overhead. 100 | * _psize_: Physical size of the block on disk after compression 101 | * _asize_: Allocated size, 102 | total size of all blocks allocated to hold this data 103 | including any gang headers or raid-Z parity information 104 | 105 | Normally, 106 | block pointers point (via their DVAs) to a block which holds data. 107 | If the data that we need to store is very small, 108 | this is an inefficient use of space. 109 | Additionally, reading these small blocks tends to generate 110 | more random reads. 111 | Embedded-data block pointers were introduced to address this. 112 | They allow small pieces of data 113 | (the "payload", up to 112 bytes) to be embedded in the block pointer itself, 114 | in which case the block pointer doesn't point to anything. 115 | The layout of an embedded block pointer is shown in @fig:embedded. 
116 | 117 | ```{.figure} 118 | { 119 | "path" : "Figures/zfs_embedded_blkptr", 120 | "caption" : "Embedded Block Pointer Layout", 121 | "label" : "embedded", 122 | "options" : "scale=.8,center", 123 | "place" : "ht" 124 | } 125 | ``` 126 | 127 | ## DMU 128 | 129 | The _Data Management Unit_, 130 | or _DMU_ groups blocks into logical units 131 | called _objects_. 132 | Almost everything in ZFS is an object. 133 | Objects are defined by 512-byte structures called _dnodes_ (`dnode_phys_t`). 134 | A dnode describes and organizes a collection of blocks making up an object, 135 | for example, 136 | `dn_type` determines the type of the object, 137 | `dn_blkptr` stores the block pointers for block addressing. 138 | A dnode has a limited number (`dn_nblkptr`) of block pointers to describe an object's data. 139 | For a dnode using the largest data block size (128KB) 140 | and containing the maximum number of block pointers (3), 141 | the largest object size it can represent is 384 KB. 142 | To allow larger objects, 143 | indirect blocks are introduced, 144 | the largest indirect block (128KB) can hold up to 1024 block pointers, 145 | so that a 384MB object can be represented without the next level of indirection. 146 | The `dn_nlevels` field gives the total number of levels of addressing. 147 | @fig:indirect illustrates 3-level indirect addressing. 148 | 149 | ```{.figure} 150 | { 151 | "path" : "Figures/zfs_indirect", 152 | "caption" : "Indirect block addressing", 153 | "label" : "indirect", 154 | "options" : "scale=1,center", 155 | "place" : "ht" 156 | } 157 | ``` 158 | 159 | Related objects can be further grouped by the DMU into _object sets_. 160 | ZFS allows users to create four kinds of object sets: 161 | _filesystems_, _clones_, _snapshots_, and _volumes_. 162 | 163 | ## DSL 164 | 165 | The _Dataset and Snapshot Layer_, or _DSL_ 166 | is for describing 167 | and managing relationships-between and properties-of object sets. 
168 | 169 | Each object set is represented in the DSL as a _dataset_ (`dsl_dataset_phys_t`). 170 | Datasets are grouped into collections called _Dataset Directories_, 171 | which manage a related grouping of datasets 172 | and the properties associated with that grouping. 173 | A DSL directory always has exactly one _active_ dataset. 174 | All other datasets under the directory are related to the active dataset 175 | through _snapshots_, _clones_, or _child/parent dependencies_. 176 | 177 | ## MOS 178 | 179 | The DSL is implemented as an object set of type `DMU_OST_META`, 180 | which is often called the _Meta Object Set_, or _MOS_. 181 | There is a single distinguished object in the Meta Object Set, 182 | called the _object directory_. 183 | Object directory is always located in the $1^{st}$ element of the dnode array 184 | (index starts from $0$). 185 | All other objects can be located by traversing 186 | through a set of object references starting at this object. 187 | 188 | The object directory is implemented as a _ZAP_ object 189 | that is made up of name/value pairs. 190 | The object directory contains 191 | _root\_dataset_, _config_, _free\_bpobj_, 192 | and some other attribute pairs. 193 | 194 | @fig:mos illustrates a realistic layout of the MOS of a sample pool, 195 | from which a data set (e.g., file system) can be accessed. 196 | 197 | ```{.figure} 198 | { 199 | "path" : "Figures/zfs_mos", 200 | "caption" : "MOS", 201 | "label" : "mos", 202 | "options" : "width=\\textwidth", 203 | "place" : "ht" 204 | } 205 | ``` 206 | 207 | @fig:fs illustrates the path from a data set object to 208 | user data (contents of the sample file `/sbin/zdump`). 
209 | 210 | ```{.figure} 211 | { 212 | "path" : "Figures/zfs_fs", 213 | "caption" : "From the data set to user data", 214 | "label" : "fs", 215 | "options" : "width=\\textwidth", 216 | "place" : "ht" 217 | } 218 | ``` 219 | 220 | ## ZAP 221 | 222 | The _ZFS Attribute Processors_, or _ZAPs_ are objects 223 | used to store attributes in the form of name-value pairs. 224 | ZAPs come in two forms: 225 | _micro ZAP_ for a small number of attributes 226 | and _fat ZAP_ for a large number of attributes. 227 | Both of them are arranged 228 | based on a hash of the attribute's name. 229 | Directories in ZFS are implemented as ZAP objects. 230 | 231 | # On-Disk Data Walk (Or: Where's My Data) 232 | 233 | This part (title and content) was inspired by 234 | Max Bruning's great demonstration -- 235 | [ZFS On-Disk Data Walk (Or: Where's My Data)](http://www.osdevcon.org/2008/files/osdevcon2008-max.pdf) 236 | , and even better, 237 | his 238 | [training material](https://www.yumpu.com/en/document/view/3947186/zfs-on-disk-data-walk-or-wheres-my-data-bruning-systems). 239 | He used the modified[^mod] `mdb`, `zdb`, 240 | and `dd` to read and dump ZFS data structures from physical devices 241 | to illustrate the ZFS on-disk layout, 242 | from vdev label to content of a file. 243 | 244 | There is no `mdb` equivalent on Linux, 245 | and I don't want to switch among tools from time to time, 246 | so I wrote a simple tool to do all the things, 247 | reading (via `open` and `read` system calls), 248 | decompressing (by calling _liblz4_ functions) data from the physical vdev, 249 | and dumping the ZFS physical data structures in JSON format. 250 | It doesn't call any function of the ZFS libraries. 251 | A few helpers are still used 252 | because I was too lazy to write my own, 253 | perhaps I will remove all of them in the future. 254 | However the core functions 255 | such as `spa_xxxx`, `dmu_xxxx`, `dsl_xxxx`, `zio_xxxx`, are avoided. 
256 | 257 | [^mod]: by himself `👍`{=html}`\lower.6ex\hbox{\includegraphics{Figures/1F44D}}`{=latex} 258 | 259 | ## Environment 260 | 261 | ```bash 262 | $ cat /etc/os-release 263 | NAME="Ubuntu" 264 | VERSION="20.04.2 LTS (Focal Fossa)" 265 | .... output omitted .... 266 | $ sudo apt install libzpool2linux libzfs2linux libzfslinux-dev 267 | $ sudo apt install libnvpair1linux libjson-c-dev liblz4-dev 268 | ``` 269 | 270 | ## Preparation 271 | 272 | #. Clone and build `zdump` tool. 273 | #. Create a new file system, with a very simple hierarchy. 274 | Note that, 275 | as the `zdump` tool only supports lz4, 276 | the default compression algorithm of ZFS on Linux, 277 | don't set the `compression` property for creating ZFS. 278 | 279 | ```bash 280 | $ sudo zfs create -V 4G dpool/zvol0 281 | $ sudo fdisk -l /dev/zd0 | head -1 282 | Disk /dev/zd0: 4 GiB, 4294967296 bytes, 8388608 sectors 283 | $ sudo dmsetup create zdisk0 --table '0 8388607 linear /dev/zd0 0' 284 | $ sudo mkdir /mnt/zwalk 285 | $ sudo zpool create -f -m /mnt/zwalk zwalk /dev/mapper/zdisk0 286 | $ sudo zfs list 287 | NAME USED AVAIL REFER MOUNTPOINT 288 | zwalk 840K 3.62G 192K /mnt/zwalk 289 | $ sudo mkdir /mnt/zwalk/{sbin,var,usr} 290 | $ sudo su -c 'printf "#!/bin/bash\n\necho Hello ZFS\n" >/mnt/zwalk/sbin/zdump' 291 | $ cat /mnt/zwalk/sbin/zdump 292 | #!/bin/bash 293 | 294 | echo Hello ZFS 295 | ``` 296 | 297 | To clean up after the walk: 298 | 299 | ```bash 300 | $ sudo zpool destroy zwalk 301 | $ sudo dmsetup remove zdisk0 302 | $ sudo zfs destroy dpool/zvol0 303 | ``` 304 | 305 | #. Add the user into `disk` group 306 | so that `sudo` is not needed to read the block device file. 307 | 308 | ```bash 309 | $ sudo usermod -aG disk $(whoami) 310 | ``` 311 | 312 | ## Walk the data. 313 | 314 | #. The first step is dumping the label and active uberblock. 315 | Note that the `offset`, `psize`, and `lsize` are hexadecimal. 
316 | 317 | ```bash 318 | $ zdump --label /dev/mapper/zdisk0:0:40000/40000 319 | { 320 | "Vdev Label":{ 321 | "name":"zwalk", 322 | "version":5000, 323 | "uberblock":{ 324 | "magic":"0x0000000000bab10", 325 | "version":5000, 326 | "txg":10, 327 | "rootbp":{ 328 | "vdev":"0", 329 | "offset":"3801e000", 330 | "asize":"2000", 331 | "psize":"1000", 332 | "lsize":"1000", 333 | "compressed":"uncompressed" 334 | } 335 | } 336 | } 337 | } 338 | ``` 339 | 340 | #. Dump dnode of the MOS, 341 | using the `offset` ($3801e000$), 342 | `psize` ($1000$), and `lsize` ($1000$) we got from the previous step. 343 | 344 | ```bash 345 | $ zdump --mos /dev/mapper/zdisk0:"3801e000":1000/1000 346 | { 347 | "MOS":{ 348 | "os_type":"META", 349 | "dnonde":{ 350 | "dn_type":"DMU dnode", 351 | "dn_bonustype":0, 352 | "dn_indblkshift":17, 353 | "dn_nlevels":2, 354 | "dn_nblkptr":3, 355 | "dn_blkptr":[ 356 | { 357 | "vdev":"0", 358 | "offset":"4e000", 359 | "asize":"2000", 360 | "psize":"2000", 361 | "lsize":"20000", 362 | "compressed":"lz4" 363 | }, 364 | .... output omitted .... 365 | ] 366 | } 367 | } 368 | } 369 | ``` 370 | 371 | #. From output of the previous step, 372 | we notice that the MOS uses $2$-levels indirect addressing (`dn_nlevels` was $2$[^zvol]), 373 | so we need to find the $0^{th}$ level block pointer to access the data block. 374 | The `lsize` of each block pointer is $16$K, 375 | that can contain $32$ dnodes. 376 | 377 | [^zvol]: If we create the ZVOL with `-s` option, 378 | there will be only one level of block pointer. 
379 | 380 | ```bash 381 | $ zdump --indirect-blkptr /dev/mapper/zdisk0:"4e000":20000/2000:2 382 | { 383 | "[L0]":[ 384 | { 385 | "vdev":"0", 386 | "offset":"28008000", 387 | "asize":"2000", 388 | "psize":"2000", 389 | "lsize":"4000", 390 | "compressed":"lz4" 391 | }, 392 | { 393 | "vdev":"0", 394 | "offset":"2802c000", 395 | "asize":"2000", 396 | "psize":"2000", 397 | "lsize":"4000", 398 | "compressed":"lz4" 399 | }, 400 | { 401 | "vdev":"0", 402 | "offset":"0", 403 | "asize":"0", 404 | "psize":"200", 405 | "lsize":"200", 406 | "compressed":"inherit" 407 | }, 408 | { 409 | "vdev":"0", 410 | "offset":"0", 411 | "asize":"0", 412 | "psize":"200", 413 | "lsize":"200", 414 | "compressed":"inherit" 415 | }, 416 | { 417 | "vdev":"0", 418 | "offset":"28006000", 419 | "asize":"2000", 420 | "psize":"2000", 421 | "lsize":"4000", 422 | "compressed":"lz4" 423 | }, 424 | .... output omitted .... 425 | ] 426 | } 427 | ``` 428 | 429 | #. Dump the MOS object directory, 430 | which is the $1^{st}$ object (the $0^{th}$ is not used) in the dnode array. 431 | The MOS is a fat ZAP object, 432 | whose entries will be dumped as well as its dnode. 433 | We will use the `root_dataset` object to move forward. 
434 | 435 | ```bash 436 | $ zdump --mos-objdir /dev/mapper/zdisk0:"28008000":4000/2000 437 | { 438 | "dnode":{ 439 | "dn_type":"object directory", 440 | "dn_bonustype":0, 441 | "dn_indblkshift":17, 442 | "dn_nlevels":1, 443 | "dn_nblkptr":3, 444 | "dn_blkptr":[ 445 | { 446 | "vdev":"0", 447 | "offset":"10000", 448 | "asize":"2000", 449 | "psize":"2000", 450 | "lsize":"4000", 451 | "compressed":"lz4" 452 | }, 453 | { 454 | "vdev":"0", 455 | "offset":"12000", 456 | "asize":"2000", 457 | "psize":"2000", 458 | "lsize":"4000", 459 | "compressed":"lz4" 460 | }, 461 | { 462 | "vdev":"0", 463 | "offset":"0", 464 | "asize":"0", 465 | "psize":"200", 466 | "lsize":"200", 467 | "compressed":"inherit" 468 | } 469 | ] 470 | }, 471 | "FZAP":{ 472 | "zap_block_type":"ZBT_HEADER", 473 | "zap_magic":"0x00000002f52ab2a", 474 | "zap_num_entries":13, 475 | "zap_table_phys":{ 476 | "zt_blk":0 477 | } 478 | }, 479 | "FZAP leaf":{ 480 | "entries":[ 481 | { 482 | "name":"root_dataset", 483 | "value":32 484 | }, 485 | .... output omitted .... 486 | { 487 | "name":"config", 488 | "value":61 489 | }, 490 | .... output omitted .... 491 | ] 492 | } 493 | } 494 | ``` 495 | 496 | #. Dump the root data set. 497 | From the previous step, 498 | we knew that it's the $32^{nd}$ item in the dnode array, 499 | therefore, 500 | we seek it in the $1^{st}$ block (with the offset $2802c000$). 501 | `dsl_dir_phys_t` is stored 502 | in the `dn_bonus` field of dnode of the root data set object. 503 | We can see that the head data set object is the $54^{th}$ object, 504 | located in the same block as the root data set's. 
505 | 506 | ```bash 507 | $ zdump --mos-rootds /dev/mapper/zdisk0:"2802c000":4000/2000:32 508 | { 509 | "dnode":{ 510 | "dn_type":"DSL directory", 511 | "dn_bonustype":12, 512 | "dn_indblkshift":17, 513 | "dn_nlevels":1, 514 | "dn_nblkptr":1, 515 | "dn_blkptr":[ 516 | { 517 | "vdev":"0", 518 | "offset":"0", 519 | "asize":"0", 520 | "psize":"200", 521 | "lsize":"200", 522 | "compressed":"inherit" 523 | } 524 | ] 525 | }, 526 | "DSL":{ 527 | "dd_creation_time":"....", 528 | "dd_child_dir_zapobj":34, 529 | "dd_head_dataset_obj":54 530 | } 531 | } 532 | ``` 533 | 534 | #. Dump the childmap and head data set. 535 | The latter will be used to move forward to ZPL. 536 | 537 | ```bash 538 | $ zdump --mos-childmap /dev/mapper/zdisk0:"2802c000":4000/2000:34 539 | { 540 | "Embedded Block Pointer":{ 541 | "type":0, 542 | "psize":78, 543 | "lsize":512, 544 | "compressed":"lz4", 545 | "Micro ZAP":[ 546 | { 547 | "mze_name":"$MOS", 548 | "mze_value":35 549 | }, 550 | { 551 | "mze_name":"$FREE", 552 | "mze_value":38 553 | }, 554 | { 555 | "mze_name":"$ORIGIN", 556 | "mze_value":42 557 | } 558 | ] 559 | } 560 | } 561 | $ zdump --mos-headds /dev/mapper/zdisk0:"2802c000":4000/2000:54 562 | { 563 | "Head data set":{ 564 | "ds_dir_obj":32, 565 | "ds_creation_time":"Tue May 18 08:51:28", 566 | "ds_create_txg":1, 567 | "ds_bp":{ 568 | "vdev":"0", 569 | "offset":"8028000", 570 | "asize":"2000", 571 | "psize":"1000", 572 | "lsize":"1000", 573 | "compressed":"uncompressed" 574 | } 575 | }, 576 | "Object Set":{ 577 | "os_type":"ZPL", 578 | "dnonde":{ 579 | "dn_type":"DMU dnode", 580 | "dn_bonustype":0, 581 | "dn_indblkshift":17, 582 | "dn_nlevels":6, 583 | "dn_nblkptr":3, 584 | "dn_blkptr":[ 585 | { 586 | "vdev":"0", 587 | "offset":"8024000", 588 | "asize":"2000", 589 | "psize":"2000", 590 | "lsize":"20000", 591 | "compressed":"lz4" 592 | }, 593 | .... output omitted .... 594 | ] 595 | } 596 | } 597 | } 598 | ``` 599 | 600 | #. 
Note that 6-levels indirect block pointer is used, 601 | we need to walk down to the L0 block pointer first. 602 | 603 | ```bash 604 | $ zdump --indirect-blkptr /dev/mapper/zdisk0:"8024000":20000/2000:6 605 | { 606 | "[L4]":[ 607 | { 608 | "vdev":"0", 609 | "offset":"3801c000", 610 | "asize":"2000", 611 | "psize":"2000", 612 | "lsize":"20000", 613 | "compressed":"lz4" 614 | }, 615 | .... output omitted .... 616 | ], 617 | "[L3]":[ 618 | { 619 | "vdev":"0", 620 | "offset":"2802a000", 621 | "asize":"2000", 622 | "psize":"2000", 623 | "lsize":"20000", 624 | "compressed":"lz4" 625 | }, 626 | .... output omitted .... 627 | ], 628 | "[L2]":[ 629 | { 630 | "vdev":"0", 631 | "offset":"8022000", 632 | "asize":"2000", 633 | "psize":"2000", 634 | "lsize":"20000", 635 | "compressed":"lz4" 636 | }, 637 | .... output omitted .... 638 | ], 639 | "[L1]":[ 640 | { 641 | "vdev":"0", 642 | "offset":"4c000", 643 | "asize":"2000", 644 | "psize":"2000", 645 | "lsize":"20000", 646 | "compressed":"lz4" 647 | }, 648 | .... output omitted .... 649 | ], 650 | "[L0]":[ 651 | { 652 | "vdev":"0", 653 | "offset":"46000", 654 | "asize":"2000", 655 | "psize":"2000", 656 | "lsize":"4000", 657 | "compressed":"lz4" 658 | }, 659 | { 660 | "vdev":"0", 661 | "offset":"4a000", 662 | "asize":"2000", 663 | "psize":"2000", 664 | "lsize":"4000", 665 | "compressed":"lz4" 666 | }, 667 | { 668 | "vdev":"0", 669 | "offset":"0", 670 | "asize":"0", 671 | "psize":"200", 672 | "lsize":"200", 673 | "compressed":"inherit" 674 | }, 675 | { 676 | "vdev":"0", 677 | "offset":"0", 678 | "asize":"0", 679 | "psize":"200", 680 | "lsize":"200", 681 | "compressed":"inherit" 682 | }, 683 | { 684 | "vdev":"0", 685 | "offset":"48000", 686 | "asize":"2000", 687 | "psize":"2000", 688 | "lsize":"4000", 689 | "compressed":"lz4" 690 | }, 691 | .... output omitted .... 692 | ] 693 | } 694 | ``` 695 | 696 | #. Dump the master node, 697 | which is a micro ZAP object and fixed in the $1^{st}$ dnode in the array. 
698 | 699 | ```bash 700 | $ zdump --headds-masternode /dev/mapper/zdisk0:"46000":4000/2000 701 | { 702 | "dnode":{ 703 | "dn_type":"ZFS master node", 704 | "dn_bonustype":0, 705 | "dn_indblkshift":17, 706 | "dn_nlevels":1, 707 | "dn_nblkptr":3, 708 | "dn_blkptr":[ 709 | { 710 | "vdev":"0", 711 | "offset":"2000", 712 | "asize":"2000", 713 | "psize":"200", 714 | "lsize":"200", 715 | "compressed":"uncompressed" 716 | }, 717 | .... output omitted .... 718 | ] 719 | }, 720 | "Micro ZAP":[ 721 | .... output omitted .... 722 | { 723 | "mze_name":"ROOT", 724 | "mze_value":34 725 | } 726 | ] 727 | } 728 | ``` 729 | 730 | #. The ROOT object ("/") is the $34^{th}$ object, 731 | located in the $1^{st}$ block (offset $4a000$). 732 | In this simplest case, 733 | the ROOT's dnode contains an _embedded block pointer_; 734 | it is a micro ZAP object. 735 | 736 | ```bash 737 | $ zdump --root-dir /dev/mapper/zdisk0:"4a000":4000/2000:34 738 | { 739 | "Micro ZAP":[ 740 | { 741 | "mze_name":"sbin", 742 | "mze_value":2 743 | }, 744 | { 745 | "mze_name":"var", 746 | "mze_value":3 747 | }, 748 | { 749 | "mze_name":"usr", 750 | "mze_value":4 751 | } 752 | ] 753 | } 754 | ``` 755 | 756 | #. Dump the `sbin` directory, 757 | the $2^{nd}$ object in the $0^{th}$ block, 758 | from the output of indirect block pointer dumping, 759 | we know that the block is located at $46000$, 760 | the object contains an embedded block pointer again. 761 | 762 | ```bash 763 | $ zdump --dir /dev/mapper/zdisk0:"46000":4000/2000:2 764 | { 765 | "Micro ZAP":[ 766 | { 767 | "mze_name":"zdump", 768 | "mze_value":128 769 | }, 770 | { 771 | "mze_name":"", 772 | "mze_value":0 773 | }, 774 | { 775 | "mze_name":"", 776 | "mze_value":0 777 | } 778 | ] 779 | } 780 | ``` 781 | 782 | #. The `/sbin/zdump` file is the $128^{th}$ object, 783 | located in the $4^{th}$ block (offset $48000$), 784 | let's dump it. 
785 | 786 | ```bash 787 | $ zdump --text /dev/mapper/zdisk0:"48000":4000/2000:128 788 | #!/bin/bash 789 | 790 | echo Hello ZFS 791 | ``` 792 | 793 | # Recovering removed file 794 | 795 | \textcolor{red}{TBD} 796 | -------------------------------------------------------------------------------- /docs/zfs_internals.md.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahrens/zfsondisk/30ae2531352ab0d9d5ee9af82ce39a2cd45ccd79/docs/zfs_internals.md.pdf -------------------------------------------------------------------------------- /ondiskformatfinal.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ahrens/zfsondisk/30ae2531352ab0d9d5ee9af82ce39a2cd45ccd79/ondiskformatfinal.odt -------------------------------------------------------------------------------- /zwalk/src/Makefile: -------------------------------------------------------------------------------- 1 | CC = clang 2 | 3 | # FIXME: /usr/src/zfs/include shouldn't be needed! 4 | CFLAGS += -I/usr/include/libspl -I/usr/include/libzfs -I/usr/src/zfs-0.8.3/include 5 | 6 | CFLAGS += -D_FILE_OFFSET_BITS=64 \ 7 | -D_LARGEFILE64_SOURCE \ 8 | -Wall -g -O2 9 | 10 | LDFLAGS = -llz4 -ljson-c -lzpool -lnvpair 11 | 12 | all: zdump 13 | 14 | zdump: zdump.o 15 | $(CC) $(LDFLAGS) $^ -o $@ 16 | 17 | zdump.o: zdump.c 18 | $(CC) $(CFLAGS) -c $^ -o $@ 19 | 20 | clean: 21 | rm -f zdump.o zdump 22 | -------------------------------------------------------------------------------- /zwalk/src/TODO.md: -------------------------------------------------------------------------------- 1 | #. check nvlist_xxxx return value, 2 | otherwise, segfault if block is invalid 3 | #. zdump_dnode: check dn\_type and dn\_bonustype range, 4 | otherwise, zdump\_dnode crashes. 5 | #. Remove all dependencies on ZFS libraries, 6 | only data structure definition needed. 7 | #. support multiple blocks file. 
8 | -------------------------------------------------------------------------------- /zwalk/src/zdump.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define ZDUMP_BLOCK_COMPRESSED 1 22 | #define ZDUMP_BLOCK_UNCOMPRESSED 0 23 | 24 | typedef enum zdump_object { 25 | ZDUMP_HELP = 0, 26 | ZDUMP_BLKPTR_CHAIN, 27 | ZDUMP_LABEL, 28 | ZDUMP_MOS, 29 | ZDUMP_MOS_OBJDIR, 30 | ZDUMP_MOS_ROOTDS, 31 | ZDUMP_MOS_CHILDMAP, 32 | ZDUMP_MOS_HEADDS, 33 | ZDUMP_HEADDS_MASTERNODE, 34 | ZDUMP_ROOT_DIR, 35 | ZDUMP_DIR, 36 | ZDUMP_TEXT, 37 | ZDUMP_OBJECT_NUM 38 | } zdump_object_t; 39 | 40 | static char *zdump_titles[ZDUMP_OBJECT_NUM] = { 41 | [ZDUMP_HELP] = "Help", 42 | [ZDUMP_BLKPTR_CHAIN] = "Block Pointer Chain", 43 | [ZDUMP_LABEL] = "Vdev Label", 44 | [ZDUMP_MOS] = "MOS", 45 | [ZDUMP_MOS_ROOTDS] = "MOS Root Dataset", 46 | [ZDUMP_MOS_CHILDMAP] = "MOS Child Map", 47 | [ZDUMP_MOS_HEADDS] = "MOS Head Dataset", 48 | [ZDUMP_HEADDS_MASTERNODE] = "Head Dataset Masternode", 49 | [ZDUMP_ROOT_DIR] = "Root Dir", 50 | [ZDUMP_DIR] = "Directory", 51 | [ZDUMP_TEXT] = "Content", 52 | }; 53 | 54 | typedef struct zdump_opts { 55 | zdump_object_t object; 56 | int debug; 57 | char device[PATH_MAX]; 58 | uint64_t offset; 59 | uint64_t psize; 60 | uint64_t lsize; 61 | union { 62 | int level; 63 | int objidx; 64 | } extra; 65 | } zdump_opts_t; 66 | 67 | typedef int (*zdump_func_t)(zdump_opts_t *, json_object *); 68 | 69 | static struct option long_options[] = { 70 | { "debug", no_argument , 0 , 'd' }, 71 | { "help", no_argument , 0 , ZDUMP_HELP }, 72 | { "indirect-blkptr", required_argument , 0 , ZDUMP_BLKPTR_CHAIN }, 73 | { "label", required_argument , 0 , ZDUMP_LABEL }, 74 | { "mos", required_argument , 0 , ZDUMP_MOS 
}, 75 | { "mos-objdir", required_argument , 0 , ZDUMP_MOS_OBJDIR }, 76 | { "mos-rootds", required_argument , 0 , ZDUMP_MOS_ROOTDS }, 77 | { "mos-childmap", required_argument , 0 , ZDUMP_MOS_CHILDMAP }, 78 | { "mos-headds", required_argument , 0 , ZDUMP_MOS_HEADDS }, 79 | { "headds-masternode", required_argument , 0 , ZDUMP_HEADDS_MASTERNODE }, 80 | { "root-dir", required_argument , 0 , ZDUMP_ROOT_DIR }, 81 | { "dir", required_argument , 0 , ZDUMP_DIR }, 82 | { "text", required_argument , 0 , ZDUMP_TEXT }, 83 | }; 84 | 85 | static const char *objset_types[DMU_OST_NUMTYPES] = { 86 | "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; 87 | 88 | static void print_raw(void *buf, size_t size, const char *label) 89 | { 90 | uint64_t *d = (typeof(d))buf; 91 | unsigned int nwords = size / sizeof (uint64_t); 92 | char *c; 93 | 94 | (void) printf("\n%s\n%6s %s 0123456789abcdef\n", 95 | label, "", " 0 1 2 3 4 5 6 7 8 9 a b c d e f"); 96 | 97 | for (int i = 0; i < nwords; i += 2) { 98 | (void) printf("%06llx: %016llx %016llx ", 99 | (u_longlong_t)(i * sizeof (uint64_t)), 100 | (u_longlong_t)d[i], 101 | (u_longlong_t)d[i+1]); 102 | 103 | c = (char *)&d[i]; 104 | for (int j = 0; j < 2 * sizeof (uint64_t); j++) 105 | (void) printf("%c", isprint(c[j]) ? 
c[j] : '.'); 106 | (void) printf("\n"); 107 | } 108 | } 109 | 110 | static char *read_block(zdump_opts_t *opts, int compressed) 111 | { 112 | char *buf = NULL, *dbuf = NULL; 113 | int fd; 114 | struct stat64 statbuf; 115 | uint32_t bufsize; 116 | 117 | if ((fd = open64(opts->device, O_RDONLY)) < 0) { 118 | fprintf(stderr, "[%s@%d]: open64 (%s) failed.\n%s\n", 119 | __func__, __LINE__ 120 | , opts->device, strerror(errno)); 121 | goto out; 122 | } 123 | if (fstat64_blk(fd, &statbuf) != 0) { 124 | fprintf(stderr, "[%s@%d]: fstat64_blk (%s) failed\n%s\n", 125 | __func__, __LINE__ 126 | , opts->device, strerror(errno)); 127 | goto close_fd; 128 | } 129 | 130 | /* 131 | * TODO: check page alignment, and etc, see zdb.c:dump_label 132 | */ 133 | buf = malloc(opts->psize); 134 | if (buf == NULL) { 135 | fprintf(stderr, "[%s@%d]: malloc failed\n%s\n", 136 | __func__, __LINE__, strerror(errno)); 137 | goto close_fd; 138 | } 139 | dbuf = buf; 140 | 141 | if (pread64(fd, buf, opts->psize, opts->offset) == -1) { 142 | fprintf(stderr, "[%s@%d]: pread64 (%s-%d-%lu-%lu) failed\n%s\n" 143 | , __func__, __LINE__ 144 | , opts->device 145 | , fd 146 | , opts->psize 147 | , opts->offset 148 | , strerror(errno)); 149 | 150 | goto free_buf; 151 | } 152 | 153 | if (compressed) { 154 | dbuf = malloc(opts->lsize); 155 | if (dbuf == NULL) { 156 | fprintf(stderr, "[%s@%d]: malloc failed\n%s\n", 157 | __func__, __LINE__, strerror(errno)); 158 | goto free_buf; 159 | } 160 | bufsize = BE_IN32(buf); 161 | if (LZ4_decompress_safe(buf + sizeof(bufsize), 162 | dbuf, bufsize, opts->lsize) < 0) { 163 | fprintf(stderr, "[%s@%d]: decompressing failed!\n", 164 | __func__, __LINE__); 165 | goto free_dbuf; 166 | } 167 | free(buf); 168 | } 169 | 170 | close(fd); 171 | return dbuf; 172 | 173 | free_dbuf: 174 | free(dbuf); 175 | free_buf: 176 | free(buf); 177 | close_fd: 178 | close(fd); 179 | out: 180 | return NULL; 181 | } 182 | 183 | static json_object *zdump_blkptr(const blkptr_t *bp) 184 | { 185 | char 
vdev_hexstr[8]; 186 | char offset_hexstr[16]; 187 | char asize_hexstr[8]; 188 | char psize_hexstr[8]; 189 | char lsize_hexstr[8]; 190 | json_object *json_bp; 191 | 192 | snprintf(vdev_hexstr, sizeof(vdev_hexstr) 193 | , "%llu", DVA_GET_VDEV(&bp->blk_dva[0])); 194 | snprintf(offset_hexstr, sizeof(offset_hexstr) 195 | , "%llx", DVA_GET_OFFSET(&bp->blk_dva[0])); 196 | snprintf(asize_hexstr, sizeof(asize_hexstr) 197 | , "%llx", DVA_GET_ASIZE(&bp->blk_dva[0])); 198 | snprintf(lsize_hexstr, sizeof(lsize_hexstr) 199 | , "%llx", BP_GET_LSIZE(bp)); 200 | snprintf(psize_hexstr, sizeof(psize_hexstr) 201 | , "%llx", BP_GET_PSIZE(bp)); 202 | 203 | json_bp = json_object_new_object(); 204 | json_object_object_add(json_bp, "vdev", 205 | json_object_new_string(vdev_hexstr)); 206 | json_object_object_add(json_bp, "offset", 207 | json_object_new_string(offset_hexstr)); 208 | json_object_object_add(json_bp, "asize", 209 | json_object_new_string(asize_hexstr)); 210 | json_object_object_add(json_bp, "psize", 211 | json_object_new_string(psize_hexstr)); 212 | json_object_object_add(json_bp, "lsize", 213 | json_object_new_string(lsize_hexstr)); 214 | json_object_object_add(json_bp, "compressed", 215 | json_object_new_string( 216 | BP_GET_COMPRESS(bp) < ZIO_COMPRESS_FUNCTIONS ? 217 | zio_compress_table[BP_GET_COMPRESS(bp)].ci_name : "Error!")); 218 | 219 | return json_bp; 220 | } 221 | 222 | static json_object *zdump_dnode(const dnode_phys_t *dn) 223 | { 224 | json_object *json_dn = json_object_new_object(); 225 | json_object *json_dn_blkptr = json_object_new_array(); 226 | 227 | json_object_object_add(json_dn, "dn_type", 228 | dn->dn_type < DMU_OT_NUMTYPES ? 
229 | json_object_new_string(dmu_ot[dn->dn_type].ot_name) 230 | : json_object_new_string(dmu_ot[dn->dn_bonustype].ot_name)); 231 | json_object_object_add(json_dn, "dn_bonustype", 232 | json_object_new_int(dn->dn_bonustype)); 233 | json_object_object_add(json_dn, "dn_indblkshift", 234 | json_object_new_int(dn->dn_indblkshift)); 235 | json_object_object_add(json_dn, "dn_nlevels", 236 | json_object_new_int(dn->dn_nlevels)); 237 | json_object_object_add(json_dn, "dn_nblkptr", 238 | json_object_new_int(dn->dn_nblkptr)); 239 | for (int i = 0; i < dn->dn_nblkptr; i ++) 240 | json_object_array_add(json_dn_blkptr, zdump_blkptr(&dn->dn_blkptr[i])); 241 | 242 | json_object_object_add(json_dn, "dn_blkptr", json_dn_blkptr); 243 | return json_dn; 244 | } 245 | 246 | static json_object *zdump_objset(const char *buf, int debug) 247 | { 248 | const objset_phys_t *os = (typeof(os))buf; 249 | const dnode_phys_t *dn = &os->os_meta_dnode; 250 | json_object *json_dn, *json_os = NULL; 251 | 252 | if ((json_dn = zdump_dnode(dn)) == NULL) 253 | goto out; 254 | 255 | json_os = json_object_new_object(); 256 | json_object_object_add(json_os, "os_type", json_object_new_string(objset_types[os->os_type])); 257 | json_object_object_add(json_os, "dnonde", json_dn); 258 | out: 259 | return json_os; 260 | } 261 | 262 | static json_object *zdump_fzap_leaf(zdump_opts_t *opts, json_object *json_parent) 263 | { 264 | json_object *json_fzap_leaf = NULL, *json_leaf_header, *json_fzap_leaf_entries; 265 | const zap_leaf_phys_t *zap_leaf; 266 | const zap_leaf_chunk_t *zap_leaf_chunk; 267 | int max_tries = 64, entries = 0; 268 | char magic_hexstr[18], type_hexstr[18]; 269 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 270 | 271 | if (buf == NULL) 272 | goto out; 273 | 274 | zap_leaf = (typeof(zap_leaf))buf; 275 | json_fzap_leaf = json_object_new_object(); 276 | json_leaf_header = json_object_new_object(); 277 | 278 | snprintf(type_hexstr, sizeof(type_hexstr), "0x%016lx", 279 | 
zap_leaf->l_hdr.lh_block_type); // ZBT_LEAF 280 | snprintf(magic_hexstr, sizeof(magic_hexstr), "0x%08x", 281 | zap_leaf->l_hdr.lh_magic); // 0x2AB1EAF 282 | json_object_object_add(json_leaf_header, "lh_block_type", 283 | json_object_new_string(type_hexstr)); 284 | json_object_object_add(json_leaf_header, "lh_magic", 285 | json_object_new_string(magic_hexstr)); 286 | json_object_object_add(json_fzap_leaf, "Header", json_leaf_header); 287 | 288 | json_fzap_leaf_entries = json_object_new_array(); 289 | 290 | // The leaf hash table has hard-coded block size / 2^5 (32) 291 | // entries, see zap_leaf.h. 292 | zap_leaf_chunk = (typeof(zap_leaf_chunk)) \ 293 | (((uint8_t *)zap_leaf) + sizeof(zap_leaf->l_hdr) 294 | + sizeof(zap_leaf->l_hash) * (opts->lsize/32)); 295 | for (int i = 0; i < max_tries && entries < zap_leaf->l_hdr.lh_nentries; i++) { 296 | json_object *json_fzap_leaf_chunk; 297 | const zap_leaf_chunk_t *tmp = &zap_leaf_chunk[i]; 298 | // FIXME: walk the zap_leaf_array for the complete (long) name or value. 
299 | char *name; 300 | uint64_t *value; 301 | if (tmp->l_entry.le_type != ZAP_CHUNK_ENTRY) 302 | continue; 303 | 304 | entries++; 305 | name = (typeof(name)) 306 | zap_leaf_chunk[tmp->l_entry.le_name_chunk].l_array.la_array; 307 | value = (typeof(value)) 308 | &zap_leaf_chunk[tmp->l_entry.le_value_chunk].l_array.la_array[0]; 309 | json_fzap_leaf_chunk = json_object_new_object(); 310 | json_object_object_add(json_fzap_leaf_chunk, "name" 311 | , json_object_new_string(name)); 312 | json_object_object_add(json_fzap_leaf_chunk, "value" 313 | , json_object_new_int(BE_IN64(value))); 314 | json_object_array_add(json_fzap_leaf_entries, json_fzap_leaf_chunk); 315 | } 316 | json_object_object_add(json_fzap_leaf, "Entries", json_fzap_leaf_entries); 317 | json_object_object_add(json_parent, "FZAP leaf", json_fzap_leaf); 318 | free(buf); 319 | out: 320 | return json_fzap_leaf; 321 | 322 | } 323 | 324 | static json_object *zdump_fzap(zdump_opts_t *opts, json_object *json_parent) 325 | { 326 | json_object *json_fzap = NULL, *json_zap_table; 327 | const zap_phys_t *zap; 328 | char magic_hexstr[18]; 329 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 330 | 331 | if (buf == NULL) 332 | goto out; 333 | 334 | zap = (typeof(zap))buf; 335 | json_fzap = json_object_new_object(); 336 | json_object_object_add(json_fzap, "zap_block_type" 337 | , json_object_new_string("ZBT_HEADER")); 338 | snprintf(magic_hexstr, sizeof(magic_hexstr), "0x%016lx", zap->zap_magic); 339 | json_object_object_add(json_fzap, "zap_magic" 340 | , json_object_new_string(magic_hexstr)); 341 | json_object_object_add(json_fzap, "zap_num_entries" 342 | , json_object_new_int(zap->zap_num_entries)); 343 | json_zap_table = json_object_new_object(); 344 | json_object_object_add(json_zap_table, "zt_blk" 345 | , json_object_new_int(zap->zap_ptrtbl.zt_blk)); 346 | json_object_object_add(json_fzap, "zap_table_phys", json_zap_table); 347 | json_object_object_add(json_parent, "FZAP", json_fzap); 348 | if (opts->debug) 349 
| print_raw((void*)buf, opts->lsize, "FZAP block"); 350 | free(buf); 351 | out: 352 | return json_fzap; 353 | } 354 | 355 | // WARNING: embedded blkptr only! 356 | static json_object *zdump_embedded_dir(const dnode_phys_t *dn) 357 | { 358 | json_object *json_mzap = NULL; 359 | const mzap_phys_t *mzap; 360 | const mzap_ent_phys_t *mze; 361 | char *zbuf; 362 | const blkptr_t *bp = (typeof(bp))dn->dn_blkptr; 363 | 364 | if (!BP_IS_EMBEDDED(bp)) 365 | goto out; 366 | 367 | zbuf = malloc(BPE_GET_LSIZE(bp)); 368 | if (zbuf == NULL) 369 | goto out; 370 | 371 | decode_embedded_bp(bp, zbuf, BPE_GET_LSIZE(bp)); 372 | mzap = (typeof(mzap))zbuf; 373 | mze = mzap->mz_chunk; 374 | 375 | /* 376 | * ``sbin'', ``var'', ``usr'' expected, `4' means directory, 377 | * `8` means regular file. 378 | */ 379 | json_mzap = json_object_new_array(); 380 | for (int i = 0; i < 3; i++) { 381 | json_object *json_mze = json_object_new_object(); 382 | json_object_object_add(json_mze, "mze_name" 383 | , json_object_new_string(mze[i].mze_name)); 384 | json_object_object_add(json_mze, "mze_value" 385 | , json_object_new_int(mze[i].mze_value)); 386 | json_object_array_add(json_mzap, json_mze); 387 | } 388 | 389 | free(zbuf); 390 | out: 391 | return json_mzap; 392 | } 393 | 394 | int zdump_help(zdump_opts_t *opts, json_object *json_parent) 395 | { 396 | printf("Usage:\n"); 397 | return 0; 398 | } 399 | 400 | int zdump_blkptr_chain(zdump_opts_t *opts, json_object *json_parent) 401 | { 402 | int ret = 0; 403 | char *buf; 404 | json_object *json_blkptr_array; 405 | char level[5]; 406 | const blkptr_t *bp; 407 | 408 | if (opts->extra.level < 0) 409 | goto out; 410 | if ((buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED)) == NULL) { 411 | ret = 1; 412 | goto out; 413 | } 414 | bp = (typeof(bp))buf; 415 | snprintf(level, sizeof(level), "[L%d]", opts->extra.level); 416 | 417 | json_blkptr_array = json_object_new_array(); 418 | for (int i = 0; i < 8 /*maximum:1024*/; i++, bp++) 419 | 
json_object_array_add(json_blkptr_array, zdump_blkptr(bp)); 420 | 421 | json_object_object_add(json_parent, level, json_blkptr_array); 422 | 423 | bp = (typeof(bp))buf; 424 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + VDEV_LABEL_START_SIZE; 425 | opts->lsize = BP_GET_LSIZE(bp); 426 | opts->psize = BP_GET_PSIZE(bp); 427 | opts->extra.level--; 428 | ret = zdump_blkptr_chain(opts, json_parent); 429 | free(buf); 430 | out: 431 | return ret; 432 | } 433 | 434 | int zdump_text(zdump_opts_t *opts, json_object *json_parent) 435 | { 436 | int ret = 0; 437 | const dnode_phys_t *dn; 438 | const blkptr_t *bp; 439 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 440 | 441 | if (buf == NULL) { 442 | ret = 1; 443 | goto out; 444 | } 445 | 446 | dn = (typeof(dn))buf + + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 447 | json_object_object_add(json_parent, "dnode", zdump_dnode(dn)); 448 | bp = (typeof(bp))&dn->dn_blkptr[0]; 449 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + VDEV_LABEL_START_SIZE; 450 | opts->lsize = BP_GET_LSIZE(bp); 451 | opts->psize = BP_GET_PSIZE(bp); 452 | 453 | free(buf); 454 | buf = read_block(opts, ZDUMP_BLOCK_UNCOMPRESSED); 455 | printf("%s", buf); 456 | if (opts->debug) 457 | print_raw(buf, opts->lsize, "file content:"); 458 | free(buf); 459 | out: 460 | return ret; 461 | } 462 | 463 | int zdump_dir(zdump_opts_t *opts, json_object *json_parent) 464 | { 465 | int ret = 0; 466 | const dnode_phys_t *dn; 467 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 468 | 469 | if (buf == NULL) { 470 | ret = 1; 471 | goto out; 472 | } 473 | 474 | dn = (typeof(dn))buf + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 475 | json_object_object_add(json_parent, "Micro ZAP", zdump_embedded_dir(dn)); 476 | free(buf); 477 | out: 478 | return ret; 479 | } 480 | 481 | int zdump_root_dir(zdump_opts_t *opts, json_object *json_parent) 482 | { 483 | int ret = 0; 484 | const dnode_phys_t *dn; 485 | char *buf = read_block(opts, 
ZDUMP_BLOCK_COMPRESSED); 486 | 487 | if (buf == NULL) { 488 | ret = 1; 489 | goto out; 490 | } 491 | 492 | dn = (typeof(dn))buf + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 493 | json_object_object_add(json_parent, "dnode", zdump_dnode(dn)); 494 | json_object_object_add(json_parent, "Micro ZAP", zdump_embedded_dir(dn)); 495 | free(buf); 496 | out: 497 | return ret; 498 | } 499 | 500 | int zdump_headds_masternode(zdump_opts_t *opts, json_object *json_parent) 501 | { 502 | int ret = 0; 503 | json_object *json_mzap; 504 | const dnode_phys_t *dn; 505 | const blkptr_t *bp; 506 | const mzap_phys_t *mzap; 507 | const mzap_ent_phys_t *mze; 508 | size_t max; 509 | char *buf; 510 | 511 | if ((buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED)) == NULL) { 512 | ret = 1; 513 | goto out; 514 | } 515 | 516 | dn = (typeof(dn))buf + 1; 517 | json_object_object_add(json_parent, "dnode", zdump_dnode(dn)); 518 | 519 | bp = (typeof(bp))dn->dn_blkptr; 520 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + VDEV_LABEL_START_SIZE; 521 | opts->lsize = BP_GET_LSIZE(bp); 522 | opts->psize = BP_GET_PSIZE(bp); 523 | 524 | free(buf); 525 | buf = read_block(opts, ZDUMP_BLOCK_UNCOMPRESSED); 526 | if (buf == NULL) { 527 | ret = 1; 528 | goto out; 529 | } 530 | 531 | json_mzap = json_object_new_array(); 532 | // it's uncompressed 533 | mzap = (typeof(mzap))buf; 534 | mze = mzap->mz_chunk; 535 | max = (opts->lsize-offsetof(typeof(*mzap), mz_chunk))/sizeof(*mze); /* 7 */ 536 | for (int i = 0; i < max; i ++) { 537 | json_object *json_mze = json_object_new_object(); 538 | json_object_object_add(json_mze, "mze_name" 539 | , json_object_new_string(mze[i].mze_name)); 540 | json_object_object_add(json_mze, "mze_value" 541 | , json_object_new_int(mze[i].mze_value)); 542 | json_object_array_add(json_mzap, json_mze); 543 | } 544 | json_object_object_add(json_parent, "Micro ZAP", json_mzap); 545 | 546 | free(buf); 547 | 548 | out: 549 | return ret; 550 | } 551 | 552 | int 
zdump_mos_headds(zdump_opts_t *opts, json_object *json_parent) 553 | { 554 | int ret = 0; 555 | const dnode_phys_t *headds_dn; 556 | const dsl_dataset_phys_t *ds; 557 | const blkptr_t *bp; 558 | char tbuf[20]; 559 | json_object *json_head_ds; 560 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 561 | 562 | if (buf == NULL) { 563 | ret = 1; 564 | goto out; 565 | } 566 | 567 | headds_dn = (typeof(headds_dn))buf 568 | + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 569 | // The dsl is stored in the dn_bonus, dumping dnode itself 570 | // looks not so helpful. 571 | // json_object_object_add(json_parent, "dnode", zdump_dnode(headds_dn)); 572 | ds = (typeof(ds))headds_dn->dn_bonus; 573 | json_head_ds = json_object_new_object(); 574 | json_object_object_add(json_head_ds, "ds_dir_obj" 575 | , json_object_new_int(ds->ds_dir_obj)); 576 | snprintf(tbuf, sizeof(tbuf), "%s", ctime((const time_t *)&ds->ds_creation_time)); 577 | json_object_object_add(json_head_ds, "ds_creation_time" 578 | , json_object_new_string(tbuf)); 579 | json_object_object_add(json_head_ds, "ds_create_txg" 580 | , json_object_new_int(ds->ds_creation_txg)); 581 | json_object_object_add(json_head_ds, "ds_bp", zdump_blkptr(&ds->ds_bp)); 582 | json_object_object_add(json_parent, "Head data set", json_head_ds); 583 | 584 | bp = &ds->ds_bp; 585 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + \ 586 | VDEV_LABEL_START_SIZE; 587 | opts->lsize = BP_GET_LSIZE(bp); 588 | opts->psize = BP_GET_PSIZE(bp); 589 | 590 | free(buf); 591 | // FIXME: hard-coded 592 | buf = read_block(opts, ZDUMP_BLOCK_UNCOMPRESSED); 593 | if (buf == NULL) { 594 | ret = 1; 595 | goto out; 596 | } 597 | 598 | json_object_object_add(json_parent, "Object Set", zdump_objset(buf, opts->debug)); 599 | free(buf); 600 | out: 601 | return ret; 602 | } 603 | 604 | int zdump_mos_childmap(zdump_opts_t *opts, json_object *json_parent) 605 | { 606 | int ret = 0; 607 | const dnode_phys_t *child_map_dn; 608 | const blkptr_t *bp; 609 | 
const mzap_phys_t *mzap; 610 | const mzap_ent_phys_t *mze; 611 | json_object *json_embedded_blkptr; 612 | json_object *json_childmap_entries; 613 | char *zbuf; 614 | char *buf; 615 | 616 | if ((buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED)) == NULL) { 617 | ret = 1; 618 | goto out; 619 | } 620 | child_map_dn = (typeof(child_map_dn))buf 621 | + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 622 | json_object_object_add(json_parent, "dnode", zdump_dnode(child_map_dn)); 623 | bp = child_map_dn->dn_blkptr; 624 | // TODO: else 625 | if (BP_IS_EMBEDDED(bp)) { 626 | json_embedded_blkptr = json_object_new_object(); 627 | json_object_object_add(json_embedded_blkptr, "type" 628 | , json_object_new_int(BPE_GET_ETYPE(bp))); 629 | json_object_object_add(json_embedded_blkptr, "psize" 630 | , json_object_new_int(BPE_GET_PSIZE(bp))); 631 | json_object_object_add(json_embedded_blkptr, "lsize" 632 | , json_object_new_int(BPE_GET_LSIZE(bp))); 633 | json_object_object_add(json_embedded_blkptr, "compressed" 634 | , json_object_new_string( 635 | zio_compress_table[BP_GET_COMPRESS(bp)].ci_name)); 636 | 637 | zbuf = malloc(BPE_GET_LSIZE(bp)); 638 | if (zbuf == NULL) 639 | goto free_buf; 640 | json_childmap_entries = json_object_new_array(); 641 | decode_embedded_bp(bp, zbuf, BPE_GET_LSIZE(bp)); 642 | mzap = (typeof(mzap))zbuf; 643 | mze = mzap->mz_chunk; 644 | // $MOS, $FREE, and $ORIGIN are expected 645 | for (int i = 0; i < 3; i++) { 646 | json_object *json_mze = json_object_new_object(); 647 | json_object_object_add(json_mze, "mze_name" 648 | , json_object_new_string(mze[i].mze_name)); 649 | json_object_object_add(json_mze, "mze_value" 650 | , json_object_new_int(mze[i].mze_value)); 651 | json_object_array_add(json_childmap_entries, json_mze); 652 | } 653 | json_object_object_add(json_embedded_blkptr, "Micro ZAP" 654 | , json_childmap_entries); 655 | json_object_object_add(json_parent, "Embedded Block Pointer" 656 | , json_embedded_blkptr); 657 | } 658 | 659 | free_buf: 
660 | free(buf); 661 | out: 662 | return ret; 663 | } 664 | 665 | int zdump_mos_rootds(zdump_opts_t *opts, json_object *json_parent) 666 | { 667 | int ret = 0; 668 | json_object *json_dsl; 669 | const dnode_phys_t *rootds_dn; 670 | const dsl_dir_phys_t *dsl; 671 | char tbuf[20]; 672 | char *buf; 673 | 674 | if ((buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED)) == NULL) { 675 | ret = 1; 676 | goto out; 677 | } 678 | 679 | rootds_dn = (typeof(rootds_dn))buf 680 | + opts->extra.objidx % (opts->lsize / sizeof(dnode_phys_t)); 681 | json_object_object_add(json_parent, "dnode", zdump_dnode(rootds_dn)); 682 | 683 | dsl = (typeof(dsl))&rootds_dn->dn_bonus; 684 | json_dsl = json_object_new_object(); 685 | snprintf(tbuf, sizeof(tbuf), "%s", ctime((const time_t *)&dsl->dd_creation_time)); 686 | json_object_object_add(json_dsl, "dd_creation_time" 687 | , json_object_new_string(tbuf)); 688 | json_object_object_add(json_dsl, "dd_child_dir_zapobj" 689 | , json_object_new_int(dsl->dd_child_dir_zapobj)); 690 | json_object_object_add(json_dsl, "dd_head_dataset_obj" 691 | , json_object_new_int(dsl->dd_head_dataset_obj)); 692 | json_object_object_add(json_parent, "DSL", json_dsl); 693 | 694 | free(buf); 695 | out: 696 | return ret; 697 | } 698 | 699 | int zdump_mos_objdir(zdump_opts_t *opts, json_object *json_parent) 700 | { 701 | int ret = 0; 702 | const dnode_phys_t *dn; 703 | const blkptr_t *bp; 704 | char *buf = read_block(opts, ZDUMP_BLOCK_COMPRESSED); 705 | 706 | if (buf == NULL) { 707 | ret = 1; 708 | goto out; 709 | } 710 | 711 | dn = (typeof(dn))buf + 1; 712 | json_object_object_add(json_parent, "dnode", zdump_dnode(dn)); 713 | 714 | bp = &dn->dn_blkptr[0]; 715 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + \ 716 | VDEV_LABEL_START_SIZE; 717 | opts->lsize = BP_GET_LSIZE(bp); 718 | opts->psize = BP_GET_PSIZE(bp); 719 | zdump_fzap(opts, json_parent); 720 | 721 | /* 722 | * In this simple case, blk id in all buckets are 1. 
723 | */ 724 | bp = &dn->dn_blkptr[1]; 725 | opts->offset = DVA_GET_OFFSET(&bp->blk_dva[0]) + \ 726 | VDEV_LABEL_START_SIZE; 727 | opts->lsize = BP_GET_LSIZE(bp); 728 | opts->psize = BP_GET_PSIZE(bp); 729 | zdump_fzap_leaf(opts, json_parent); 730 | 731 | free(buf); 732 | out: 733 | return ret; 734 | } 735 | 736 | int zdump_mos(zdump_opts_t *opts, json_object *json_parent) 737 | { 738 | int ret = 0; 739 | char *buf = read_block(opts, ZDUMP_BLOCK_UNCOMPRESSED); 740 | if (buf == NULL) { 741 | ret = 1; 742 | goto out; 743 | } 744 | 745 | json_object_object_add(json_parent, zdump_titles[opts->object], 746 | zdump_objset(buf, opts->debug)); 747 | free(buf); 748 | out: 749 | return ret; 750 | } 751 | 752 | // TODO: check nvlist_xxxx return value. 753 | int zdump_label_uberblock(zdump_opts_t *opts, json_object *json_parent) 754 | { 755 | int ret = 0; 756 | json_object *json_ub, *json_label; 757 | vdev_t vd; 758 | const vdev_label_t *label; 759 | const vdev_phys_t *vdev; 760 | char *buf, *nvbuf, *type, *name, *path; 761 | char magic_hexstr[18]; 762 | size_t nvbuflen; 763 | nvlist_t *config; 764 | uberblock_t *winner; 765 | nvlist_t *vdev_tree = NULL; 766 | uint64_t ashift, version, txg = 0; 767 | 768 | if ((buf = read_block(opts, ZDUMP_BLOCK_UNCOMPRESSED)) == NULL) { 769 | ret = 1; 770 | goto out; 771 | } 772 | 773 | label = (typeof(label))buf; 774 | vdev = &label->vl_vdev_phys; 775 | nvbuf = (typeof(nvbuf))vdev->vp_nvlist; 776 | nvbuflen = sizeof(vdev->vp_nvlist); 777 | 778 | nvlist_unpack(nvbuf, nvbuflen, &config, 0); 779 | if ((nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) 780 | || (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) 781 | ashift = SPA_MINBLOCKSHIFT; 782 | 783 | if (opts->debug) 784 | print_raw(buf, opts->psize, "label"); 785 | 786 | nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version); 787 | nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name); 788 | nvlist_lookup_string(vdev_tree, 
ZPOOL_CONFIG_TYPE, &type); 789 | nvlist_lookup_string(vdev_tree, ZPOOL_CONFIG_PATH, &path); 790 | 791 | vd.vdev_ashift = ashift; 792 | vd.vdev_top = &vd; 793 | for (int i = 0; i < VDEV_UBERBLOCK_COUNT(&vd); i++) { 794 | uint64_t uoff = VDEV_UBERBLOCK_OFFSET(&vd, i); 795 | uberblock_t *ub = (void *)((char *)label + uoff); 796 | 797 | if (uberblock_verify(ub)) 798 | continue; 799 | 800 | if (ub->ub_txg > txg) { 801 | winner = ub; 802 | txg = ub->ub_txg; 803 | } 804 | } 805 | 806 | json_label = json_object_new_object(); 807 | json_object_object_add(json_label, "name", json_object_new_string(name)); 808 | json_object_object_add(json_label, "version", json_object_new_int(version)); 809 | 810 | json_ub = json_object_new_object(); 811 | snprintf(magic_hexstr, sizeof(magic_hexstr), "0x%016lx", winner->ub_magic); 812 | json_object_object_add(json_ub, "magic", json_object_new_string(magic_hexstr)); 813 | json_object_object_add(json_ub, "version", json_object_new_int(winner->ub_version)); 814 | json_object_object_add(json_ub, "txg", json_object_new_int(winner->ub_txg)); 815 | json_object_object_add(json_ub, "rootbp", zdump_blkptr(&winner->ub_rootbp)); 816 | json_object_object_add(json_label, "uberblock", json_ub); 817 | 818 | json_object_object_add(json_parent, zdump_titles[opts->object], json_label); 819 | 820 | nvlist_free(config); 821 | free(buf); 822 | out: 823 | return ret; 824 | } 825 | 826 | static int gen_opts(int argc, char **argv, zdump_opts_t *opts) 827 | { 828 | int ret = 0; 829 | int c; 830 | char *offsize, *tok; 831 | 832 | if (opts == NULL) { 833 | ret = 1; 834 | goto out; 835 | } 836 | 837 | while (1) { 838 | int option_index = 0; 839 | // No confusing short options 840 | c = getopt_long(argc, argv, "", long_options, &option_index); 841 | if (c == -1) 842 | break; 843 | switch (c) { 844 | case 'd': 845 | opts->debug = 1; 846 | break; 847 | default: 848 | opts->object = c; 849 | offsize = strdup(optarg); 850 | // device 851 | tok = strtok(offsize, ":"); 852 | 
if (tok == NULL) { 853 | free(opts); 854 | opts = NULL; 855 | goto out; 856 | } 857 | strncpy(opts->device, tok, sizeof(opts->device)); 858 | // offset 859 | tok = strtok(NULL, ":"); 860 | opts->offset = strtoull(tok ? tok : "", NULL, 16); 861 | // skip 4MB label 862 | if (c != ZDUMP_LABEL) 863 | opts->offset += VDEV_LABEL_START_SIZE; 864 | // lsize 865 | tok = strtok(NULL, "/"); 866 | opts->lsize = strtoull(tok ? tok : "", NULL, 16); 867 | // psize 868 | tok = strtok(NULL, "/"); 869 | opts->psize = tok ? strtoull(tok, NULL, 16) : opts->lsize; 870 | opts->object = c; 871 | // level, for dumping indirect blkptr 872 | tok = strtok(tok, ":"); 873 | tok = strtok(NULL, ":"); 874 | 875 | /* 876 | * -= 2 because the passed argument offset is 877 | * at the top level, and counting starts from 878 | * zero. E.g., 6-level indirect addressing, 879 | * the argument (offset) is at L5, dumping 880 | * starts from L4 down to L0 881 | */ 882 | if (c == ZDUMP_BLKPTR_CHAIN) 883 | opts->extra.level = tok ? strtoull(tok, NULL, 10)-2 : 0; 884 | if (c == ZDUMP_MOS_ROOTDS 885 | || c == ZDUMP_MOS_CHILDMAP 886 | || c == ZDUMP_MOS_HEADDS 887 | || c == ZDUMP_ROOT_DIR 888 | || c == ZDUMP_DIR 889 | || c == ZDUMP_TEXT) 890 | opts->extra.objidx = tok ? 
strtoull(tok, NULL, 10) : 0; 891 | 892 | free(offsize); 893 | break; 894 | } 895 | } 896 | out: 897 | return ret; 898 | } 899 | 900 | static zdump_func_t zdump_funcs[ZDUMP_OBJECT_NUM] = { 901 | [ZDUMP_HELP] = zdump_help, 902 | [ZDUMP_BLKPTR_CHAIN] = zdump_blkptr_chain, 903 | [ZDUMP_LABEL] = zdump_label_uberblock, 904 | [ZDUMP_MOS] = zdump_mos, 905 | [ZDUMP_MOS_OBJDIR] = zdump_mos_objdir, 906 | [ZDUMP_MOS_ROOTDS] = zdump_mos_rootds, 907 | [ZDUMP_MOS_CHILDMAP] = zdump_mos_childmap, 908 | [ZDUMP_MOS_HEADDS] = zdump_mos_headds, 909 | [ZDUMP_HEADDS_MASTERNODE] = zdump_headds_masternode, 910 | [ZDUMP_ROOT_DIR] = zdump_root_dir, 911 | [ZDUMP_DIR] = zdump_dir, 912 | [ZDUMP_TEXT] = zdump_text, 913 | }; 914 | 915 | int main(int argc, char **argv) 916 | { 917 | int ret = 1; 918 | zdump_opts_t opts = {}; 919 | 920 | if (gen_opts(argc, argv, &opts) == 0 921 | && opts.object < ZDUMP_OBJECT_NUM 922 | && zdump_funcs[opts.object] != NULL) { 923 | json_object *json_root = json_object_new_object(); 924 | zdump_func_t zdump_func = zdump_funcs[opts.object]; 925 | 926 | zdump_func(&opts, json_root); 927 | printf("%s\n", 928 | json_object_to_json_string_ext(json_root, JSON_C_TO_STRING_PRETTY)); 929 | json_object_put(json_root); 930 | ret = 0; 931 | } 932 | return ret; 933 | } 934 | 935 | /* 936 | * Local Variables: 937 | * mode: c 938 | * c-file-style: "linux" 939 | * End: 940 | */ 941 | --------------------------------------------------------------------------------