├── .github └── workflows │ └── emClarity-tutorial.yml ├── Algorithms ├── 05_tilt_series_alignment.tex ├── 06_defocus_estimate.tex ├── 08_picking.tex ├── 10_ctf_3d.tex ├── 11_avg.tex ├── 12_align.tex ├── 13_tomoCPR.tex └── 14_classification.tex ├── Figures_Tables ├── 04_workflow.tex ├── 05_parameters.tex ├── 06_ctf_tlt.tex ├── 06_parameters.tex ├── 07_recon_coords.tex ├── 08_csv.tex ├── 08_parameters.tex ├── 09_parameters.tex ├── 10_parameters.tex ├── 11_parameters.tex ├── 12_parameters.tex ├── 13_parameters.tex ├── 14_parameters.tex ├── 16_cluster.tex ├── 16_naming_convention.tex └── 16_svd.tex ├── Sections ├── 01_tutorial.tex ├── 02_project_directory.tex ├── 03_get_data_ready.tex ├── 04_workflow.tex ├── 05_tilt_series_alignment.tex ├── 06_defocus_estimate.tex ├── 07_subregions.tex ├── 08_picking.tex ├── 09_init.tex ├── 10_ctf_3d.tex ├── 11_avg.tex ├── 12_align.tex ├── 13_tomoCPR.tex ├── 14_classification.tex ├── 15_final_map.tex └── 16_algorithms.tex ├── biblio.bib ├── emClarity-tutorial.pdf ├── emClarity-tutorial.tex └── tutorial.sty /.github/workflows/emClarity-tutorial.yml: -------------------------------------------------------------------------------- 1 | name: Build LaTeX document 2 | on: 3 | push: 4 | branches: [master] 5 | pull_request: 6 | branches: [master] 7 | jobs: 8 | build_latex: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Set up Git repository 12 | uses: actions/checkout@v2 13 | 14 | - name: Set global and remove compiled pdf 15 | run: | 16 | git config --global user.name ffyr2w 17 | git config --global user.email ffyr2w@users.noreply.github.com 18 | git pull 19 | 20 | - run: file emClarity-tutorial.pdf | grep -q ' PDF ' 21 | - name: remove pdf if it is there 22 | if: success() 23 | run: | 24 | echo "emClarity-tutorial.pdf is there. Deleting it..." 25 | git rm emClarity-tutorial.pdf 26 | 27 | - name: Github Action for LaTeX 28 | uses: xu-cheng/latex-action@v2 29 | with: 30 | root_file: emClarity-tutorial.tex 31 | 32 | - name: Upload pdf to the master branch 33 | run: | 34 | git add emClarity-tutorial.pdf 35 | git commit -m "Automated upload" 36 | git push origin master 37 | 38 | # - name: Upload artifact 39 | # uses: actions/upload-artifact@gh-pages 40 | # with: 41 | # name: emClarity-tutorial-pdf 42 | # path: ./ 43 | # if: always() 44 | -------------------------------------------------------------------------------- /Algorithms/05_tilt_series_alignment.tex: -------------------------------------------------------------------------------- 1 | \subsection{Tilt-series alignment} \label{sec:algo:tilt_series_alignment} 2 | 3 | \subsubsection{Pre-processing} 4 | 5 | \subsubsection{Patch tracking} 6 | 7 | \subsubsection{Refining on beads} 8 | 9 | %% WORFLOW: 10 | 11 | % 1) If skip view, create a copy of the series with these images removed. 12 | % 2) If the image rotation - 180 > atand(nY./nX): 13 | % - rotate the series by 90deg with newstack. 14 | % 3) Pre-process the tilt-series: 15 | % - a) ifft(fft(img)*nyquist) 16 | % - b) median filter; kernel=3 if pixel>2 else 2 17 | % - c) bandpass * fft(img); LOW_RES_CUTOFF=800, RESOLUTION_CUTOFF=autoAli_max_resolution 18 | % - d) resample img with the image rotation 19 | % 4) autoAlign: 20 | % - a) for each binnning[binHigh:binInc:binLow]: (binInc = ceil((binHigh - binLow) / 3) ) AND for each iteration with this binning[autoAli_iterations_per_bin]: 21 | % - If first iteration, run tiltxcorr using the resampled tilt-series, without patch mode, just global. 
22 | % - If >1 iteration, combine transformation (img rota + last iteration), apply them with newstack, run tiltxcorr in patch mode to get the fiducials and use them for tiltalign. TiltOption 0 23 | % autoAli_n_iters_no_rotation is setting the number of iteration before activating the local alignments for tiltalign. 24 | % 5) If rotate 90, rotate again but this time on the real .fixed (before it was preprocessed for alignment in tmp) 25 | 26 | % 6) If refine on beads, for [15:-2:5]: 27 | % - apply last transformation with newstack, reconstruct with tilt(thick=3000), findbeads3d, reproject with tilt (.erase), imodtrans to set .erase relative to the original series, 28 | % then beadtrack to have a first estimate 29 | % - refine the bead positions with fitBeads: 30 | % - a) calculate the avg beads, center and standardize. Apply R-weight in Fourier space to keep low frequencies away (look at the edges!) before taking the average. 31 | % - b) Fit gaussian mixture (3 components) to this average and use this model 32 | % - c) reconstruct this reference bead, pad to img size, and CC with img_derivative (R-weighted). 33 | % - d) go the original position of the beads, get tile, apply peakMask and update coords using the COM of the tile. 34 | % - Finally, run tiltalign using this current fiducial model. if (nBeads < 5) then return. if (nBeads < 11), no local alignment. TiltOption 5 and LocalTiltOption 0. 35 | % 36 | % OR 37 | % 38 | % 7) If no refine on beads: apply the final transformation to with newstack at bin 15. Then reconstruct with tilt at thickness 3000, then findbeads3d and reproject with tilt again. 39 | % The is used to get the .erase and _3dfind.ali. 40 | % 41 | -------------------------------------------------------------------------------- /Algorithms/08_picking.tex: -------------------------------------------------------------------------------- 1 | \subsection{Template matching} \label{sec:algo:picking} 2 | 3 | \subsubsection{Pre-processing the tomogram} 4 | 5 | %% RESAMPLE THE TILT-SERIES 6 | % 1) extract every tilt and sub-region from the metadata. 7 | % 2) If the binned stacks do not exist, bin the aligned stacks, in series. 8 | % - get the header of the aliStack 9 | % - calculate the new pixel size and dimensions 10 | % - multiply each projection with 1/sinc(gX)^2: amplify the corners 11 | % - bandpass: highcut: 600, lowcut: binned pixel size 12 | % - resample image in Fourier space (rotation + eventual shifts to keep the center the same) and cut the final image (in real space). Save them in cache/ 13 | % - do this for every stack. 14 | 15 | The desired sub-region tomogram $\bm{V}$ is reconstructed by weighted back-projection using {\tilt}. 16 | \begin{enumerate} 17 | \item \textbf{Get the aligned stack}: The aligned stack saved in \code{aliStacks} is loaded and binned to the desired sampling (i.e. \code{Tmp\_sampling}) as described in section \ref{sec:algo:ctf_3d:resample}. If the stack already exists in \code{cache}, it is not recalculated. 18 | 19 | \item \textbf{Reconstruct the subregion tomogram}: The sub-region coordinates are extracted from table \ref{tab:recon_coords} and used to set the {\tilt} entries \code{SLICE}, \code{THICKNESS} and \code{SHIFT}. The tilt angles saved in table \ref{tab:ctf_tlt} are used for the \code{-TILTFILE} entry. If there are local alignments, the \code{.local} file is used for the \code{-LOCALFILE} entry. The output reconstructions from {\tilt} are oriented with the $y$ axis in the third dimension. 
With \href{https://bio3d.colorado.edu/imod/doc/man/trimvol.html}{trimvol} \code{-rx} entry, we rotate by -90\textdegree\ around $x$ to place the $z$ axis in the third dimension.
20 | \begin{note}This step is a simpler version of the reconstruction described in section \myref{sec:algo:ctf_3d}.\end{note}
21 | \end{enumerate}
22 | 
23 | If the sub-region is larger than \code{Tmp\_targetSize}, it is divided into $c$ equal chunks. For each chunk $c$:
24 | \begin{enumerate}
25 | \item \textbf{Band-pass filter}: The chunk $\bm{V}_c$ is band-pass filtered by $\bm{W}_{bandpass}$, which has a high-pass cutoff at 600\r{A} to remove ice/intensity gradients and a low-pass cutoff at \code{lowResCut} or, if it is not defined, at the first CTF zero. The zero is estimated based on the average defocus value of the stack (table \ref{tab:ctf_tlt}). The chunk is then centered and standardized.
26 | \begin{note}If \code{Tmp\_medianFilter} is defined, the chunk $\bm{V}_c$ is median filtered using the specified neighborhood window.\end{note}
27 | 
28 | \item \textbf{Positive contrast}: The chunk is low-pass filtered and we assume that \code{lowResCut} cuts before the first zero of the CTF. As such, the negative contrast is ``flipped'' in real space by simply multiplying the chunk by $-1$.
29 | \end{enumerate}
30 | Finally, the variance of $\bm{V}$ is calculated and used to normalize each chunk $\bm{V}_c$.
31 | 
32 | \subsubsection{Pre-processing the template}
33 | The template $\bm{S}$ is loaded and padded up to $2\times\code{Ali\_mRadius}$ while enforcing a square box. It is then centered, standardized and finally resampled to \code{Tmp\_sampling}, using linear interpolation.
34 | 
35 | \subsubsection{Angular search}
36 | % Computing the cross-correlation (CC) between the tomogram and the template gives us a CC map. Each pixel of this map corresponds to the CC score between the template and the tomogram centered on this pixel. If we sample different rotations, how do we keep track of CC scores? After all, the CC scores do not tell us what the rotation of the template was.
37 | 
38 | The in- and out-of-plane angles $[\Theta_{out},\ \Delta_{out}, \Theta_{in},\ \Delta_{in}]$ registered in \code{Tmp\_angleSearch} are converted into a set of $r \times 3$ Euler angles ($\phi_{r},\ \theta_{r},\ \psi_{r}$). These rotations are finally converted into $r$ rotation matrices $\bm{R_{r}}$.
39 | 
40 | To keep track of the search, two empty volumes of the same size as the chunk are prepared for each chunk. $\bm{\mathrm{CC}}_{best\text{-}peak}$ will store the standardized cross-correlation scores and $\bm{\mathrm{CC}}_{best\text{-}rot}$ will store the index $r$.
41 | 
42 | For each chunk $c$ and for each rotation $r$:
43 | \begin{enumerate}
44 | \item \textbf{Rotate and pad the template}: The template is rotated by $\bm{R}_{r}$, padded to the size of $\bm{V}_c$, band-pass filtered with $\bm{W}_{bandpass}$ and any change in power due to interpolation is corrected. We refer to this transformed template as $\bm{S}_r$.
45 | 
46 | \item \textbf{Calculate the normalized cross-correlation}: The cross-correlation between the tomogram chunk $\bm{V}_c$ and the rotated template $\bm{S}_r$ is calculated as follows:
47 | \begin{equation}
48 | \bm{\mathrm{CC}}_{c,r} = \mathcal{F}^{-1} \left\{
49 | \mathcal{F} \left\{ \bm{V}_c \right\} \overline{\mathcal{F} \left\{ \bm{S}_r \right\}}
50 | \right\}
51 | \end{equation}
52 | Importantly, $\bm{\mathrm{CC}}_{c,r}$ is then normalized by its standard deviation. % add more detail on this.
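For illustration, the scoring and bookkeeping of this search can be sketched in a few lines of numpy; the function names and the peak-position convention are ours, not emClarity's:
\begin{lstlisting}
import numpy as np

def cross_correlate(chunk, template_rot):
    # Cross-correlate a chunk with a rotated, zero-padded template of the
    # same shape, then normalize the CC map by its standard deviation.
    cc = np.fft.ifftn(np.fft.fftn(chunk) * np.conj(np.fft.fftn(template_rot))).real
    return np.fft.fftshift(cc) / cc.std()

def update_best(cc, r, best_peak, best_rot):
    # Per voxel, keep the highest score seen so far and the index of the
    # rotation that produced it (in-place update, see the next step).
    better = best_peak <= cc
    best_peak[better] = cc[better]
    best_rot[better] = r
\end{lstlisting}
Storing only the running maximum and the index of its rotation keeps the memory cost at two volumes per chunk, however many rotations are sampled.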
53 | 
54 | \item \textbf{Update the best score}: We only want to save the best peaks, i.e. the peaks that are higher than those from the previous iterations. As such, each voxel $v$ of ${[\bm{\mathrm{CC}}_{best\text{-}peak}]}_c$ and ${[\bm{\mathrm{CC}}_{best\text{-}rot}]}_c$ is updated as follows:
55 | \begin{equation}
56 | {[\bm{\mathrm{CC}}_{best\text{-}peak}(v)]}_c =
57 | \begin{cases}
58 | \bm{\mathrm{CC}}_{c,r}(v), & \text{if}\ {[\bm{\mathrm{CC}}_{best\text{-}peak}]}_c(v) \leqslant \bm{\mathrm{CC}}_{c,r}(v)\\
59 | {[\bm{\mathrm{CC}}_{best\text{-}peak}]}_c(v), & \text{otherwise}
60 | \end{cases}
61 | \end{equation}
62 | \begin{equation}
63 | {[\bm{\mathrm{CC}}_{best\text{-}rot}]}_c(v) =
64 | \begin{cases}
65 | r, & \text{if}\ {[\bm{\mathrm{CC}}_{best\text{-}peak}]}_c(v) \leqslant \bm{\mathrm{CC}}_{c,r}(v)\\
66 | {[\bm{\mathrm{CC}}_{best\text{-}rot}]}_c(v), & \text{otherwise}
67 | \end{cases}
68 | \end{equation}
69 | Consequently, each voxel $v$ is assigned the current best CC score and the rotation $r$ that gave this score.
70 | \begin{note}Of course, if it is the first iteration, i.e. if $r=1$, then ${[\bm{\mathrm{CC}}_{best\text{-}peak}]}_c = \bm{\mathrm{CC}}_{c,r}$ and ${[\bm{\mathrm{CC}}_{best\text{-}rot}]}_c = 1$.
71 | \end{note}
72 | \end{enumerate}
73 | 
74 | At the end of the angular search, the chunks are concatenated and the following files are saved in \code{convmap\_wedgeType\_2\_bin} (\code{} is equal to \code{Tmp\_sampling}):
75 | \begin{itemize}
76 | \item \code{\_\_bin\_convmap.mrc}: The best scores, i.e. $\bm{\mathrm{CC}}_{best\text{-}peak}$.
77 | \item \code{\_\_bin\_angles.mrc}: The corresponding rotation $r$ for each best score, i.e. $\bm{\mathrm{CC}}_{best\text{-}rot}$.
78 | \item \code{\_\_bin\_angles.list}: The $(\phi,\ \theta,\ \psi)$ Euler angles corresponding to the rotation $r$. One trio per line, $r$ lines.
79 | \end{itemize}
80 | 
81 | \subsubsection{Extract the peaks} \label{sec:algo:picking:extract_peaks}
82 | 
83 | The goal now is to select the $x,\ y,\ z$ coordinates of the $p$ strongest peaks registered in $\bm{\mathrm{CC}}_{best\text{-}peak}$, with $p$ equal to \code{Tmp\_threshold}. Then, for each peak, the corresponding rotation $r$ is extracted from $\bm{\mathrm{CC}}_{best\text{-}rot}$ and converted back to the corresponding $(\phi,\ \theta,\ \psi)$ Euler angles. Therefore, for each desired peak $p$:
84 | \begin{enumerate}
85 | \item \textbf{Get the coordinates}: The $x,\ y,\ z$ coordinates ${[\bm{T}_{x,y,z}]}_p$ of the strongest peak registered in $\bm{\mathrm{CC}}_{best\text{-}peak}$ are selected. To take into account the neighbouring pixels, these coordinates are adjusted by the local center of mass ($3\times3\times3$ matrix, centered on the strongest peak). ${[\bm{T}_{x,y,z}]}_p$ is relative to the subregion tomogram $\bm{V}$, with the origin at the lower left corner, and is unbinned.
86 | % meh...
87 | 
88 | \item \textbf{Get the rotation}: The value of $\bm{\mathrm{CC}}_{best\text{-}rot}$ at the coordinates ${[\bm{T}_{x,y,z}]}_p$ is extracted. This value is the rotation index $r$ and is converted back to the corresponding $(\phi_p,\ \theta_p,\ \psi_p)$ Euler angles and rotation matrix $\bm{R}_p$. If a symmetry was entered, the rotation is randomized between the symmetry-related pairs to reduce missing-wedge bias.
89 | 
90 | \item \textbf{Get the CC score}: The value of the peak in $\bm{\mathrm{CC}}_{best\text{-}peak}$ at position ${[\bm{T}_{x,y,z}]}_p$ is extracted, centered and standardized. This score is referred to as $\bm{\mathrm{CC}}_p$.
91 | 
92 | \item \textbf{Erase the selected peak}: The selected peak at position ${[\bm{T}_{x,y,z}]}_p$ and its neighbouring voxels are masked out by $\bm{M}_{peak}$. $\bm{M}_{peak}$ is set by \code{Peak\_mType} and \code{particleRadius} (or \code{Peak\_mRadius} if it is defined), and is rotated by $\bm{R}_p$ before being applied. In that way, the next iteration cannot select the same peak nor any peak within this particle radius.
93 | 
94 | \item \textbf{Save the peak information in table \ref{tab:csv}}.
95 | \end{enumerate}
96 | Finally, the coordinates $\bm{T}_{x,y,z}$ are binned to \code{Tmp\_sampling}, saved into \code{\_\_bin.pos} and converted into an IMOD mod file with the following command:
97 | \begin{lstlisting}
98 | point2model -number 1 -sphere 3 -scat *.pos *.mod
99 | \end{lstlisting}
100 | 
101 | \input{Figures_Tables/08_csv}
102 | 
--------------------------------------------------------------------------------
/Algorithms/10_ctf_3d.tex:
--------------------------------------------------------------------------------
1 | \subsection{Tomogram reconstruction} \label{sec:algo:ctf_3d}
2 | 
3 | %%%%% Workflow
4 | 
5 | %% Parameters:
6 | %  applyExposureFilter (default=1)
7 | %  useSurfaceFit (default=1)
8 | %  flgDampenAliasedFrequencies (default=0)
9 | %  flg2dCTF (default=0): force to 1 section equal to maxZ
10 | 
11 | %  Ali_samplingRate
12 | %  PIXEL_SIZE
13 | 
14 | \subsubsection{Resample the tilt-series} \label{sec:algo:ctf_3d:resample}
15 | 
16 | For efficiency and practicality, we often start working with ``binned'' data and progressively decrease the ``binning'', up to the point where the original data is used. Binning consists of reducing images by an integer factor to ensure that every output pixel is an average of the same number of neighbouring pixels (the ``bin''). There are many ways to bin an image: \href{https://www.imagemagick.org/Usage/filter/#box}{box filter}, \href{https://entropymine.com/imageworsener/pixelmixing/}{pixel mixing}, Fourier cropping, interpolation, etc. Currently, {\emClarity} bins the tilt-series by resampling them on a new, smaller, coordinate grid (in Fourier space), using bilinear interpolation.
17 | 
18 | Therefore, the original aligned tilt-series, saved in \code{aliStacks} and registered in the metadata during the project initialization (section \ref{sec:init}), are resampled, in parallel, as follows:
19 | \begin{enumerate}
20 | \item TODO: $1/{\mathrm{sinc}(gX)}^2$.
21 | % This should just be a note.
22 | % This is a real problem. Fourier crop should just work.
23 | % resample2d is updated, so update this.
24 | 
25 | \item The projections are Fourier transformed and band-pass filtered. Low frequencies are removed up to $\sim$600\r{A}, which sets the mean of the projections to 0 and attenuates any large intensity gradients present in the images. Frequencies beyond the Nyquist of the new binned pixel size $\bm{p}_{bin}$ are also removed. $\bm{p}_{bin}$ is equal to \code{PIXEL\_SIZE} $\times$ \code{Ali\_mSamplingRate}.
26 | 
27 | \item When resizing images, we force the output image to fit into a new grid. This operation ultimately defines a new center in pixel space and it is important to keep this new center aligned with the center of the original image, so that any operation done on the binned data can be scaled back to the original data. Depending on the binning factor and image size, we can anticipate and shift the images before scaling, to keep the new center aligned with the original grid.
28 | 
29 | \item The projection spectra are scaled by $\bm{p}_{bin}$ using bilinear interpolation and the inverse Fourier transform is calculated to switch the images back to real space. Remember that scaling the frequency spectrum of an image by $n$ is equivalent to scaling the image by $1/n$.
30 | 
31 | \item At this point, the images are ``zoomed out'', but the actual size of the images is unchanged. To complete the resizing, we need to crop them to the desired binned size.
32 | \end{enumerate}
33 | 
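As a point of comparison, plain Fourier cropping achieves the same scaling; the following numpy sketch is a simplification of the above (emClarity resamples the spectrum by bilinear interpolation and handles the centering shift explicitly):
\begin{lstlisting}
import numpy as np

def fourier_crop(img, b):
    # Bin a 2D projection by an integer factor b by cropping its centered
    # spectrum. Assumes even dimensions, for clarity.
    ny, nx = img.shape
    oy, ox = ny // b, nx // b
    ft = np.fft.fftshift(np.fft.fft2(img))
    cy, cx = ny // 2, nx // 2
    ft = ft[cy - oy // 2:cy - oy // 2 + oy, cx - ox // 2:cx - ox // 2 + ox]
    # Dividing by b**2 preserves the mean intensity through the inverse FFT.
    return np.fft.ifft2(np.fft.ifftshift(ft)).real / b**2
\end{lstlisting}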
34 | The binned stacks are saved in the \code{cache} directory as \code{\_\_bin.fixed}.
35 | 
36 | % 3) Check that the reconstruction at this binning doesn't exist already. If every subregions from one stack are already reconstructed, then skip it.
37 | 
38 | \subsubsection{Defocus step} \label{sec:algo:ctf_3d:defocus_step}
39 | 
40 | Defocus-gradient corrected back-projection, as described in \cite{jensen_3dctf}, requires that ``during the reconstruction of tomogram by [weighted] back-projection, each voxel is calculated from tilted images that were CTF-corrected with defocus values corresponding to the position of that voxel at each tilt. To achieve this, each image in the tilt-series is CTF-corrected multiple times with different defocus value. The number of different CTF corrections performed per image depends upon how finely the defocus gradient should be sampled'' \cite{novaCTF}.
41 | 
42 | \begin{note}During tilt-series alignment, the tilt axis is aligned to the $y$ axis. As such, if we assume that the specimen is flat, the defocus only varies along the $x$ and $z$ axes.
43 | \end{note}
44 | 
45 | In practice, the tomograms are divided into $s$ $z$-sections of equal width, also referred to as $z$-slabs or simply sections. Each section is assigned to a CTF-corrected tilt-series with a defocus corresponding to the defocus at the center of the section. The sampling of the defocus gradient, set by the defocus step $\bm{\Delta \mathrm{z}}$, is defined to keep the average CTF amplitudes, resulting from the destructive interference between all of the CTFs within a section, above the current resolution target. It is calculated as follows:
46 | \begin{enumerate}
47 | \item The specimen thickness $\bm{t}$ is defined by the $z_{min}$ and $z_{max}$ boundaries of the sub-regions of the current tilt-series. The sub-regions are defined manually, as described in section \ref{sec:subregions}.
48 | 
49 | \item The goal of this procedure is to progressively decrease $\bm{\Delta \mathrm{z}}$, up to the point where the $z$ sampling is fine enough to achieve the resolution target. $\bm{\Delta \mathrm{z}}$ is initially set to the specimen thickness $\bm{t}$. First, we calculate a theoretical CTF for each $z$ point defined from $-{\bm{\Delta \mathrm{z}}}/2$ to ${\bm{\Delta \mathrm{z}}}/2$, with $0.1 \times \bm{\Delta \mathrm{z}}$ increment, and average all of these CTFs together. This gives us an estimate of the average CTF resulting from the interference of many CTFs along a $z$ section. The defocus estimate used to calculate the CTFs is the average of the defoci calculated in section \ref{sec:algo:defocus_estimate:per_view_defocus} and stored in table \ref{tab:ctf_tlt}.
50 | 
51 | \item Using this average CTF, we can estimate the maximum resolution $\bm{h}_{max}$, in 1/\r{A}, that a subtomogram could achieve if we were to use the current $\bm{\Delta \mathrm{z}}$. $\bm{h}_{max}$ corresponds to the highest frequency, up to Nyquist, where the average CTF is above 90\% of contrast.
52 | %The reason why we take the highest frequency and not the first frequency where the CTF first goes below 90\% of contrast is because a very wide range of defocus will be represented in the subtomograms due to the tilt, so the sampling function will be smoother without reaching 0... NOT SURE ABOUT THIS.
53 | 
54 | \item We define the resolution target as $\bm{h}_{cut}/2$, where $\bm{h}_{cut}$ is the frequency cutoff, in 1/\r{A}, defined in equation \ref{eq:hcut}. If it is the first cycle and the half-maps are not reconstructed yet, $\bm{h}_{cut}$ is set to 40\r{A}. If $\bm{h}_{max} \geqslant \bm{h}_{cut}/2$, it indicates that the defocus gradient is probably sampled finely enough to achieve the resolution target. On the other hand, if $\bm{h}_{max} < \bm{h}_{cut}/2$, it indicates that we are likely to benefit from a finer sampling, i.e. a smaller $\bm{\Delta \mathrm{z}}$. In this case, we decrease the current $\bm{\Delta \mathrm{z}}$ by 90\% and recalculate the average CTF and $\bm{h}_{max}$. This procedure is repeated until $\bm{h}_{max} \geqslant \bm{h}_{cut}/2$, down to the minimum allowed value $\bm{\Delta \mathrm{z}} = 10$ nm.
55 | \begin{note}Since we expect the resolution to improve using this new reconstruction, the resolution of the new reconstruction must be higher than what we currently have. The value $\bm{h}_{cut}/2$ roughly balances the trade-off between achievable resolution and run time during the reconstruction.
56 | \end{note}
57 | 
58 | \begin{note}If $\bm{h}_{max} \geqslant \bm{h}_{cut}/2$ at the first iteration, when $\bm{\Delta \mathrm{z}}$ is equal to the specimen thickness $\bm{t}$, the CTF correction shouldn't really be considered ``3D'', as we will only use one $z$ section. On the other hand, it also means that, given the resolution target, the specimen is thin enough that it is unlikely to benefit from a ``3D'' correction.
59 | \end{note}
60 | \end{enumerate}
61 | 
62 | Once the defocus step $\bm{\Delta \mathrm{z}}$ and the thickness of the specimen $\bm{t}$ are calculated, we can define the number $s$ of $z$-sections as the closest odd integer to $\bm{t}\ /\ \bm{\Delta \mathrm{z}}$.
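To make this test concrete, here is a rough numpy sketch of the average-CTF criterion, using a 1D CTF without amplitude contrast or envelope (all lengths in \r{A}; the simplified sign convention and function names are ours):
\begin{lstlisting}
import numpy as np

def average_ctf(freqs, defocus, delta_z, wavelength, cs):
    # Average the CTFs sampled from -delta_z/2 to +delta_z/2 around the
    # nominal defocus, in 0.1 * delta_z increments (11 samples).
    avg = np.zeros_like(freqs)
    for dz in np.linspace(-delta_z / 2, delta_z / 2, 11):
        chi = (np.pi * wavelength * (defocus + dz) * freqs**2
               - 0.5 * np.pi * cs * wavelength**3 * freqs**4)
        avg += -np.sin(chi)
    return avg / 11

def h_max(freqs, avg_ctf, threshold=0.9):
    # Highest frequency, up to Nyquist, where the average CTF amplitude is
    # still above `threshold` of full contrast.
    above = np.nonzero(np.abs(avg_ctf) >= threshold)[0]
    return freqs[above[-1]] if above.size else 0.0
\end{lstlisting}
The defocus step $\bm{\Delta \mathrm{z}}$ is then shrunk and the test repeated until \code{h\_max} reaches the resolution target.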
63 | 
64 | % \subsubsection{Z sections}
65 | 
66 | % Once $\Delta f$ and $T$ are calculated, we can define the number of z sections needed as the closest odd integer from $T\ /\ \Delta f$. A z section, simply referred as section, is nothing more than a z slab of the tomogram with a width equal to $\Delta f$. Each section will be reconstructed independently from the other sections.
67 | 
68 | % Therefore, for each section, we are going to calculate one tilt-series that will be CTF-corrected using the section's defocus offset. Then, for each sub-region, we reconstruct each section independently using their respective tilt-series. Once the sections are calculated, we concatenate them to form the final 3D-CTF corrected sub-region tomograms. The ability to reconstruct only subsets of a bigger field-of-view heavily relies on the \code{SLICE}, \code{THICKNESS} and \code{SHIFT} parameters of the {\tilt} program.
69 | 
70 | % Before calculating the CTF-corrected tilt-series for each section of the specimen, we can calculate the spatial model of each section, which will make the CTF-correction more accurate.
71 | 
72 | \subsubsection{Center-of-mass and spatial model} \label{sec:algo:ctf_3d:spatial_model}
73 | 
74 | The CTF estimate, fitted from the power spectra, is considered to be the sum of CTFs from weak phase objects at varying defoci within the field of view. The center-of-mass of the specimen in $z$, $\bm{\mathrm{COM}}_z$, where most of the signal comes from, greatly impacts this average CTF, to the point where we can assume that the defocus estimate is the distance from $\bm{\mathrm{COM}}_z$ to the focal plane. The $z$-sections are positioned relative to the center of the reconstruction, $\bm{\mathrm{COR}}_z$; therefore, to calculate their respective defocus and correctly estimate their average CTF, we must adjust the current defocus estimate to match the center of the reconstruction $\bm{\mathrm{COR}}_z$. In conclusion, we need to know the defocus at the center of the specimen or, in other words, the average $z$-offset $\bm{\bar{Z}}_{R\text{-}M} = \bm{\mathrm{COR}}_z - \bm{\mathrm{COM}}_z$.
75 | 
76 | Here, we make the assumption that most of the signal comes from the particles, and therefore that the defocus estimate is the distance from the center-of-mass of all the subtomograms to the focal plane. As the particle positions are expressed relative to $\bm{\mathrm{COR}}_z$ (i.e. $z=0$ at the center of the reconstruction), it is straightforward to calculate $\bm{\bar{Z}}_{R\text{-}M}$. Moreover, as we know which particle belongs to which section, we can calculate an offset for each section $s$. We refer to these offsets as ${[\bm{\bar{Z}}_{R\text{-}M}]}_{s}$.
77 | 
78 | Of course, the particles within a section are not necessarily in the same $z$ plane, meaning that ${[\bm{Z}_{R\text{-}M}]}_s$ varies with the $x$ and $y$ coordinates. To take this into account, we calculate a spatial model describing the $z$-positions of the particles across the specimen. To do so, we extract the $x,\ y,\ z$ coordinates of the particles of every sub-region of the specimen and, according to their $z$-position, the particles are assigned to a section. If a section contains more than 6 particles, we fit a quadratic surface to the particle positions. This surface defines the ``spatial model'' of the section or, in other words, ${[\bm{Z}_{R\text{-}M}]}_s(x,y)$. Otherwise, the spatial model is a horizontal plane (it is the average $z$-position of the particles and is invariant across the section, i.e. ${[\bm{\bar{Z}}_{R\text{-}M}]}_s$). A minimal sketch of this fit is shown below.
79 | 
80 | % NOTE: Take the coordinates of every valid particle, bin the coordinates, recenter the coordinates (tilt-axis=0; shift from lower left to centered and include the tomo Z offset from the microscope frame) and add the Z coordinates of the particles. Do this for every sub-region in the current stack.
81 | 
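A least-squares version of this fit could look as follows (the exact polynomial basis and solver used by emClarity may differ):
\begin{lstlisting}
import numpy as np

def fit_spatial_model(x, y, z):
    # Fit z = c0 + c1*x + c2*y + c3*x*y + c4*x^2 + c5*y^2 to the particle
    # positions of one z-section (requires more than 6 particles).
    A = np.column_stack([np.ones_like(x), x, y, x * y, x**2, y**2])
    coeffs, *_ = np.linalg.lstsq(A, z, rcond=None)
    return coeffs

def evaluate_spatial_model(coeffs, x, y):
    # z-offset of the section at (x, y).
    c0, c1, c2, c3, c4, c5 = coeffs
    return c0 + c1 * x + c2 * y + c3 * x * y + c4 * x**2 + c5 * y**2
\end{lstlisting}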
82 | \subsubsection{3D-CTF phase correction} \label{sec:algo:ctf_3d:ctf_phase_correction}
83 | 
84 | %%%% Calculate the CTF phase corrected (multiply by CTF and exposure filter) tilt-series.
85 | For each section $s$, we are going to calculate one CTF-corrected tilt-series using the section's defocus offset. Therefore, for each section $s$ and for each view of the tilt-series $i$:
86 | % As we have said previously, we need to calculate a CTF-corrected tilt-series for each section:
87 | 
88 | \begin{enumerate}
89 | \item \textbf{Exposure filter}: Calculate the Fourier transform of the view and multiply it with the exposure filter ${[\bm{W}_{exposure}]}_i$, as in \cite{exposure_grant_2015}. This filter only varies with the tilt angle of the image and therefore is identical for each section.
90 | \begin{note}To take into account that the tilt-series, and therefore the subtomograms, are exposure filtered, the same filter will be applied to the sampling functions of the particles in section \myref{sec:algo:avg:SF3D}.
91 | \end{note}
92 | 
93 | \item \textbf{Microscope frame}: The view is replaced within the microscope frame. To do so, the spatial model ${[\bm{Z}_{R\text{-}M}]}_s(x,y)$ is tilted according to the tilt angle of the current image. As described previously, because it is calculated from the $z$-positions of the particles, the spatial model of the section is already correctly positioned in $z$.
94 | 
95 | \item \textbf{Defocus ramp}: The microscope frame is then divided into $n$ $z$-slabs of $\bm{\Delta \mathrm{z}}$ width, which effectively divides the view (represented by the transformed spatial model ${[\bm{Z}_{R\text{-}M}]}_{s,i}$) into $n$ strips parallel to the tilt-axis. This is similar to figure 1.A (right) from \cite{novaCTF}. The strips follow a defocus ramp, perpendicular to the tilt-axis, defined as follows:
96 | \begin{equation} \label{eq:def_ramp}
97 | \bm{\mathrm{z}}_{i} + {z'}_{min} - \bm{\Delta \mathrm{z}}
98 | \xrightarrow[up\ to]{+\bm{\Delta \mathrm{z}}}\ 
99 | \bm{\mathrm{z}}_{i} + {z'}_{max} + \bm{\Delta \mathrm{z}}
100 | \end{equation}
101 | where ${z'}_{min}$ and ${z'}_{max}$ are the lowest and highest $z$ coordinates of the transformed (i.e. tilted) spatial model. $\bm{\mathrm{z}}_{i}$ is the defocus value of the current view, saved in table \ref{tab:ctf_tlt}.
102 | 
103 | \begin{note}As the spatial model is not necessarily a plane, the strips can be ``curved'' (i.e. with a variable width along the tilt-axis).
104 | \end{note}
105 | 
106 | \begin{note}For a 0\textdegree\ image, $\bm{\mathrm{z}}'_{max} - \bm{\mathrm{z}}'_{min} \leqslant \bm{\Delta \mathrm{z}}$, because the spatial model only takes into consideration the particles from within a $z$-section. In other words, the 0\textdegree \ spatial model ``fits'' into the central slab of $\bm{\Delta \mathrm{z}}$ width and therefore has only one strip. On the other hand, as the tilt-angle increases, more strips are necessary to fully cover the spatial model.
107 | \end{note}
108 | 
109 | \item \textbf{CTF multiplication}: At this point, the spatial model is correctly positioned in the microscope frame and divided into $n$ strips. An array is allocated in memory to hold the final CTF-corrected view and it is progressively filled, one strip at a time (a sketch of this loop follows the list). For each strip $n$:
110 | \begin{enumerate}
111 | \item We calculate the 2D CTF of the current strip. The astigmatic defocus of the strip depends on the spatial model and therefore contains the $z$-offset of the strip, the $z$-offset of the section and the $z$-offset accounting for the fact that the defocus estimate refers to the center-of-mass of the specimen and not to the center of the reconstruction.
112 | 
113 | \item A copy of the Fourier transform of the current image is multiplied by this 2D CTF and inverse Fourier transformed. The pixels that belong to the current strip are extracted from this CTF-multiplied image and added to the pre-allocated output array.
114 | 
115 | \begin{note}Adjacent strips can slightly overlap by 1 pixel. The values of these pixels are divided by 2 to eliminate this overlapping artefact.
116 | \end{note}
117 | \end{enumerate}
118 | 
119 | \item \textbf{Save the CTF-corrected stacks}: Once every image of the tilt-series has been corrected, the stack of CTF-corrected images is temporarily saved in the \code{cache} directory.
120 | \end{enumerate}
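Condensing steps 3 and 4, the per-strip assembly can be sketched as follows (the names are ours, \code{ctf\_2d} stands in for the astigmatic CTF computation, and \code{defocus\_ramp} follows equation \ref{eq:def_ramp}):
\begin{lstlisting}
import numpy as np

def ctf_multiply_view(view, tz, defocus_ramp, delta_z, ctf_2d):
    # `tz` holds the z coordinate of every pixel of the tilted spatial
    # model, i.e. the view placed in the microscope frame.
    out = np.zeros_like(view)
    hits = np.zeros_like(view)  # number of strips covering each pixel
    view_ft = np.fft.fft2(view)
    for z in defocus_ramp:
        strip = (tz > z - delta_z / 2) & (tz <= z + delta_z / 2)
        corrected = np.fft.ifft2(view_ft * ctf_2d(z)).real
        out[strip] += corrected[strip]
        hits[strip] += 1
    # Pixels sampled by two overlapping strips (strip edges) are averaged.
    return out / np.maximum(hits, 1)
\end{lstlisting}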
121 | 
122 | At this point of the procedure, we have calculated one CTF-corrected tilt-series for each $z$-section of the specimen.
123 | 
124 | % FOR EACH SECTION:
125 | % 1) assume the defocus determined is the distance from the center of mass of subtomograms in Z to the focal plane, rather than the center of mass of the tomograms (specimen): add avgZ to the defocus (defocus offset). This is only done if the section hasn't a surfaceFit. If it does, the surfaceFit already has the offset.
126 | 
127 | % FOR EACH PROJECTION
128 | % 1) pad projection to optimum FFT size.
129 | % 2) Compute and apply the exposure filter.
130 | % 3) Compute the mesh grids for X and Y. For the Z, use the surfaceFIT, otherwise zeros (flat).
131 | % 4) Transform the specimen plane to take into account the tilt angle. The tZ is multiplied by pixel_size (meter opposed to pixel) and is shifted by the defocus value (which contains the defocus offset).
132 | % 5) Extract the defocus ramp (min tZ, max tZ).
133 | % 6) Using the same defocus step, define the strips: minDefocus-ctf3dDepth/1:ctf3dDepth/1:maxDefocus+ctf3dDepth/1.
134 | %    For each strip:
135 | %       - allocate correctedPrj zeros().
136 | %       - compute the vector defocus of the strip (defocus_strip) and compute the CTF.
137 | %       - multiply the entire projection with the CTF, save as tmpCorrection.
138 | %       - define the strip mask: (tZ > defocus_strip - ctf3dDepth/2 & tZ <= defocus_strip + ctf3dDepth/2) which defines the region of the transformed image that belongs to the strip. The strips are not linear if the sample is not planar.
139 | %       - Add tmpCorrection(mask) to correctedPrj.
140 | %    We also make sure that if the strip overlap (rounding error, surface fit...), we'll take the mean value for these pixels sampled multiple times (usually max 2, it is at the edges of the strips).
141 | 
142 | % Save the corrected stack (multiplied by CTF and exposure filtered).
143 | 
144 | \subsubsection{Tomogram reconstructions} \label{sec:algo:ctf_3d:reconstruction}
145 | 
146 | For each sub-region, we reconstruct each $z$-section of the sub-region tomogram independently, using their respective CTF-corrected tilt-series. Once the $z$-sections are reconstructed, we concatenate them to form the final 3D-CTF corrected sub-region tomograms.
147 | For each $z$-section $s$ of a given specimen:
148 | \begin{enumerate}
149 | \item For each sub-region defined for this specimen, we reconstruct with {\tilt} the current section using the section's 3D-CTF corrected tilt-series. The tilt angles saved in table \ref{tab:ctf_tlt} are used for the \code{-TILTFILE} entry. If there is a \code{.local} file for this specimen, it will be assigned to the \code{-LOCALFILE} entry. The cosine stretching is turned off (\code{-COSINTERP 0}) and no low-pass filtering is applied (\code{-RADIAL 0.5,0.05}).
150 | \begin{note}The ability to reconstruct only subsets of a bigger volume heavily relies on the \code{SLICE}, \code{THICKNESS} and \code{SHIFT} entries of the {\tilt} program.
151 | \end{note}
152 | 
153 | \item The output reconstructions from {\tilt} are oriented with the $y$ axis in the third dimension. With the \href{https://bio3d.colorado.edu/imod/doc/man/trimvol.html}{trimvol} \code{-rx} entry, we rotate by -90\textdegree\ around $x$ to place the $z$ axis in the third dimension.
154 | \end{enumerate}
155 | 
156 | Once all the sections of each sub-region are reconstructed, the $z$-sections of the same sub-region are stacked together in $z$ with \href{https://bio3d.colorado.edu/imod/doc/man/newstack.html}{newstack} to create the final 3D-CTF corrected sub-region tomograms.
157 | 
--------------------------------------------------------------------------------
/Algorithms/13_tomoCPR.tex:
--------------------------------------------------------------------------------
1 | \subsection{Tilt-series refinement} \label{sec:algo:tomoCPR}
2 | 
3 | 
4 | Subtomogram averaging provides accurate estimates of both particle positions and high SNR reconstructions. It is thus possible to leverage this information to improve the alignment of a tilt-series.
5 | 
6 | 
7 | \subsubsection{Reconstruct the synthetic tomogram}
8 | 
9 | The first step is to reconstruct the full tomogram $\bm{V}$ in the same way we reconstructed the sub-region tomograms in section \ref{sec:algo:ctf_3d}. Similarly to the sub-region tomograms used for subtomogram averaging and alignment, this tomogram is CTF-phase corrected. To save some precious run time, and because this tomogram is only used to refine the fiducial positions, {\emClarity} does not follow the so-called ``3D CTF-correction'': the correction is forced to use only one $z$ slab (section \ref{sec:algo:ctf_3d:defocus_step}). In other words, the thickness of the specimen is not taken into account during the correction, only the tilt geometry. Moreover, the center of mass in $z$ is not adjusted and the spatial model is not defined (section \ref{sec:algo:ctf_3d:spatial_model}).
10 | 
11 | % ASK BEN: The tomogram is standardized ($\sigma=1$) and weighted based on the particle mass:
12 | % divide the tomogram by rmsScale*rms(tomogram); rmsScale = sqrt(particleMass).
13 | 
14 | Once the full tomogram is reconstructed, the subtomograms are replaced by their corresponding half-maps. For each particle $p$:
15 | 
16 | \begin{enumerate}
17 | \item \textbf{Get the coordinates of the particle}: The $x,\ y,\ z$ coordinates of the particle, which are saved in the metadata, correspond to the 3D coordinates within the sub-region tomogram, with the origin at the lower left corner. In this section, we are working with the full tomogram $\bm{V}$, i.e. the entire field of view, so the coordinates must be adjusted to $\bm{V}$.
18 | 
19 | \item \textbf{Get the reference in the microscope frame}: The particle is attached to a rotation $\bm{R}_p$. Moreover, as explained in section \ref{sec:algo:avg:subtomo_avg} step \textbf{1.b}, the particles $p$ are attached to a translation ${[\bm{T}_{orig}]}_p$. The half-map $\bm{S}$ is rotated by $\bm{R}^{T}_p$ and translated by ${[-\bm{T}_{orig}]}_p$ (note \ref{note:ref2mic_frame}). The same transformation is applied to the soft-edged molecular mask $\bm{M}_{mol}$ (section \ref{sec:algo:avg:molecular_mask}). We will refer to the transformed reference and transformed mask as $\bm{S}_p$ and ${[\bm{M}_{mol}]}_p$.
20 | % I'm skipping the fact that we actually compute the fsc mask on ref1+ref2 and applying a spherical mask.
21 | 
22 | \item \textbf{Replace the particle's density by the reference's density}: The voxels of the full tomogram $\bm{V}$ corresponding to the particle $\bm{s}_p$ are replaced by the masked reference, as follows:
23 | \begin{equation}
24 | \bm{V}(\bm{s}_p) = \left(1-{[\bm{M}_{mol}]}_p \right)\bm{s}_p + \bm{S}_p {[\bm{M}_{mol}]}_p
25 | \end{equation}
26 | where $\bm{V}(\bm{s}_p)$ refers to the voxels of $\bm{V}$ that correspond to the particle $\bm{s}_p$.
27 | \end{enumerate}
28 | 
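In code, this blending amounts to a single masked update per particle; a minimal sketch, with the extraction of the particle's voxels from $\bm{V}$ left out:
\begin{lstlisting}
def paste_reference(subvol, ref_rot, mask_rot):
    # Blend the transformed reference into the particle's voxels; the soft
    # mask preserves the taper, as in the equation above.
    return (1.0 - mask_rot) * subvol + ref_rot * mask_rot
\end{lstlisting}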
29 | 
63 | \subsubsection{Reproject the synthetic tilt-series} \label{sec:algo:tomoCPR:reproject_coords}
64 | 
65 | We want to reproject the ``synthetic'' tilted views \emph{and} the particle coordinates. Both reprojections are calculated by {\tilt}:
66 | \begin{itemize}
67 | \item \textbf{Reproject the coordinates}: For the tilt-series alignment, {\tiltalign} needs the $x,\ y$ coordinates of the particles, for each view of the tilt-series. Fortunately, {\tilt} can reproject the $x,\ y,\ z$ coordinates of the particles onto each view and, if the defocus values are known, it can also calculate the defocus of each particle for each view, while accounting for the local alignments that were used during the reconstruction.
68 | % As such, we run {\tilt} with the following entries:
69 | % table
70 | 
71 | \item \textbf{Reproject the tilt-series}: The synthetic tomogram, calculated in the previous section, is reprojected into an aligned synthetic tilt-series using {\tilt}. Of course, it takes into account any local alignments that were used to reconstruct the tomogram.
72 | \end{itemize}
73 | 
74 | \begin{note}{\tilt} and {\tiltalign} operate with the $y$ axis, i.e. the \code{SLICE}, in the third dimension. So both the ``synthetic'' tomogram and the $x,\ y,\ z$ coordinates are rotated by 90\textdegree\ around $x$.
75 | \end{note}
76 | 
77 | 
88 | \subsubsection{Refine the fiducial positions}
89 | 
90 | The synthetic tomogram is now reprojected into a synthetic tilt-series. Tiles around each projected high SNR subtomogram origin are cut out, convolved with the CTF of the raw data projection at that point and aligned to the raw data.
91 | 
92 | By default, {\emClarity} sets the maximum number of particles used as fiducials to 1800. This value can be changed with the \code{tomoCPR\_randomSubset} entry. If there are more particles than the allowed number of fiducials, a random subset of particles is selected.
93 | 
94 | We will refer to the raw aligned tilt-series as $\bm{I}_{raw}$ and to the synthetic tilt-series as $\bm{I}_{synt}$. Before starting the refinement, each view ${[\bm{I}_{raw}]}_i$ and ${[\bm{I}_{synt}]}_i$ is centered and standardized, first globally, then locally. Combined with the sampling mask, calculated in section \ref{sec:algo:defocus_estimate:transform}, it allows us to define a mask ${[\bm{M}_{eval}]}_i$ excluding regions that are not sampled or that significantly vary from the rest of the data, like carbon, contaminants, etc.
95 | 
96 | Before refining the fiducial positions, a global shift estimate is calculated, for each view $i$.
97 | \begin{enumerate}
98 | \item \textbf{Calculate the cross-correlation map}: The cross-correlation between ${[\bm{I}_{raw}]}_i$, which is not CTF corrected, and ${[\bm{I}_{synt}]}_i$, which is multiplied by the CTF, is defined as follows:
99 | \begin{equation}
100 | \bm{\mathrm{CC}}_{i} = \mathcal{F}^{-1} \left\{ \bm{W}_{low\text{-}pass}\ {[\bm{W}_{ctf}]}_i\ \mathcal{F}\left\{ {[\bm{I}_{raw}]}_i \right\}\ \overline{\mathcal{F}\left\{ {[\bm{I}_{synt}]}_i \right\}}\ \left|{[\bm{W}_{ctf'}]}_i\right|\ \right\} \bm{M}_{peak\text{-}global}
101 | \end{equation}
102 | where $\bm{W}_{low\text{-}pass}$ is a low-pass filter, with a cutoff set by \code{tomoCprLowPass}. ${[\bm{W}_{ctf}]}_i$ is the astigmatic 2D CTF of the view $i$, with envelope. $|{[\bm{W}_{ctf'}]}_i|$ is the astigmatic 2D CTF without envelope and is used to modulate the amplitudes of the reprojected reference to better match the raw data. $\bm{M}_{peak\text{-}global}$ is a spherical mask and limits the translation to $\sim 10$ to $20$\r{A}.
103 | 
104 | \item \textbf{Get highest peak position}: This step is identical to the last step of section \ref{sec:algo:align:get_translation} and outputs a translation ${[\bm{T}_{global}]}_i$.
105 | 
106 | \item \textbf{Apply the global translation estimate}: ${[\bm{I}_{raw}]}_i$ is translated by ${[\bm{T}_{global}]}_i$, using linear interpolation.
107 | \end{enumerate}
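The peak extraction used in these steps (``get 7x7 box around it, calculate the COM and add to max'', as the workflow notes below put it) can be sketched as:
\begin{lstlisting}
import numpy as np

def peak_with_com(cc, box=7):
    # Integer argmax of a CC map, refined by the center of mass of the
    # surrounding box. Assumes the peak is not on the map border.
    cc = cc - cc.min()
    py, px = np.unravel_index(np.argmax(cc), cc.shape)
    h = box // 2
    tile = cc[py - h:py + h + 1, px - h:px + h + 1]
    gy, gx = np.mgrid[-h:h + 1, -h:h + 1]
    return (py + (gy * tile).sum() / tile.sum(),
            px + (gx * tile).sum() / tile.sum())
\end{lstlisting}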
108 | 
109 | % - c) Calculate the ctf of the projection, without envelope. PhaseOnly=-0.15 and not 1? HqzUnMod is the CTF before cutoffs and without PhaseOnly.
110 | % - d) Calculate the CCmap = fftshift(ifftn( bandPassPrj * fftn(dataPrj) * abs(HqzUnMod) * conj(fftn(refPrj)*Hqz) ))
111 | %      To match the amplitudes, given that the projected reference is amplitude corrected, I multiply fftn(refPrj) with abs(ctf). Why not do same for local refinement?
112 | % - e) Get the peak estPeak = subtract with min, apply globalPeakMask, take the max, get 7x7 box around it, calculate the COM and add to max. This is like usual.
113 | % - f) Shift the dataPrj by estPeak.
114 | 
115 | Then, the $x$ and $y$ positions of each particle, for each view $i$, are refined as follows:
116 | \begin{enumerate}
117 | \item \textbf{Extract the particle tile}: For both $\bm{I}_{raw}$ and $\bm{I}_{synt}$, a tile of $1.5 \times \code{particleRadius}$ is extracted at the particle reprojected coordinate, centered and standardized. If a tile overlaps with regions excluded by the evaluation mask ${[\bm{M}_{eval}]}_i$, the particle is ignored and will not be used as a fiducial.
118 | 
119 | \item \textbf{Calculate the translation between the raw and the synthetic tile}: The tiles are padded to twice their size in real space, masked, and the CC map is calculated as follows:
120 | \begin{equation}
121 | \bm{\mathrm{CC}}_{p,i} = \mathcal{F}^{-1} \left\{ \bm{W}_{low\text{-}pass}\ {[\bm{W}_{ctf}]}_{p,i}\ \mathcal{F}\left\{ {[\bm{I}_{raw}]}_{p,i} \right\}\ \overline{\mathcal{F}\left\{ {[\bm{I}_{synt}]}_{p,i} \right\}}\ \right\} \bm{M}_{peak\text{-}local}
122 | \end{equation}
123 | where ${[\bm{W}_{ctf}]}_{p,i}$ is the anisotropic 2D CTF at the particle position, defined by the defocus value $\bm{\mathrm{z}}_{p,i}$. $\bm{M}_{peak\text{-}local}$ is a mask controlled by \code{Peak\_mType} and \code{Peak\_mRadius} and is used to restrict the translation. By default, it is set to $0.4 \times \code{particleRadius}$.
124 | 
125 | \item \textbf{Get the translation estimate}: This step is identical to the last step of section \ref{sec:algo:align:get_translation} and outputs a translation ${[\bm{T}_{local}]}_{p,i}$.
126 | \end{enumerate}
127 | 
128 | At the end of this procedure, each fiducial is translated by ${[\bm{T}_{orig}]}_{p,i} + {[\bm{T}_{global}]}_i + {[\bm{T}_{local}]}_{p,i}$ and saved for the next step.
129 | 
130 | The defocus value can be refined by sampling a range of defocus values around the current estimate in order to maximize $\mathrm{CC}_{p,i}$. The defocus search is set by $\bm{\mathrm{z}}_{p,i} \pm $ \code{tomoCprDefocusRange}, with a step of \code{tomoCprDefocusStep}. For each image $i$, the defocus shifts $\bm{\Delta \mathrm{z}}_{p,i}$ that maximize $\mathrm{CC}_{p,i}$ are averaged, and this average is added to the current defocus value of the image.
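A schematic version of this defocus scan, with \code{ctf\_2d} standing in for the anisotropic CTF computation:
\begin{lstlisting}
import numpy as np

def refine_defocus(raw_ft, ref_ft, z_est, z_range, z_step, ctf_2d):
    # Scan defoci around the current estimate and return the shift that
    # maximizes the CC peak between the raw and synthetic tiles.
    best_shift, best_peak = 0.0, -np.inf
    for dz in np.arange(-z_range, z_range + z_step, z_step):
        cc = np.fft.ifft2(raw_ft * np.conj(ref_ft * ctf_2d(z_est + dz))).real
        if cc.max() > best_peak:
            best_peak, best_shift = cc.max(), dz
    return best_shift  # averaged over all fiducials of the view
\end{lstlisting}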
131 | 
132 | \subsubsection{Align and transform the tilt-series}
133 | 
134 | The fiducials are now aligned, so we can use them to find a new geometric model with {\tiltalign}. A bash script is saved in \code{mapBack} and contains the parameters that are used for the alignment. Most of them are accessible directly via the {\emClarity} parameter file (table \ref{param:tomoCPR}).
135 | 
136 | Finally, the raw unaligned tilt-series in \code{fixedStacks} is transformed using this new geometric model. This last part is done via \code{ctf update} and is similar to section \ref{sec:algo:defocus_estimate}, with the exception that this new alignment is relative to the aligned tilt-series. As such, the new rotation and shifts are added to the original ones, and the tilt angles are updated. This new transformation is saved in a new table \ref{tab:ctf_tlt}. The bead coordinates are updated as well, and the beads are erased from the new aligned stack.
137 | 
138 | %% WORKFLOW
139 | 
140 | % Get the reference:
141 | %  - un-mount the reference
142 | 
143 | %%% For every tilt-series saved in mapBackGeometry:
144 | 
145 | % - 1) Reconstruct the full tomogram:
146 | %      - a) Get the thickness of the reconstruction t (maxZ):
147 | %           - For each sub-regions, get the Zmax-Zmin + Zshift, and save maxZ as the thickest.
148 | %      - b) Call ctf 3d, but reconstruct the entire field of view, use the maxZ defined above, one Z section, no surfaceFit.
149 | %           Therefore, this is not a 3D correction: the tilt is corrected, but no thickness (one Z section...).
150 | %           You don't use the particle positions here: no offsets.
151 | 
152 | % - 2) Standardize the tomogram and weight it based on the particle mass:
153 | %      - a) divide the tomogram by rmsScale*rms(tomogram); rmsScale = sqrt(particleMass).
154 | 
155 | % - 3) For each sub-region:
156 | %      - a) Get the exposure (first view is exposure=0).
157 | %      - b) Get the defocus values z: _align.defocus
158 | %      - c) Get the xf file (no rotation, no shift): _align.XF
159 | %      - d) Get the sub-regions coordinates (relative to the full tomo).
160 | %      - e) Get the particleMask = boxsize of the reference, sphere of particleRadius * fsc reference mask of ref1+ref2
161 | 
162 | %      - f) For each subtomo: Reprojection of the subtomograms.
163 | %          - i) Get rotation and position of particle: prjVector (relative to the full tomogram, center).
164 | %               There is a preShift = [-0.5,-0.5,0.5] applied. This is for IMOD I think, pixel vs coord space.
165 | %          - ii) Extract the subtomogram pixel coordinates indVal and shiftVal, using its subregions tomogram boundaries.
166 | %                This is like avg or alignRaw.
167 | %          - iii) Put the reference of the particle (fsc 1 or 2) in the mic frame: rot' and then shiftVal.
168 | %                 Apply the same transformation to the ref mask and apply it to the resampled ref.
169 | %          - iv) Replace the full tomogram voxels by the reference: tomo * (1-refMask) + ref
170 | %                Multiplying by 1-refMask to take into account the taper.
171 | %          - v) Save the 3d position of the particle. The positions are 90deg rotx, lower left corner full tomo.
172 | %               _ali*.coord: x,y,z,fid_id
173 | %          - vi) Save the 2d position of the particle for each image in the stack:
174 | %                _ali*.defAng = fid_id,section,def; def = z_tilt + z_subtomo (coord(x,y,z) * tilt), in binned pixel.
175 | % _ali*.coord_start = fid_id,subregion,particle_id,def,def_shift,def_angle(deg),rotm,pre_exp,post_exp,fsc_group. 176 | % The def here is not as defAng; here it is not tilted, just prjVector + z_tilt. 177 | 178 | % - 4) Reprojection 3d from 2d: 179 | % - a) Save _ali*.coord into model file: .3dfid and reproject (at the tilt angles) the 3dfid coordinates with tilt = coordPrj. 180 | % Each fiducial has its defocus value reprojected as well, as in .defAng, but with local alignments compensated = defAngTilt. 181 | % - b) Rotate -rx and save the synthetic tomogram .tmpRot. 182 | % - c) Reproject with tilt at the same tilt angles than original tilts: 183 | % - The reprojection is divided into chunks, in Y. 184 | % - COSINTERP 0, THICNESS maxZ, local file if any. 185 | 186 | % - 5) Prepare the alignment: 187 | % - CTFSIZE = 2 * 1.5 * particleRadius, up to best Fourier = box size 188 | % - ctfMask = spherical mask that covers the entire tile -padding. 189 | % - peakMask = spherical 0.4 * particleRadius. If eraseMask = define own type and radius with Peak_mType and Peak_mRadius. 190 | % - bandPassPrj = size of the tilt-series. lowpassCutoff = tomoCprLowPass, between 10 and 24A (and Nyquist). 191 | % - globalPeakMask = closest even int to max(2, ceil(10/pixSize)); look around +/- 10-20A. The mask is at the center of the CCmap of the stack. 192 | 193 | % - 6) Fiducial: select the fiducial to follow. Take all if < tomoCPR_randomSubset, otherwise take a random subset. 194 | 195 | % - 7) For each view: 196 | % - a) Load both the stack (dataPrj), synthetic stack (refPrj) and samplingMask. Resample samplingMask to current binning. 197 | % - b) Get dataPrj ready: 198 | % - i) center and standardize globally = dataPrj 199 | % - ii) then calculate the local mean, subtract it = dataRMS. The local window is 256x256 unbinned, 200 | % or 64x64 unbinned for the local rms (for the mask, not for the local scaling). 201 | % - iii) Remove outliers: calculate on the dataRMS the local rms, the global mean and rms. 202 | % evalMask = dataRMS > (mRms - 2*sRms) & ~samplingMask 203 | % This remove from the evalMask the regions that are not sampled, and carbone, etc. 204 | % - iv) Local scaling: subtract the local mean to dataPrj, and divide by local rms of this centered dataPrj. 205 | % OR Option: BH_whitenNoiseSpectrum 206 | % - v) Global scaling: center and standardize dataPrj AND refPrj with their mean and rms. 207 | 208 | % - c) Calculate the ctf of the projection, without envelope. PhaseOnly=-0.15 and not 1? HqzUnMod is the CTF before cutoffs and without PhaseOnly. 209 | % - d) Calculate the CCmap = fftshift(ifftn( bandPassPrj * fftn(dataPrj) * abs(HqzUnMod) * conj(fftn(refPrj)*Hqz) )) 210 | % - e) Get the peak estPeak = subtract with min, apply globalPeakMask, take the max, get 7x7 box around it, calculate the COM and add to max. This is like usual. 211 | % - f) Shift the dataPrj by estPeak. 212 | 213 | % - g) For each fiducial in this projection: 214 | % - IF ctfCalc: 215 | % - if sqrt(2)*pixelSize > min_res_for_ctf_fitting(10), then, turn-off the defocus refinement. 216 | % 217 | % - take the X and Y coordinates of the particles from .coordPrj 218 | % - tileRadius = 1.5 * PARTICLE_RADIUS. oxEval/oyEval = X/Y +/- PARTICLE_RADIUS. 219 | % - If any 0 in the evalMask, within this the particle radius OR if the tile is out-of-bounds, the particle is ignored. 220 | % - extract the tile, from dataPrj and refPrj. center and standardize. 221 | % - Pad the images to CTFSIZE (oversample) and apply ctfMask to the tiles. 
222 | %          - calculate fft of tiles, lowpass to lowPassCutoff, highpass to 400. Take the conj for the refPrj and multiply by the CTF of particle (using defAngTilt).
223 | %          - if ctfCalc:
224 | %              - highpass to 40, lowpass to min(sqrt(2)*pixelSize, min_res_for_ctf_fitting).
225 | %              - try a range of defocus, set by tomoCprDefocusRange and tomoCprDefocusStep.
226 | %          - dXY = calculate the CCmap, apply peakMask and get the peak coordinates as usual.
227 | %            Take into account the x/y shift due to extraction of the tiles AND estPeak.
228 | %          - if calcCTF:
229 | %              - take the defocus that gave the highest peak. Take the mean of these defoci for each fiducial in this image: this is the defocus shift to apply to the defocus at the tilt-axis of this given projection. This is saved in _ctf.defShifts
230 | %          - Save the new coordinates in .coordFIT: particle_id, fid_id, dXY
231 | 
232 | % - 8) Call tiltalign - refine the tilt-series alignment:
233 | %      - RotOption 1: for each view having an independent rotation
234 | %      - TiltOption 5: to automap groups of tilt angles (for linearly changing values), TiltDefaultGrouping = 5
235 | %      - MagOption 1: to vary all magnifications independently
236 | %        XStretchOption 0
237 | %        SkewOption 0
238 | %        BeamTiltOption 0
239 | %        XTiltOption 0
240 | %        ResidualReportCriterion 0.001
241 | %        RobustFitting
242 | %        KFactorScaling 0.458: k_factor_scaling = 10 / sqrt(nFidsTotal)
243 | %        LocalAlignments
244 | %        LocalRotOption 1
245 | %        LocalRotDefaultGrouping 3
246 | %        LocalTiltOption 5
247 | %        LocalTiltDefaultGrouping 5
248 | %        LocalMagOption 1
249 | %        LocalMagDefaultGrouping 5
250 | 
251 | 
252 | %%%%%% CTF UPDATE
253 | % Align the tilt-series with ali2_ctf.tlt, like ctf estimate (fraction inelastic, etc.).
254 | %   - The shifts and rotation are relative to the aligned stack, so the new alignment is added to the original one. So update ali2_ctf.tlt to make it relative to the raw tilt-series (rotm and shift).
255 | %   - Add to def value the .defShifts (if calcCTF).
256 | %   - Update the tilt angles.
257 | %   - Update the beads coordinates, using the new .tltxf transformation (rotation + mag) and erase them on the new aligned stack.
258 | %
259 | 
--------------------------------------------------------------------------------
/Algorithms/14_classification.tex:
--------------------------------------------------------------------------------
1 | \subsection{Subtomogram classification}
2 | 
3 | The heterogeneity of the data-set can be analysed by comparing individual particles with the current references. This analysis can be focused on specific features, i.e. resolution bands, like $\alpha$-helices, small protein domains, etc. Briefly, difference maps are calculated between each particle and the references, for each resolution band. These maps are then analysed by Principal Component Analysis (PCA), using Singular Value Decomposition (SVD). Once the difference maps are reduced in dimensionality and described by some principal axes, they can be clustered with $k$-means or other clustering algorithms. As a result, each subtomogram is assigned to a class and a subtomogram average can be generated for each class. We will detail this entire process in the following sections.
4 | 
5 | Usually, PCA is calculated on the covariance or correlation matrix of the data. These are quite popular in tomography because we can consider the missing wedge of the particles while calculating these matrices, using constrained cross-correlation. SVD allows us to perform a PCA directly on the data, but we still need to take into account that each particle has an incomplete sampling. To solve this problem, we are going to calculate difference maps.
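To fix ideas, PCA via SVD on a stack of flattened difference maps can be written as follows; this is a generic sketch, not emClarity's exact implementation:
\begin{lstlisting}
import numpy as np

def pca_by_svd(X, n_axes):
    # X is (n_particles, n_voxels): one flattened, masked difference map
    # per row. Rows are centered before the decomposition.
    Xc = X - X.mean(axis=0)
    U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
    axes = Vt[:n_axes]                        # principal axes (eigenvolumes)
    components = U[:, :n_axes] * S[:n_axes]   # projections, fed to k-means
    return axes, components
\end{lstlisting}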
6 | 7 | %Multiple copies of the objects of interest can be extracted from tomograms. As for SPA, but in 3D, it is then possible to align and average these subtomograms to generate reconstructions with improved resolution. Further characterization can be done via statistical methods (classification algorithms) allowing to describe the subtomogram population based on physical differences. This is subtomogram averaging and classification. Compared to SPA, CET collects more information per object of interest \cite{mcewen_1995}, making each particle exist as a unique 3D reconstruction, allowing for a direct analysis of the 3D heterogeneity of the subtomograms. 8 | 9 | %To correct for differences in sampling between the reference and an individual particle, the current subtomogram average is distorted by the sampling function it is being compared to. This effectively estimates what the average particle should look like at that subtomogram position, allowing to only compare meaningful differences. The dimensionality of these differences is reduced by principal component analysis, using Singular Value Decomposition (SVD). Features of a given length scale (e.g. $\alpha$-helices, small protein domains, etc.) can be focused on and considered simultaneously by band-pass filtering the reconstructions and computing the SVD for each length scale. The singular vectors (i.e. the principal axes) describing the greatest variance for each length scale are selected and used to project the data (i.e. the principal components). Then, the principal components are clustered with $k$-means or other clustering algorithms. Finally, once every subtomogram is assigned to a class, we can reconstruct the subtomogram average for each class. 10 | 11 | \subsubsection{Combine the half-maps} \label{sec:algo:classification:combine_maps} 12 | 13 | First, we need to prepare the reference to which the data-set is being compared; what we are going to classify is the difference between the particles $\bm{s}$ and this reference. With the gold-standard approach, we have two half-maps, $\bm{S}_1$ and $\bm{S}_2$. For classification, these two references are aligned by spline interpolation, using the rotation $\bm{R}_{gold}$ and translation $\bm{T}_{gold}$ calculated in section \ref{sec:algo:avg:fsc}, and averaged into one unique reference, simply referred to as $\bm{S}$. Of course, this alignment is not persistent, so that extracted class averages are still independent half-sets. 14 | 15 | \begin{note}The classification can be performed at a different sampling than the half-maps, hence the two distinct parameters \code{Cls\_samplingRate} and \code{Ali\_samplingRate}. In this case, the reference $\bm{S}$ is re-scaled to the desired pixel size, using linear interpolation. 16 | \end{note} 17 | 18 | \subsubsection{Resolution bands} \label{sec:algo:classification:resolution_bands} 19 | 20 | As mentioned previously, we can restrict the analysis of the physical differences between the data-set and the reference to some $r$ specific length scales, also referred to as resolution bands or features. These resolution bands are controlled by the \code{pcaScaleSpace} entry and we refer to them as $\bm{L}_r$. As we will see in more detail, the PCA is calculated for each one of these length scales. Importantly, the clustering is only calculated once and considers each length scale simultaneously to provide a richer description of the feature space.
21 | 22 | First, we can exclude from the analysis most of the background by calculating a molecular mask $\bm{M}_{mol}$ of $\bm{S}$, as described in \ref{sec:algo:avg:molecular_mask}. The \code{Cls\_mType}, \code{Cls\_mRadius} and \code{Cls\_mCenter} parameters are used to compute a soft-edged shape mask, $\bm{M}_{shape}$. In order to constrain the analysis to the desired region, this mask is applied to the molecular mask $\bm{M}_{mol}$. This mask is saved as \code{\_pcaVolMask.mrc}. 23 | 24 | % The reference is then masked with $\bm{M}_{mol}$ and centered and standardized. 25 | % Then, for each resolution band $\bm{L}_{r}$, the masked reference is filtered by either a band-pass filter or a low-pass filter: 26 | 27 | Then, for each resolution band $r$, either a band-pass filter or a low-pass filter $\bm{W}_r$ is prepared. 28 | \begin{itemize} 29 | \item \textbf{Low-pass filters}: By default, low-pass filters are used and include frequencies from 400\r{A} to $(0.9 \times \bm{L}_{r})$\r{A}. These filters do not define ``resolution bands'' per se and most frequencies will be shared between the resulting filtered references $\bm{S}_r$, but they have produced good results nonetheless. 30 | \begin{note}As often, frequencies before 400\r{A} are removed to center the reference and remove large density gradients.\end{note} 31 | % with the reference we don't expect gradients I think, but... 32 | 33 | \item \textbf{Band-pass filters}: If \code{test\_updated\_bandpass} is true, band-pass filters are used instead and include frequencies from 400\r{A} to 100\r{A} and a resolution band at $\bm{L}_r$. 34 | \end{itemize} 35 | 36 | For visualization, the reference $\bm{S}$ is filtered with the filters $\bm{W}_r$, masked with $\bm{M}_{mol}$, centered and standardized. These volumes are saved in \code{test\_filt.mrc}. 37 | 38 | % The filtered references $\bm{S}_r$ are finally centered and standardized within $\bm{M}_{mol}$, masked again with $\bm{M}_{mol}$ and saved in \code{test\_filt.mrc}. 39 | 40 | % This is achieved by filtering the reference $\bm{S}$ with band-pass filters, resulting in $r$ references, referred to as $\bm{S}_r$. 41 |
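To make the filter construction concrete, here is a minimal {\MATLAB} sketch of the soft low-pass variant of $\bm{W}_r$ for a single length scale. This is a generic illustration, not the {\emClarity} implementation: the box size, pixel size, roll-off widths and variable names are assumptions chosen for readability.
\begin{verbatim}
% Soft filter keeping information from 400 A down to 0.9*L_r.
N   = 128;       % box size in voxels (illustrative)
pix = 4.0;       % pixel size in A/pixel (illustrative)
L_r = 20.0;      % length scale in A (illustrative)
S   = randn(N, N, N);  % stand-in for the reference volume

[gx, gy, gz] = ndgrid(-N/2 : N/2-1);
q = sqrt(gx.^2 + gy.^2 + gz.^2) ./ (N * pix);  % frequency in cycles/A

qLow  = 1 / 400;          % high-pass cutoff: removes large gradients
qHigh = 1 / (0.9 * L_r);  % low-pass cutoff for this length scale
sigma = 0.05 * qHigh;     % width of the Gaussian roll-off

W = 1 - exp(-q.^2 ./ (2 * (qLow / 2)^2));              % soft high-pass
W = W .* exp(-max(q - qHigh, 0).^2 ./ (2 * sigma^2));  % soft low-pass

% Filter the reference (W is on a centered grid, hence the ifftshift):
S_r = real(ifftn(fftn(S) .* ifftshift(W)));
\end{verbatim}
For the band-pass variant, one would instead keep the 400--100\r{A} band plus a band centered on $\bm{L}_r$.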
42 | \subsubsection{Difference maps} 43 | 44 | What we want to classify is the difference between each particle $p$ and the reference $\bm{S}$, for each length scale $r$, while accounting for the incomplete sampling of the particles. To calculate a difference map $\bm{X}_{p,r}$, we do as follows: 45 | 46 | \begin{enumerate} 47 | \item \textbf{Get the particle in the reference frame}: The particle $\bm{s}_p$ is extracted from the CTF phase-multiplied sub-region tomogram. Because we want to use the entire data-set for the classification, the two half-sets must be aligned in the same way we aligned the two half-maps $\bm{S}_1$ and $\bm{S}_2$ in section \ref{sec:algo:classification:combine_maps}. 48 | As such, the first half-set is translated by ${[\bm{T}_{orig}]}_p + \bm{T}_{gold}$ and rotated by $\bm{R}_{p} \bm{R}_{gold}$, whereas the second half-set is simply translated by ${[\bm{T}_{orig}]}_p$ and rotated by $\bm{R}_p$. 49 | 50 | \item \textbf{Get the sampling function of the particle in the reference frame}: The same $\bm{R}_p$ rotation (or $\bm{R}_{p} \bm{R}_{gold}$) is applied to the sampling function of the particle $\bm{w}_{p}^2$ to correctly represent the original sampling of the subtomogram. 51 | %\begin{note}As explained in section \myref{subsubsec:SF3D}, {\emClarity} is currently not computing a sampling function per particle, but divides the field of view into 9 strips parallel to the tilt-axis. 52 | %\end{note} 53 | 54 | % We don't do this exactly in this order, but I think it is clearer like this. 55 | \item \textbf{Calculate the difference map}: The difference map $\bm{X}_{p,r}$ between the reference and the ${p^{th}}$ particle, for the $r^{th}$ length scale, is then defined as: 56 | \begin{equation} 57 | \bm{X}_{p,r} = \mathcal{F}^{-1} \bigg\{ 58 | \Big( 59 | \underbrace{\mathcal{F} \left\{ \bm{S} \right\} \bm{w}^{2}_p}_{\mu=0,\ \sigma=1}\ -\ 60 | \underbrace{\mathcal{F} \left\{ \bm{s}_{p} \right\}}_{\mu=0,\ \sigma=1} 61 | \Big) \bm{W}_{r} 62 | \bigg\} \bm{M}_{mol} 63 | \end{equation} 64 | $\bm{X}_{p,r}$ is then centered and standardized. 65 | To save memory and time, only the voxels within $\bm{M}_{mol}$ are saved. 66 | \begin{note}The decomposition will be calculated on the host, but the difference maps are calculated on the device, which is much more efficient. The \code{PcaGpuPull} entry controls how many maps $\bm{X}_{p,r}$ should be held on the device at any given time. 67 | \end{note} 68 | \end{enumerate} 69 |
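To illustrate the equation above, here is a minimal {\MATLAB} sketch for one particle and one length scale. The variable names, the Fourier-space normalization details and the binarization of the mask are assumptions based on the description above, not the actual {\emClarity} code.
\begin{verbatim}
% S:   reference volume (filtered and masked as described above)
% s_p: particle, rotated and shifted into the reference frame
% w2:  sampling function of the particle, rotated, on a centered grid
% Wr:  filter for length scale r;  Mmol: molecular mask
Fs = fftn(S) .* ifftshift(w2);  % distort the reference by the sampling
Fp = fftn(s_p);

normalize = @(F) (F - mean(F(:))) ./ std(F(:));  % mu = 0, sigma = 1
Fs = normalize(Fs);
Fp = normalize(Fp);

Xpr = real(ifftn((Fs - Fp) .* ifftshift(Wr))) .* Mmol;
Xpr = (Xpr - mean(Xpr(:))) ./ std(Xpr(:));  % center and standardize

xpr = Xpr(Mmol >= 0.5);  % keep only the voxels within the mask
\end{verbatim}
The vector \code{xpr} is what would end up as one column of the data matrix $\bm{X}$ described in the next section.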
70 | \subsubsection{Singular Value Decomposition} \label{sec:algo:classification:SVD} 71 | 72 | The difference maps $\bm{X}_{p,r}$ are then linearised into column vectors and stacked into a matrix $\bm{X}$, where the number of rows is the number of voxels $v$ within $\bm{M}_{mol}$, the number of columns is the number of particles $p$ and the number of pages (the third dimension) is the number of length scales $r$. 73 | 74 | One important step when doing a PCA is to center the variables, i.e. the voxels. In our case, it means that the rows of $\bm{X}$ should be set to have a mean equal to zero. In other words, the average of all the difference maps must be subtracted from each difference map. 75 | 76 | Once $\bm{X}$ is ready, we can then calculate the SVD, for each length scale $r$. This is a representation of what we have: 77 | 78 | \input{Figures_Tables/16_svd} 79 | 80 | As most of the variance is usually explained within the first 20 to 30 directions, it is generally not useful to save all of the directions. Use \code{Pca\_maxEigs} to select the number of directions you want to save. The goal now is to select the principal directions onto which the data will be reprojected. This entirely relies on the user, so the principal directions are reshaped and a few files are saved to help: 81 | \begin{itemize} 82 | % C = U * (S^2 / n-1) * U^T 83 | \item \textbf{Variance map}: The covariance matrix of $\bm{X}$ is saved as \code{*\_varianceMap-STD-.mrc}, where \code{} corresponds to the \code{Pca\_maxEigs} and \code{} corresponds to the length scale number $r$ in \code{pcaScaleSpace}. These may be opened on top of your averages from this cycle and should highlight regions where there is significant variability across the data set. If these maps show little blobs everywhere, there is either no significant variability (just picking up noise) or there is still substantial wedge bias at this resolution. 84 | 85 | \item \textbf{Principal axes/directions}: The principal axes $\bm{U}$ are saved in \code{*\_eigenImage-STD-mont-.mrc}. The axes are organized from the lower left moving across the bottom row incrementing by one. The first image is associated with the greatest singular value, i.e. it describes the greatest portion of variance in the dataset. The second image describes the greatest portion of remaining variance, and so on. The value of the voxels can be greater than 1 or less than 0, so they should not be interpreted as a grayscale image. It might be easier to visualize them with colors (3dmod: \code{F12}). For visualization, the principal axes are centered and standardized. 86 | 87 | \item \textbf{Sums}: The principal axes can be difficult to interpret, so it might be useful to add them to the reference, which highlights what is being explained in each principal direction. These are saved in \code{*\_eigenImage-STD-SUM-mont-.mrc}. 88 | % Actually we calculate the avg. 89 | \end{itemize} 90 | 91 | \subsubsection{Clustering} 92 | 93 | 94 | Once the principal directions of each length scale $r$ have been selected by the user, we can reproject the data along these axes. We refer to the principal components as ${[\bm{S}_{r}\bm{V}^{T}_r]}_{best}$. Before PCA, each particle was described by $v$ variables. Now, each particle is only described by $a+b+c$ variables (Figure \ref{fig:cluster}), with $a+b+c \ll v$, i.e. the dimensionality of the dataset has been reduced. 95 | 96 | \input{Figures_Tables/16_cluster} 97 | 98 | The projected data ${[\bm{SV}^T]}_{best}$ is then clustered into $k$ clusters using $k$-means clustering. $k$ is set by \code{Pca\_clusters}. The squared Euclidean distance metric is used by default, i.e. each centroid is the mean of the points in that cluster, and the number of replicates is set to 128, i.e. the number of times to repeat clustering using new initial cluster centroid positions. Both of these can be changed using the parameters \code{Pca\_distMeasure} and \code{Pca\_nReplicates}. The $k$-means\texttt{++} algorithm is used for cluster center initialization and the maximal number of iterations is set to 5000. 99 | 100 | At the end, each particle $p$ is assigned to a cluster $c(p)$, which is saved into the metadata. 101 | 102 |
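The decomposition and the clustering map almost directly onto standard {\MATLAB} built-ins (\code{kmeans} is in the Statistics and Machine Learning Toolbox). Below is a minimal sketch for a single length scale; in the actual procedure the selected components of all length scales are concatenated before a single clustering, and \code{nEigs}, \code{k} and the variable names are illustrative stand-ins for \code{Pca\_maxEigs}, \code{Pca\_clusters} and the internal {\emClarity} variables.
\begin{verbatim}
% X is the v-by-p data matrix (voxels x particles) for this length scale.
X = X - mean(X, 2);          % center each row (voxel) across the particles

[U, S, V] = svd(X, 'econ');  % economy-size SVD
U = U(:, 1:nEigs);           % principal axes (the eigenimages)
S = S(1:nEigs, 1:nEigs);
V = V(:, 1:nEigs);

coeffs = S * V';             % projected data, one column per particle

% k-means on the selected directions: kmeans++ initialization, squared
% Euclidean distance, 128 replicates, up to 5000 iterations.
idx = kmeans(coeffs', k, ...
    'Distance',   'sqeuclidean', ...
    'Replicates', 128, ...
    'MaxIter',    5000, ...
    'Start',      'plus');   % idx(p) is the cluster c(p) of particle p
\end{verbatim}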
103 | %%%%% WORKFLOW 104 | 105 | 106 | %% PCA: 107 | 108 | % 1) extract classification mask info: radius, type, center. 109 | 110 | % 2) Combine the reference: 111 | % - a) Extract the rotation matrix and shifts used to align the two references during FSC calculation. 112 | % - b) load the 2 half-maps, apply the rotation and shifts to the first half map with spline interpolation and add it to the other map. 113 | % - c) if the sampling for the classification is not equal to the sampling of the alignment, resample the reference with BH_reScale3d. 114 | 115 | % 3) Volume mask, volTMP: 116 | % - a) get the shape mask (type, size, radius, center). If any symmetry, calculate only the asym unit. 117 | % - b) get the reference mask and multiply it with the shape mask. 118 | % - c) save this as *_pcaVolMask.mrc. 119 | 120 | % 4) For each resolution band, save the following (X is the resolution band number, starting from 1): 121 | % - masks.volMask.1.X = volTMP; 122 | % - masks.binary.1.X = (volTMP >= bh_global_binary_pcaMask_threshold)(:); bh_global_binary_pcaMask_threshold = 0.5 123 | % - masks.binaryApply.1.X = (volTMP >= 0.01); 124 | % - masks.scaleMask = bandpass(highcut = 400, lowcut = resolution_band * 0.9) 125 | 126 | % 5) Create one average for each resolution band: 127 | % - a) center and standardize the reference with binaryApply and apply the volMask to this volume. 128 | % - b) apply the scaleMask of this resolution band and center/standardize. 129 | % - c) center within binary and standardize within binaryApply and apply binaryApply to the filtered volume. 130 | % - d) montage and save these averages in test_filt.mrc. 131 | 132 | % 6) extract the particles: 133 | % - a) load the SFs. If different size, pad the reference for each resolution accordingly. 134 | % - b) The sampling functions are squared, so sqrt them before applying to the reference. 135 | % - c) define the particles to use. Either random subset or full dataset. 136 | % - d) For each valid particle (if not valid: -9999): 137 | % - i) get shifts and rotation. The first halfset is additionally rotated and shifted (from the FSC) to merge both half-sets. 138 | % - ii) Extract the subtomograms, pad if truncated. 139 | % - iii) rotate and shift. rotate the particle's SF. 140 | % - iv) For each resolution band: 141 | % - apply volMask (reference+shape) to the rotated particle. 142 | % - bandpass filter with scaleMask and center and standardize. 143 | % - Calculate the difference map (everything is in Fourier space): 144 | % - 1: multiply the reference by the particle's SF (which is rotated in the reference frame). 145 | % - 2: center and standardize both ref and particle (which are both in freq space at this point). 146 | % - 3: subtract the particle from the reference. 147 | % - 4: switch back to real space: this is the difference map. 148 | % - Save the difference map voxels that belong to binary in tempDataMatrix{resolution_band}. 149 | % - v) this is done on the GPU, but pull back to the CPU from time to time (every PcaGpuPull particles) in dataMatrix. 150 | % - vi) clean the dataMatrix of invalid particles left over from pre-allocation. 151 | 152 | % 7) Center the rows: 153 | % - For each resolution band, each row (same variable but for all samples), center the row. 154 | 155 | % 8) For each resolution band, calculate the decomposition: 156 | % - a) calculate the economy size SVD of dataMatrix. 157 | % - b) keep the first n eigenvectors. 158 | % - c) coeffs = S * V'; S(rxr) and V(rxr). Each eigenvector is scaled (rotate and stretch). 159 | % - d) Calculate the principal components (projection of the data) varianceMap = U * (S^2/n-1) * U' but I'm not sure this is what it does. 160 | % - e) For each of the n eigenvectors: 161 | % - i) get the eigenvector (which is actually the eigenImage) from U. 162 | % - ii) replace it within the full volume (so far it was restricted to binary). 163 | % - iii) center and standardize within binaryApply. This is the eigenImage. 164 | % - iv) calculate the sum: (eigenImage + reference)/2. 165 | % - f) montage the eigenImages and sums and save to disk. 166 | 167 | % 9) Save the coeffs to metadata.
168 | 169 | %%%%%%%%%%%% 170 | %%%%%%%%%%%% CLUSTER 171 | %%%%%%%%%%%% 172 | -------------------------------------------------------------------------------- /Figures_Tables/04_workflow.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{figure}[!htb] % Stay within section 3 | \captionsetup{labelfont=bf} 4 | \centering 5 | 6 | \begin{center} 7 | \begin{tikzpicture}[node distance=2cm] 8 | 9 | % Specification of nodes 10 | 11 | \node (j1) [job, text width=4cm, align=center] {Align tilt-series\\ \hyperref[sec:defocus_estimate]{\textit{ctf estimate}}, \hyperref[sec:tomoCPR]{\textit{ctf update}}}; 12 | 13 | \node (j2) [job, below of=j1, text width=4cm, align=center] {Defocus estimate\\ \hyperref[sec:defocus_estimate]{\textit{ctf estimate}}}; 14 | 15 | \node (j3) [job, below of=j2] {Define sub-regions}; 16 | 17 | \node (j4) [job, below of=j3, text width=4cm, align=center] {Picking\\ \hyperref[sec:picking]{\textit{templateSearch}}}; 18 | 19 | \node (j5) [job, below of=j4, text width=4cm, align=center] {Initialize project\\ \hyperref[sec:init]{\textit{init}}}; 20 | 21 | \node (j6) [job, below of=j5, text width=4cm, align=center] {Tomogram reconstruction\\ \hyperref[sec:ctf_3d]{\textit{ctf 3d}}}; 22 | 23 | \node (j7) [job, right of=j6, xshift=4cm, text width=4cm, align=center] {Subtomogram average\\ \hyperref[sec:avg]{\textit{avg RawAlignment}}}; 24 | 25 | \node (j8) [chimera, right of=j5, xshift=8cm, text width=4cm, align=center] {Classification\\ \hyperref[sec:classification]{\textit{pca}}, \hyperref[sec:classification]{\textit{cluster}}}; 26 | 27 | \node (j9) [job, right of=j4, xshift=4cm, text width=4cm, align=center] {Subtomogram alignment\\ \hyperref[sec:align]{\textit{alignRaw}}}; 28 | 29 | \node (j10) [chimera, right of=j1, xshift=4cm, text width=4cm, align=center] {Tilt-series refinement\\ \hyperref[sec:tomoCPR]{\textit{tomoCPR}}}; 30 | 31 | % Add tilt-series alignment 32 | \node (j0bis) [job, above of=j10, text width=4cm, align=center] {Tilt-series alignment\\ with \hyperref[sec:tilt_series_alignment:emClarity]{\textit{ETomo}}}; 33 | 34 | \node (j0) [job, above of=j0bis, text width=4cm, align=center] {Tilt-series alignment\\ with \hyperref[sec:tilt_series_alignment:emClarity]{\textit{autoAlign}}}; 35 | 36 | \node (or) at ($(j0bis)+(0,1cm)$) [fill opacity=1] {or}; 37 | 38 | % Add final reconstruction 39 | \node (between_j11_j11bis) [below of=j7, fill opacity=0] {}; 40 | 41 | \node (j11) at ($(between_j11_j11bis)+(-3cm,0cm)$) [job, text width=4cm, align=center] {Final reconstruction\\ \hyperref[sec:final_map]{\textit{avg FinalAlignment}}}; 42 | 43 | \node (j11bis) at ($(between_j11_j11bis)+(+3cm,0cm)$) [job, text width=4cm, align=center] {Final reconstruction\\ \hyperref[sec:final_map]{\textit{reconstruct}} (with {\cisTEM})}; 44 | 45 | %% ARROWS 46 | 47 | % Specification of arrows 48 | \draw [arrow] (j1) -- (j2); 49 | \draw [arrow] (j2) -- (j3); 50 | \draw [arrow] (j3) -- (j4); 51 | \draw [arrow] (j4) -- (j5); 52 | \draw [arrow] (j5) -- (j6); 53 | \draw [arrow] (j6) -- (j7); 54 | 55 | % Average - Alignment 56 | \draw [arrow] ($(j7.north)+(0.5,0)$) -- ($(j9.south)+(0.5,0)$); 57 | \draw [arrow] ($(j9.south)+(-0.5,0)$) -- ($(j7.north)+(-0.5,0)$); 58 | 59 | % Connect Classification 60 | \draw [dashed, arrow] ($(j7.east)$) -| ($(j8.south)$) ; 61 | \draw [dashed, arrow] ($(j8.north)$) |- ($(j9.east)$) ; 62 | 63 | % Refine picking 64 | \draw [dashed, arrow] (j9) -- (j4); 65 | 66 | % Refine tilt-series 67 | \draw [dashed, arrow] (j9) -- (j10); 68 | 
\draw [dashed, arrow] (j10) -- (j1); 69 | \draw [dashed, arrow] (j1) -- ($(j1.west)+(-1,0)$) |- ($(j6.west)$); 70 | 71 | % Tilt-series alignment 72 | \node (inv) at ($(or)+(-4cm,0cm)$) [fill opacity=0] {}; 73 | \draw [line] ($(j0.west)$) -| ($(inv)+(0,-0.1cm)$); 74 | \draw [line] ($(j0bis.west)$) -| ($(inv)+(0,+0.1cm)$); 75 | \draw [arrow] ($(inv)+(0.15mm,0)$) -| (j1.north); 76 | 77 | % Final reconstruction 78 | \draw [arrow] (j7.south) |- (j11.east); 79 | \draw [arrow] (j7.south) |- (j11bis.west); 80 | 81 | % Notes 82 | \fill [myred, opacity=1] ($(j4.east)+(0,0.5)$) circle (0.2); 83 | 84 | 85 | \end{tikzpicture} 86 | \end{center} 87 | 88 | \caption[{\emClarity} workflow]{{\emClarity} workflow. The classification and tilt-series refinement steps are optional. The red marker indicates the possibility of importing data from other software (see section \ref{sec:picking:import} for more details).} 89 | \label{fig:emClarity_workflow} 90 | \end{figure} 91 | -------------------------------------------------------------------------------- /Figures_Tables/05_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for autoAlign 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{90mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{autoAlign} parameters]{\code{autoAlign} parameters. Your parameter file should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters. The other parameters are optional.}\\ 6 | 7 | \hline 8 | 9 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10). Should match the header of the tilt-series.\\ 10 | 11 | -- \code{beadDiameter}\textcolor{myred}{\textbf{*}} & Bead diameter in meters (e.g. 10e-9). If 0, the beads are ignored during the alignment, but the \code{.erase} file is still generated using \href{https://bio3d.colorado.edu/imod/betaDoc/man/findbeads3d.html}{findbeads3d} with \code{-BeadSize}=10.5nm.\\ 12 | 13 | -- \code{autoAli\_max\_resolution} & Low-pass cutoff, in \r{A}, used in alignment. An additional median filter is applied to the images before alignment. The filtered series are only used for alignment. Default=18.\\ 14 | 15 | -- \code{autoAli\_min\_sampling\_rate} & Maximum pixel size used for alignment, in \r{A} per pixel. If you have a pixel size of 2, then the alignment will start at bin 5 using the default. Default=10.\\ 16 | 17 | -- \code{autoAli\_max\_sampling\_rate} & Minimum pixel size used for alignment, in \r{A} per pixel. If you have a pixel size of 2, then the alignment will end at bin 2 using the default. Default=3.\\ 18 | 19 | -- \code{autoAli\_iterations\_per\_bin} & The number of patch tracking iterations, for each bin. Default=3. \\ 20 | 21 | -- \code{autoAli\_n\_iters\_no\_rotation} & The number of patch tracking iterations, for each bin, before activating local alignments. Default=3.\\ 22 | 23 | -- \code{autoAli\_patch\_size\_factor} & Sets the size of the patches used for patch tracking. Making this larger will result in more patches, and more local areas in later iterations, but may also decrease accuracy. Default=4.\\ % SizeOfPatchesXandY = [nX,nY]/(bin*factor) 24 | 25 | -- \code{autoAli\_patch\_tracking\_border} & Number of pixels to trim off each edge in X and in Y. Corresponds to \code{-BordersInXandY} from \href{https://bio3d.colorado.edu/imod/doc/man/tiltxcorr.html}{tiltxcorr}.
Default=64.\\ 26 | 27 | -- \code{autoAli\_patch\_overlap} & Fractional overlap in X and Y between patches that are tracked by correlation. This influences the number of patches. Corresponds to \code{-OverlapOfPatchesXandY} from \href{https://bio3d.colorado.edu/imod/doc/man/tiltxcorr.html}{tiltxcorr}. Default=0.5.\\ 28 | 29 | -- \code{autoAli\_max\_shift\_in\_angstroms} & Maximum shifts allowed, in \r{A}, for the patch tracking alignment. Default=40.\\ 30 | 31 | -- \code{autoAli\_max\_shift\_factor} & The maximum shifts allowed are progressively reduced with the iterations $i$\footnote{$\code{int}( \code{autoAli\_max\_shift\_in\_angstroms} / i^{\code{autoAli\_max\_shift\_factor}} ) + 1$}. Default=1.\\ 32 | 33 | -- \code{autoAli\_refine\_on\_beads} & Whether or not the patch tracking alignment should be refined using the gold beads. This refinement makes the alignment significantly slower, but can substantially improve the quality of the alignment. This refinement is automatically turned off if the number of beads detected is less than 5, and local alignments are only activated if there are more than 10 beads. Default=false.\\ 34 | \hline 35 | \end{longtable} 36 | -------------------------------------------------------------------------------- /Figures_Tables/06_ctf_tlt.tex: -------------------------------------------------------------------------------- 1 | \renewcommand{\arraystretch}{1.2} 2 | \begin{longtable}[c]{| l | p{35mm} || l | p{35mm} || l | p{35mm} |} 3 | \captionsetup{labelfont=bf} 4 | \caption{\code{fixedStacks/ctf/\_ali*\_ctf.tlt}} \label{tab:ctf_tlt}\\ 5 | % I need to check the diff between first and last column. It is useful when removing images. 6 | 7 | \hline 8 | \textbf{C} & \textbf{Description} & \textbf{C} & \textbf{Description} & \textbf{C} & \textbf{Description}\\ 9 | \hline 10 | 1 & index $i$ & 9 & $\bm{R}_{1,2,i}$ & 17 & \code{Cs}\\ 11 | \hline 12 | 2 & $\bm{T}_{x,i}$ (in pixels) & 10 & $\bm{R}_{2,2,i}$ & 18 & \code{WAVELENGTH}\\ 13 | \hline 14 | 3 & $\bm{T}_{y,i}$ (in pixels) & 11 & $i^{th}$ post-exposure & 19 & \code{AMPCONT}\\ 15 | \hline 16 | 4 & $\bm{\alpha}_i$ & 12 & ${[\bm{\Delta\mathrm{z}}_{ast}]}_i$ & 20 & pixels in X\\ 17 | \hline 18 | 5 & \cellcolor{lightgray}empty & 13 & ${[\bm{\phi}_{ast}]}_i$ & 21 & pixels in Y\\ 19 | \hline 20 | 6 & 90\textdegree & 14 & \cellcolor{lightgray}empty & 22 & sections Z\\ 21 | \hline 22 | 7 & $\bm{R}_{1,1,i}$ & 15 & $\bm{\mathrm{z}}_i$ & 23 & ?\\ 23 | \hline 24 | 8 & $\bm{R}_{2,1,i}$ & 16 & \code{PIXEL\_SIZE} & 24 & \cellcolor{lightgray}\\ 25 | \hline 26 | \end{longtable} -------------------------------------------------------------------------------- /Figures_Tables/06_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for ctf estimate 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{110mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{ctf estimate} parameters]{\code{ctf estimate} parameters. Your parameter file should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed except if you know what you are doing. The other parameters are optional.}\\ 6 | 7 | \hline 8 | \multicolumn{2}{|c|}{\textbf{Microscope settings}}\\ 9 | \hline 10 | 11 | -- \code{VOLTAGE}\textcolor{myred}{\textbf{*}} & Accelerating voltage of the microscope in Volts (e.g.
300e3).\\ 12 | -- \code{Cs}\textcolor{myred}{\textbf{*}} & Spherical aberration of the microscope in meters (e.g. 2.7e-6).\\ 13 | -- \code{AMPCONT}\textcolor{myred}{\textbf{*}} & Percent amplitude contrast ratio (e.g. 0.09).\\ 14 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10). Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\ 15 | -- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether the stacks are super-sampled. If \code{1}, {\emClarity} will Fourier crop by a factor of 2 and set the actual pixel size to \code{2 * PIXEL\_SIZE}. Note that this is not tested anymore, so it is preferable to Fourier crop the stacks beforehand and set it to 0.\\ 16 | 17 | 18 | % Fiducials 19 | \hline 20 | \multicolumn{2}{|c|}{\textbf{Fiducials}}\\ 21 | \hline 22 | 23 | -- \code{beadDiameter} & Diameter of the beads to erase, in meters (e.g. 10e-9). This parameter is used if fiducial beads need to be erased, thus only for stacks with a \code{fixedStacks/*.erase} file.\\ 24 | 25 | -- \code{erase\_beads\_after\_ctf} & Whether the fiducial beads should be removed from the raw tilt-series (now) or from the CTF-multiplied tilt-series computed during the tomogram reconstruction (section \ref{sec:ctf_3d}). Do not change this option between \code{ctf estimate} and \code{ctf 3d}. Default=\code{false}.\\ 26 | 27 | % Tilt-scheme 28 | \hline 29 | \multicolumn{2}{|c|}{\textbf{Tilt-scheme}}\\ 30 | \hline 31 | 32 | -- \code{CUM\_e\_DOSE}\textcolor{myred}{\textbf{*}} & Total exposure in e/\r{A}$^2$.\\ 33 | -- \code{doseAtMinTilt}\textcolor{myred}{\textbf{*}} & The exposure each view receives (should be about \code{CUM\_e\_DOSE} / nb of views), in e/\r{A}$^2$.\\ 34 | -- \code{oneOverCosineDose}\textcolor{myred}{\textbf{*}} & Whether or not it is a Saxton scheme (dose increases as 1/cos($\alpha$), $\alpha$ being the tilt angle); this will scale \code{doseAtMinTilt} according to the tilt angle (e.g. 0).\\ 35 | -- \code{startingAngle}\textcolor{myred}{\textbf{*}} & Starting angle, in degrees (e.g. 0).\\ 36 | -- \code{startingDirection}\textcolor{myred}{\textbf{*}} & Starting direction; should the angles decrease or increase (neg or pos).\\ 37 | -- \code{doseSymmetricIncrement}\textcolor{myred}{\textbf{*}} & The number of tilts before each switch in direction. 0=false, 2=``normal'' dose symmetric. The original dose symmetric scheme included 0 in the first group. For this, specify the number as a negative number.\\ 38 | 39 | % Defocus estimate 40 | \hline 41 | \multicolumn{2}{|c|}{\textbf{Defocus estimate}}\\ 42 | \hline 43 | 44 | -- \code{defCutOff}\textcolor{myred}{\textbf{*}} & The power spectrum used by \code{ctf estimate} is considered from slightly before the first zero to this cutoff, in meters (e.g. 7e-10).\\ 45 | -- \code{defEstimate}\textcolor{myred}{\textbf{*}} & Initial rough estimate of the defocus, in meters. With \code{defWindow}, it defines the search window of defoci.\\ 46 | -- \code{defWindow}\textcolor{myred}{\textbf{*}} & Defocus window around \code{defEstimate}, in meters; e.g. if \code{defEstimate} = 2.5e-6 and \code{defWindow} = 1.5e-6, try a range of defocus between 1e-6 and 4e-6.\\ 47 | -- \code{deltaZtolerance}\textcolor{blue}{\textbf{*}} & Includes the tiles with defocus equal to that at the tilt-axis $\pm\Delta{Z}$, in meters. See section \ref{sec:algo:defocus_estimate} for more details. Default=50e-9.\\ 48 | -- \code{zShift}\textcolor{blue}{\textbf{*}} & Used for the handedness check.
Shift the evaluation region ($Z_{tilt-axis}\ \pm\code{deltaZtolerance}$) by this amount. See section \ref{sec:algo:defocus_estimate} for more details. Default=150e-9.\\ 49 | -- \code{ctfMaxNumberOfTiles}\textcolor{blue}{\textbf{*}} & Limits the number of tiles to include in the power spectrum. The more tiles, the stronger the signal but the longer it takes to compute the power spectrum. Default=4000.\\ 50 | -- \code{ctfTileSize}\textcolor{blue}{\textbf{*}} & Size of the (square) tiles, in meters. Default=680e-10.\\ 51 | -- \code{paddedSize}\textcolor{blue}{\textbf{*}} & The tiles are padded to this size, in pixels, in real space before computing the Fourier transform. Should be even, large (compared to the tiles), and preferably a power of 2. Default=768.\\ 52 | 53 | \hline 54 | \end{longtable} 55 | -------------------------------------------------------------------------------- /Figures_Tables/07_recon_coords.tex: -------------------------------------------------------------------------------- 1 | \renewcommand{\arraystretch}{1.2} 2 | \begin{longtable}[r]{| c | l || p{110mm} |} 3 | \captionsetup{labelfont=bf} 4 | \caption{\code{recon/\_recon.coords}} \label{tab:recon_coords}\\ 5 | 6 | \hline 7 | \textbf{Line} & {\tilt} \textbf{parameter} & \textbf{Description}\\ 8 | \hline 9 | 1 & \cellcolor{lightgray} & \code{}; stack prefix.\\ 10 | \hline 11 | 2 & \cellcolor{lightgray} & Number of sub-regions within this stack.\\ 12 | \hline 13 | 3 & \code{WIDTH} & Width in X of the first sub-region, in pixels.\\ 14 | 4 & \code{SLICE} 1 & Starting Y coordinate of the first sub-region. Starts from 0.\\ 15 | 5 & \code{SLICE} 2 & Ending Y coordinate of the first sub-region. Starts from 0.\\ 16 | 6 & \code{THICKNESS} & Thickness in Z of the first sub-region, in pixels.\\ 17 | 7 & \code{SHIFT} 1 & Shift of the reconstructed slice in X, in pixels. If positive, the slice is shifted to the right, and the output (the first sub-region) will contain the left part of the whole potentially reconstructable area.\\ 18 | 8 & \code{SHIFT} 2 & Shift of the reconstructed slice in Z, in pixels. If positive, the slice is shifted upward.\\ 19 | \hline 20 | ... & ... & Same as lines 3 to 8, but for the next sub-regions, if any.\\ 21 | \hline 22 | \end{longtable} -------------------------------------------------------------------------------- /Figures_Tables/08_csv.tex: -------------------------------------------------------------------------------- 1 | \renewcommand{\arraystretch}{1.2} 2 | \begin{longtable}[c]{| l | p{29mm} || l | p{29mm} || l | p{29mm} || l | p{29mm} |} 3 | \captionsetup{labelfont=bf} 4 | \caption[\code{convmap/\_\_bin.csv}]{\code{convmap\_wedgeType\_2\_bin/\_\_bin.csv}. One line per particle $p$. The translations ($\bm{T}_x$, $\bm{T}_y$, $\bm{T}_z$) are in pixels, un-binned. The Euler angles ($\phi$, $\theta$, $\psi$) are described in section \ref{sec:algo:euler_conventions}. They are actually not directly used by {\emClarity}. As mentioned previously, the rotation matrices ($\bm{R}_{m,n}$, $m=$ rows, $n=$ columns) are meant to be applied to the particles to rotate them from the microscope frame to the reference frame.
In this case, the translations are applied before the rotation.} \label{tab:csv}\\ 5 | 6 | \hline 7 | \textbf{C} & \textbf{Description} & \textbf{C} & \textbf{Description} & \textbf{C} & \textbf{Description} & \textbf{C} & \textbf{Description}\\ 8 | \hline 9 | 1 & $\bm{\mathrm{CC}}_p$ & 8 & \cellcolor{lightgray} empty (1) & 15 & $\theta_p$ & 22 & ${[\bm{R}_{32}]}_p$\\ 10 | \hline 11 | 2 & \code{Tmp\_sampling} & 9 & \cellcolor{lightgray} empty (1) & 16 & $\psi_p$ & 23 & ${[\bm{R}_{13}]}_p$\\ 12 | \hline 13 | 3 & \cellcolor{lightgray} empty (0) & 10 & \cellcolor{lightgray} empty (0) & 17 & ${[\bm{R}_{11}]}_p$ & 24 & ${[\bm{R}_{23}]}_p$\\ 14 | \hline 15 | 4 & Unique ID, $p$ & 11 & ${[\bm{T}_x]}_p$ & 18 & ${[\bm{R}_{21}]}_p$ & 25 & ${[\bm{R}_{33}]}_p$\\ 16 | \hline 17 | 5 & \cellcolor{lightgray} empty (1) & 12 & ${[\bm{T}_y]}_p$ & 19 & ${[\bm{R}_{31}]}_p$ & 26 & \cellcolor{lightgray} Class (1)\\ 18 | \hline 19 | 6 & \cellcolor{lightgray} empty (1) & 13 & ${[\bm{T}_z]}_p$ & 20 & ${[\bm{R}_{12}]}_p$ & \cellcolor{lightgray} & \cellcolor{lightgray}\\ 20 | \hline 21 | 7 & \cellcolor{lightgray} empty (1) & 14 & $\phi_p$ & 21 & ${[\bm{R}_{22}]}_p$ & \cellcolor{lightgray} & \cellcolor{lightgray}\\ 22 | \hline 23 | 24 | \end{longtable} -------------------------------------------------------------------------------- /Figures_Tables/08_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for template matching 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{80mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{templateSearch} parameters]{\code{templateSearch} parameters. Your parameter file should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed except if you know what you are doing. The other parameters are optional.}\\ 6 | 7 | \hline 8 | \multicolumn{2}{|c|}{\textbf{Sampling}}\\ 9 | \hline 10 | 11 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10). Should match the header of the stacks in \code{fixedStacks/*.fixed}.\\ 12 | -- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\ 13 | -- \code{Tmp\_samplingRate}\textcolor{myred}{\textbf{*}} & Sampling (i.e. binning) at which the sub-region should be reconstructed to perform the template matching (1 means no binning). The sampling rate should be chosen to give a running pixel size between 8 and 12\r{A}/pix.\\ 14 | 15 | \hline 16 | \multicolumn{2}{|c|}{\textbf{Tomogram reconstruction}}\\ 17 | \hline 18 | 19 | -- \code{erase\_beads\_after\_ctf} & Whether or not the fiducial beads should be removed before or after CTF multiplication. Default=\code{0}.\\ 20 | 21 | -- \code{beadDiameter}\textcolor{myred}{\textbf{*}} & Bead diameter in \r{A}.\\ 22 | 23 | -- \code{applyExposureFilter} & Whether or not the exposure filter should be applied. If you turn it off, make sure it is turned off during subtomogram averaging as well.\\ 24 | 25 | -- \code{super\_sample}\textcolor{blue}{\textbf{*}} & Compute the back projection in a slice larger by the given integer factor (max=8) in each dimension, by interpolating the projection data at smaller intervals (``super-sampling'').
This corresponds to the \code{SuperSampleFactor} entry from {\tilt}. Default=\code{0}.\\ 26 | 27 | -- \code{expand\_lines}\textcolor{blue}{\textbf{*}} & If \code{super\_sample} is greater than 0, expand projection lines by Fourier padding (sinc interpolation) when super-sampling, which will preserve higher frequencies better but increase memory needed. This corresponds to the \code{ExpandInputLines} entry from {\tilt}. Default=\code{0}.\\ 28 | 29 | -- \code{whitenPS}\textcolor{blue}{\textbf{*}} & TODO. Default=\code{[0,0,0]}.\\ 30 | 31 | % invertDose 32 | % flgDampenAliasedFrequencies 33 | % lowResCut = this is deprecated apparently. so I guess default to 12? 34 | % SuperResolution is required... 35 | 36 | \hline 37 | \multicolumn{2}{|c|}{\textbf{Particle}}\\ 38 | \hline 39 | 40 | -- \code{particleRadius}\textcolor{myred}{\textbf{*}} & Particle radii, in \r{A}. Format is [$R_X,\ R_Y,\ R_Z$]. In this context, it defines a region around a cross-correlation peak to remove from consideration after a particle is selected. See \code{Peak\_mRadius} for more details.\\ 41 | 42 | -- \code{Ali\_mRadius}\textcolor{myred}{\textbf{*}} & Alignment mask radii, in \r{A}. Format is [$R_X,\ R_Y,\ R_Z$]. In this case, it is used to pad/trim the template to this size.\\ 43 | 44 | -- \code{Peak\_mType} & Type (i.e. shape) of the cross-correlation peaks. Can be sphere, cylinder or rectangle. See section \ref{sec:algo:picking} for more details. Default=sphere.\\ 45 | 46 | -- \code{Peak\_mRadius} & Radius of the cross-correlation peaks, in \r{A}. Format is [$R_X,\ R_Y,\ R_Z$]. See section \ref{sec:algo:picking} for more details. Default= $0.75\times\code{particleRadius}$.\\ 47 | 48 | -- \code{diameter\_fraction\_for\_local\_stats} & TODO. Default=\code{1}.\\ 49 | 50 | \hline 51 | \multicolumn{2}{|c|}{\textbf{Template matching}}\\ 52 | \hline 53 | 54 | --\code{symmetry}\textcolor{myred}{\textbf{*}} & Symmetry of the particles, used to restrict the angular search to the strict minimum; it also helps to reduce wedge bias by randomizing the detected angles to one within the symmetry group (e.g. C1, C2, ..., CX, O, I; see section \ref{sec:algo:picking:extract_peaks}).\\ 55 | 56 | -- \code{Tmp\_angleSearch}\textcolor{myred}{\textbf{*}} & Angular search, in degrees. Format is [$\Theta_{out},\ \Delta_{out},\ \Theta_{in},\ \Delta_{in}$]. For example, [$180,\ 15,\ 180,\ 12$], specifies a $\pm$180\textdegree\ out of plane search (polar and azimuth angles) with 15\textdegree\ steps and $\pm$180\textdegree\ in plane search (planar angles) with 12\textdegree\ steps. Depending on the \code{symmetry}, the search will be restricted if the specified range is outside of the unique set of angles.\\ 57 | 58 | --\code{Tmp\_threshold}\textcolor{myred}{\textbf{*}} & Estimate of the number of particles. From this value a score threshold will be calculated that should result in fewer false positives (estimated at $\sim$10\%) and allows picking up to $2\times$ this estimate. If there are significant departures from Gaussian noise (e.g. carbon edge) this may fail.\\ 59 | 60 | --\code{Override\_threshold\_and\_return\_N\_peaks} & Overrides \code{Tmp\_threshold} and selects the specified number of highest-scoring peaks.\\ 61 | 62 | --\code{Tmp\_targetSize} & Size, in pixels, of the chunk to process. If the sub-region is too big, the processing will be split into individual chunks. Format is [$X, Y, Z$]. Default=\code{[512,512,512]}.\\ 63 | 64 | --\code{Tmp\_bandpass} & A band-pass filter is applied to the template and the tomogram.
This parameter defines the band-pass filter. It should be a vector of three values, i.e. [(1),(2),(3)]. (1) is the filter value at the DC (i.e. zero) frequency, between 0 and 1. (2) is the high-pass cutoff (where the pass is back to 1) in \r{A}. (3) is the low-pass cutoff (where the pass starts to roll off) in \r{A}. (2) should be larger than (3). Default=\code{[1e-3,600,28]}.\\ 65 | 66 | --\code{rescale\_mip} & TODO. Default=\code{1}.\\ 67 | 68 | % --\code{Tmp\_medianFilter} & Apply a median filter to the sub-region before computing the cross-correlation. It defines the size of the kernel, either 3, 5 or 7. Default=false.\\ 69 | 70 | \hline 71 | \end{longtable} 72 | -------------------------------------------------------------------------------- /Figures_Tables/09_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for init 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{114mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{init} parameters]{\code{init} parameters. Your parameter file should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters, the other parameters are optional.}\\ 6 | 7 | \hline 8 | 9 | -- \code{subTomoMeta}\textcolor{myred}{\textbf{*}} & Project name. Most output files will have the project name as prefix and the metadata is saved in a {\MATLAB} file called \code{.mat}.\\ \hline 10 | 11 | -- \code{Tmp\_samplingRate}\textcolor{myred}{\textbf{*}} & Sampling (i.e. binning) at which the template matching was run. It should correspond to the binning registered in the filename of the .csv and .mod files.\\ \hline 12 | 13 | -- \code{fscGoldSplitOnTomos}\textcolor{myred}{\textbf{*}} & Whether or not the particles from the same subregions should be kept in the same half-set or distributed randomly. We recommend keeping the sub-regions from overlapping and leaving this parameter at 0/False. However, with tightly packed particles, which share a lot of their densities with other particles, it is better to set this parameter to 1/True to make sure the half sets don't share the same voxels.\\ \hline 14 | 15 | -- \code{lowResCut} & This corresponds to a rough estimate of the initial resolution of the data-set, coming directly out of the picking. The default value is set to 40\r{A} and it can be lowered for data-sets collected at low defocus. In most situations, the default value is fine to start with. As the processing goes on, {\emClarity} will progressively lower this resolution estimate using the FSC.\\ \hline 16 | 17 | \end{longtable} 18 | -------------------------------------------------------------------------------- /Figures_Tables/10_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for ctf 3d 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{115.5mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{ctf 3d} parameters]{\code{ctf 3d} parameters. Your parameter file should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed except if you know what you are doing.
The other parameters are optional.}\\ 6 | 7 | 8 | %% Parameters: 9 | % flgDampenAliasedFrequencies (default=0) 10 | 11 | \hline 12 | \multicolumn{2}{|c|}{\textbf{Microscope settings}}\\ 13 | \hline 14 | 15 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10). Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\ 16 | -- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\ 17 | 18 | -- \code{Ali\_samplingRate}\textcolor{myred}{\textbf{*}} & Binning factor (1 means no binning) of the output reconstruction.\\ 19 | 20 | % CTF correction 21 | \hline 22 | \multicolumn{2}{|c|}{\textbf{CTF correction}}\\ 23 | \hline 24 | 25 | -- \code{useSurfaceFit}\textcolor{blue}{\textbf{*}} & Whether or not the spatial model should be calculated as a function of $x,\ y$ coordinates. If 0, the spatial model is a plane (constant center-of-mass). See section \ref{sec:algo:ctf_3d:spatial_model} for more details.\\ 26 | 27 | -- \code{flg2dCTF}\textcolor{blue}{\textbf{*}} & Whether or not the CTF correction should correct for the defocus gradients along the electron beam (thickness of the specimen). If 1, only one $z$ section is used. See section \ref{sec:algo:ctf_3d:defocus_step} for more details.\\ 28 | 29 | \hline 30 | \multicolumn{2}{|c|}{\textbf{Others}}\\ 31 | \hline 32 | 33 | -- \code{erase\_beads\_after\_ctf} & Whether or not the fiducial beads should be removed before or after CTF multiplication. Default=\code{false}.\\ 34 | 35 | -- \code{applyExposureFilter} & Whether or not the exposure filter should be applied. If you turn it off, make sure it is turned off during subtomogram averaging as well.\\ 36 | 37 | -- \code{super\_sample}\textcolor{blue}{\textbf{*}} & Compute the back projection in a slice larger by the given integer factor (max=8) in each dimension, by interpolating the projection data at smaller intervals (``super-sampling''). This corresponds to the \code{SuperSampleFactor} entry from {\tilt}. Default=\code{0}.\\ 38 | 39 | -- \code{expand\_lines}\textcolor{blue}{\textbf{*}} & If \code{super\_sample} is greater than 0, expand projection lines by Fourier padding (sinc interpolation) when super-sampling, which will preserve higher frequencies better but increase memory needed. This corresponds to the \code{ExpandInputLines} entry from {\tilt}. Default=\code{0}.\\ 40 | 41 | \hline 42 | \end{longtable} 43 | 44 | -------------------------------------------------------------------------------- /Figures_Tables/11_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for avg 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{108.5mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{avg} parameters]{\code{avg} parameters. Your parameter file should have the following parameters. \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed except if you know what you are doing. The other parameters are optional.} \label{param:avg}\\ 6 | 7 | \hline 8 | \multicolumn{2}{|c|}{\textbf{Sampling}}\\ 9 | \hline 10 | 11 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10).
Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\ 12 | -- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\ 13 | 14 | -- \code{Ali\_samplingRate}\textcolor{myred}{\textbf{*}} & Current bin factor (1 means no binning). The sub-region tomograms at this given binning must be already reconstructed in the \code{cache} directory. If they aren't, you'll need to run \code{ctf 3d} before running this step.\\ 15 | 16 | \hline 17 | \multicolumn{2}{|c|}{\textbf{Mask}}\\ 18 | \hline 19 | -- \code{Ali\_mType}\textcolor{myred}{\textbf{*}} & Type of mask; ``cylinder'', ``sphere'', ``rectangle''.\\ 20 | -- \code{particleRadius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] particle radius, in \r{A}. It should be the smallest values that contain the particle. For particles in a lattice, neighboring particles can be used in alignment by specifying a larger mask size, with \code{Ali\_mRadius}, but this parameter must correspond to the central unit.\\ 21 | -- \code{Ali\_mRadius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] mask radius, in \r{A}. The mask size must be large enough to contain the entire particle (i.e. larger than \code{particleRadius}), the delocalized signal, proper apodization, and to avoid wraparound error in cross-correlation.\\ 22 | -- \code{Ali\_mCenter}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] shifts, in \r{A}, relative to the center of the reconstruction. Positive shifts translate the \code{Ali\_mType} mask to the right of the axis.\\ 23 | -- \code{scaleCalcSize}\textcolor{blue}{\textbf{*}} & Scale the box size (section \ref{sec:algo:avg:box}) by this number. Default=1.5.\\ 24 | 25 | \hline 26 | \newpage 27 | 28 | \hline 29 | \multicolumn{2}{|c|}{\textbf{Symmetry}}\\ 30 | \hline 31 | 32 | -- \code{Raw\_classes\_odd}\textcolor{myred}{\textbf{*}} & Parameter used to control the C symmetry of the first half set. It should be ``\code{[0; .*ones(2,1)]}'', where \code{} is the central symmetry. This is equivalent to \code{[0; ; ]}.\\ 33 | -- \code{Raw\_classes\_eve}\textcolor{myred}{\textbf{*}} & Parameter used to control the C symmetry of the second half set. It should be identical to \code{Raw\_classes\_odd}.\\ 34 | 35 | -- \code{symmetry} & New parameter used to control the symmetry. CX, I/I2, O, and DX are supported. The old default will be used if this parameter is not specified.\\ 36 | 37 | \hline 38 | \multicolumn{2}{|c|}{\textbf{Fourier shell correlation}}\\ 39 | \hline 40 | 41 | -- \code{flgCones}\textcolor{myred}{\textbf{*}} & Whether or not {\emClarity} should calculate the conical FSCs. This will greatly impact the calculated half-maps if you have preferential orientation in the data-set. We recommend leaving this at 1 (true) throughout the workflow.\\ 42 | 43 | -- \code{minimumParticleVolume}\textcolor{blue}{\textbf{*}} & Defines a minimum value for the $f_{mask}/f_{particle}$ ratio (sections \ref{sec:algo:avg:molecular_mask} and \ref{sec:algo:avg:fsc}). Default=$0.1$.\\ 44 | 45 | -- \code{flgFscShapeMask}\textcolor{blue}{\textbf{*}} & Apply a very soft mask based on the particle envelope before calculating the FSC (section \ref{sec:algo:avg:molecular_mask}). We highly recommend not turning this off. Default=1.\\ 46 | 47 | -- \code{shape\_mask\_test} & Exit after saving the shape mask in \code{FSC/}. This is useful when testing the following two parameters.
Default=0.\\ 48 | 49 | -- \code{shape\_mask\_lowpass} & Low-pass cutoff, in \r{A}, to apply to the median filtered raw subtomogram averages. Default=14.\\ 50 | 51 | -- \code{shape\_mask\_threshold} & Initial threshold used for the dilation. If you have extra ``dust'' outside your particle, it can be helpful to decrease the resolution used in the initial thresholding, i.e. increase \code{shape\_mask\_lowpass}, or to increase this threshold (or both). Default=2.4.\\ 52 | 53 | \hline 54 | \multicolumn{2}{|c|}{\textbf{Others}}\\ 55 | \hline 56 | 57 | -- \code{subTomoMeta}\textcolor{myred}{\textbf{*}} & Project name. At this step, {\emClarity} expects to find the metadata \code{subTomoMeta}.mat in the project directory. Most output files will have the project name as prefix.\\ 58 | 59 | -- \code{Raw\_className}\textcolor{myred}{\textbf{*}} & Class ID for subtomogram averaging and alignment. You should leave it set to zero.\\ 60 | 61 | -- \code{Fsc\_bfactor}\textcolor{myred}{\textbf{*}} & Global B-factor applied to both references. See \ref{sec:algo:avg:wiener} for more detail. It can be a vector. In this case, if it is an intermediate reconstruction (i.e. reconstructing half-maps with \code{RawAlignment}) only the first value is used, if it is a final reconstruction (i.e. \code{FinalAlignment} introduced in section \ref{sec:final_map}), {\emClarity} will calculate one reconstruction per value registered in this vector.\\ 62 | 63 | -- \code{flgClassify}\textcolor{myred}{\textbf{*}} & Whether or not this cycle is a classification cycle. It must be 0 if subtomogram alignment is the next step. More information on this in \ref{sec:classification}.\\ 64 | 65 | -- \code{flgCutOutVolumes}\textcolor{blue}{\textbf{*}} & Whether or not each transformed particle (rotated and shifted) should be saved to the \code{cache} directory. Note that the subtomograms have an extra padding of 20 pixels. This makes the reconstruction much slower if activated. Default=0.\\ 66 | 67 | -- \code{flgQualityWeight}\textcolor{blue}{\textbf{*}} & The particles with an alignment score (Constrained Cross-Correlation score, CCC) below the average CCC score of the entire data-set are down-weighted before being added to the reference. This parameter regulates the strength of the weighting.\\ 68 | 69 | -- \code{flgCCCcutoff}\textcolor{blue}{\textbf{*}} & Particles with an alignment score (Constrained Cross-Correlation score, CCC) below this value are excluded from the reconstruction. Default=0.\\ 70 | 71 | -- \code{use\_v2\_SF3D}\textcolor{blue}{\textbf{*}} & Whether or not the new per-particle sampling function procedure should be used, as opposed to the older ``grouped'' sampling functions. This is the default since {\emClarity} 1.5.1.0. Default=1.\\ 72 | 73 | -- \code{mtfVal}\textcolor{blue}{\textbf{*}} & Type of Modulation Transfer Function (MTF) of the detector. This is applied on top of the volume-normalized SPA Wiener filter (see section \ref{sec:algo:avg:wiener}). Set it to 0 to turn it off, 1 for a stronger version. You can also set your own (see \href{https://github.com/bHimes/emClarity/blob/d624d8b58db6933ee334a6448789dda389f7ca21/testScripts/emClarity.m#L838-L845}{code} and \href{https://github.com/bHimes/emClarity/blob/d624d8b58db6933ee334a6448789dda389f7ca21/statistics/BH_multi_cRef_Vnorm.m#L402-L434}{code}) but this is not tested. Default=2.\\ 74 | 75 | 76 | 77 | %%%%% Optional 78 | % flgCutOutVolumes 79 | % loadTomo: is it useful? it loads the entire tomo in memory... meh.
80 | 81 | \hline 82 | \end{longtable} 83 | -------------------------------------------------------------------------------- /Figures_Tables/12_parameters.tex: -------------------------------------------------------------------------------- 1 | % Parameters for alignRaw 2 | \renewcommand{\arraystretch}{1.2} 3 | \begin{longtable}[l]{| l || p{110.5mm} |} 4 | \captionsetup{labelfont=bf} 5 | \caption[\code{alignRaw} parameters]{\code{alignRaw} parameters. Your parameter file should have the following parameters.\\ 6 | \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed except if you know what you are doing. The other parameters are optional.}\\ 7 | 8 | \hline 9 | \multicolumn{2}{|c|}{\textbf{Sampling}}\\ 10 | \hline 11 | 12 | -- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meters per pixel (e.g. 1.8e-10). Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\ 13 | -- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\ 14 | 15 | -- \code{Ali\_samplingRate}\textcolor{myred}{\textbf{*}} & Current bin factor (1 means no binning). The sub-region tomograms at this given binning must be already reconstructed in the \code{cache} directory. You must use the same sampling as during \code{avg}.\\ 16 | 17 | \hline 18 | \multicolumn{2}{|c|}{\textbf{Mask}}\\ 19 | \hline 20 | -- \code{Ali\_mType}\textcolor{myred}{\textbf{*}} & Type of mask; ``cylinder'', ``sphere'', ``rectangle''.\\ 21 | -- \code{particleRadius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] particle radius, in \r{A}. It should be the smallest values that contain the particle. For particles in a lattice, neighboring particles can be used in alignment by specifying a larger mask size, with \code{Ali\_mRadius}, but this parameter must correspond to the central unit.\\ 22 | -- \code{Ali\_mRadius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] mask radius, in \r{A}. The mask size must be large enough to contain the entire particle (i.e. larger than \code{particleRadius}), the delocalized signal, proper apodization, and to avoid wraparound error in cross-correlation.\\ 23 | -- \code{Ali\_mCenter}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] shifts, in \r{A}, relative to the center of the reconstruction. Positive shifts translate the \code{Ali\_mType} mask to the right of the axis.\\ 24 | 25 | -- \code{Peak\_mRadius} & Further restrict the translations to this radius. By default, the translations are limited to the \code{particleRadius}. If specified, \code{Peak\_mType} should also be specified.\\ 26 | -- \code{Peak\_mType} & Further restrict the translations. Mask applied to the correlation map; should be ``cylinder'', ``sphere'' or ``rectangle''.\\ 27 | 28 | -- \code{flgCenterRefCOM} & Whether or not the references should be shifted to their center-of-mass before starting the alignment\footnote{For membrane proteins, or phage particles for example, this parameter should be turned off either from the beginning, or after a cycle or two.}. Default=1.\\ 29 | 30 | 31 | -- \code{scaleCalcSize}\textcolor{blue}{\textbf{*}} & Scale the box size (section \ref{sec:algo:avg:box}) by this number.
Default=1.5.\\

\hline
\multicolumn{2}{|c|}{\textbf{Symmetry}}\\
\hline

-- \code{Raw\_classes\_odd}\textcolor{myred}{\textbf{*}} & Parameter used to control the C symmetry of the first half set. It should be ``\code{[0; X.*ones(2,1)]}'', where \code{X} is the central symmetry order. This is equivalent to \code{[0; X; X]}.\\
-- \code{Raw\_classes\_eve}\textcolor{myred}{\textbf{*}} & Parameter used to control the C symmetry of the second half set. It should be identical to \code{Raw\_classes\_odd}.\\
%-- \code{force_no_symmetry} & Turn off the symmetry of the particle. Default=0.\\
-- \code{symmetry} & New parameter used to control the symmetry. CX, I/I2, O, and DX are supported. The old default will be used if this parameter is not specified.\\

\hline
\multicolumn{2}{|c|}{\textbf{Angular search}}\\
\hline

-- \code{Raw\_angleSearch}\textcolor{myred}{\textbf{*}} & Angular search, in degrees. The format is [$\Theta_{out},\ \Delta_{out},\ \Theta_{in},\ \Delta_{in}$]. For example, [$180,\ 15,\ 180,\ 12$] specifies a $\pm$180\textdegree\ out of plane search (polar and azimuth angles) with 15\textdegree\ steps, and a $\pm$180\textdegree\ in plane search (planar angles) with 12\textdegree\ steps.\\

\hline
\multicolumn{2}{|c|}{\textbf{Others}}\\
\hline

-- \code{subTomoMeta}\textcolor{myred}{\textbf{*}} & Project name. At this step, {\emClarity} expects to find the metadata \code{subTomoMeta}.mat in the project directory. Most output files will have the project name as prefix.\\

-- \code{Raw\_className}\textcolor{myred}{\textbf{*}} & Class ID for subtomogram averaging and alignment. You should leave it set to zero.\\

-- \code{Cls\_className}\textcolor{myred}{\textbf{*}} & Class ID for classification. You should leave it set to zero for now. For more information, see section \ref{sec:classification}.\\

-- \code{Fsc\_bfactor}\textcolor{myred}{\textbf{*}} & Global B-factor applied to both references. Although it can be a vector, at this stage {\emClarity} will only use the first number as B-factor. See \ref{sec:algo:avg:wiener} for more detail.\\

-- \code{flgClassify}\textcolor{myred}{\textbf{*}} & Whether or not this cycle is a classification cycle. It must be 0 if subtomogram alignment is the next step. More information on this in \ref{sec:classification}.\\

-- \code{use\_v2\_SF3D}\textcolor{blue}{\textbf{*}} & Whether or not the new per-particle sampling function procedure should be used, as opposed to the older ``grouped'' sampling functions. This is the default since {\emClarity} 1.5.1.0. Default=1.\\

% I don't think this one is really supported...?
%-- \code{flgCutOutVolumes}\textcolor{blue}{\textbf{*}} & ... Default=0.\\

\hline
\end{longtable}
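A minimal, illustrative fragment for a coarse \code{alignRaw} cycle; the radii and the C1 symmetry are placeholder choices for a hypothetical particle, and the angular search reuses the example from the table above.

\begin{lstlisting}
% Illustrative fragment of a param file, not a complete file.
Ali_samplingRate=4
Ali_mType=sphere
% smallest radii containing the particle, in Angstrom
particleRadius=[160,160,160]
% mask radius: larger than particleRadius
Ali_mRadius=[180,180,180]
Ali_mCenter=[0,0,0]
% C1 symmetry (X=1) for both half sets
Raw_classes_odd=[0;1.*ones(2,1)]
Raw_classes_eve=[0;1.*ones(2,1)]
% +/-180deg out-of-plane, 15deg steps; +/-180deg in-plane, 12deg steps
Raw_angleSearch=[180,15,180,12]
\end{lstlisting}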
-------------------------------------------------------------------------------- /Figures_Tables/13_parameters.tex: --------------------------------------------------------------------------------

% Parameters for tomoCPR
\renewcommand{\arraystretch}{1.2}
\begin{longtable}[l]{| l || p{96mm} |}
\captionsetup{labelfont=bf}
\caption[\code{tomoCPR} parameters]{\code{tomoCPR} parameters. Your parameter file should have the following parameters. \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed unless you know what you are doing. The other parameters are optional.} \label{param:tomoCPR}\\

\hline
\multicolumn{2}{|c|}{\textbf{Sampling}}\\
\hline

-- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meter per pixel (e.g. 1.8e-10). Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\
-- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\
-- \code{Ali\_samplingRate}\textcolor{myred}{\textbf{*}} & Current binning factor (1 means no binning). The sub-region tomograms at this given binning must already be reconstructed in the \code{cache} directory. If they aren't, you'll need to run \code{ctf 3d} before running this step.\\
\hline
\newpage

\hline
\multicolumn{2}{|c|}{\textbf{Fiducial alignment}}\\
\hline

-- \code{Ali\_mType}\textcolor{myred}{\textbf{*}} & Type of mask; ``cylinder'', ``sphere'', ``rectangle''.\\

-- \code{particleRadius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] particle radius, in \r{A}. It should be the smallest radii that contain the particle. For particles in a lattice, this parameter must correspond to the central unit.\\

-- \code{Ali\_Radius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] mask radius, in \r{A}. The mask size must be large enough to contain the entire particle (i.e. larger than \code{particleRadius}), the delocalized signal and proper apodization, and to avoid wraparound error in cross-correlation.\\

-- \code{Ali\_mCenter}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] shifts, in \r{A}, relative to the center of the reconstruction. Positive shifts translate the \code{Ali\_mType} mask to the right of the axis.\\

-- \code{peak\_mask\_fraction} & Fraction of the \code{particleRadius} used for the alignment. This has no effect if \code{Peak\_mType} and \code{Peak\_mRadius} are defined. Default=0.4.\\

-- \code{Peak\_mType} & Further restrict the translations by applying an additional mask; ``cylinder'', ``sphere'', ``rectangle''.\\

-- \code{Peak\_mRadius} & Further restrict the translations to this radius. By default (0), the translations are limited by \code{particleRadius} and \code{peak\_mask\_fraction}. This has no effect if \code{Peak\_mType} is not defined; in that case the default behavior applies.\\

-- \code{tomoCprLowPass} & Low-pass filter cutoff applied to the tiles, in \r{A}. By default, it corresponds to $1.5 \times$ the current resolution and it is forced to be between 10\r{A} and 24\r{A}.\\

-- \code{tomoCprDefocusRefine}\textcolor{myred}{\textbf{*}} & Refine the defocus value during alignment. This feature is experimental and we don't recommend using it at the moment. Default=false.\\

-- \code{tomoCprDefocusRange}\textcolor{myred}{\textbf{*}} & Range of defocus to sample around the current defocus estimate, in meter.\\

-- \code{tomoCprDefocusStep}\textcolor{myred}{\textbf{*}} & Defocus step, in meter.\\

-- \code{min\_res\_for\_ctf\_fitting}\textcolor{blue}{\textbf{*}} & Low-pass filter cutoff applied to the tiles, in \r{A}. It replaces \code{tomoCprLowPass} in case of a defocus search. If $\sqrt{2}\,\code{PIXEL\_SIZE} > \code{min\_res\_for\_ctf\_fitting}$, the defocus search is turned off.
Default=10.\\

\hline
\multicolumn{2}{|c|}{\textbf{Tilt-series alignment}}\\
\hline

-- \code{particleMass}\textcolor{myred}{\textbf{*}} & Rough estimate of the particle mass, in MDa. This is used to set the number of particles per patch. Smaller particles give less reliable alignments, so more of them are included in each patch, creating a smoother solution.\\ % Actually, why not do the opposite? the bigger the particle, the bigger the patches (because you'll have less particle/patch so you need to make them bigger to compensate)?

% I think you misunderstand. The patch size is based on the size of each particle and how many particles are in a patch. This assumes a fairly uniform distribution of particles. It is the number of particles/patch that is related to the smoothness of the local solution. A smoother solution means a stronger weight on the prior and less weight on the data. - BAH

-- \code{tomoCPR\_randomSubset} & The maximum number of particles to use as fiducials. If more particles than this are available, a random subset will be selected. Default=-1, i.e. all particles.\\

-- \code{k\_factor\_scaling} & \code{KFactorScaling} from {\tiltalign}. Default=$10/\sqrt{\code{tomoCPR\_randomSubset}}$.\\

-- \code{rot\_option\_global} & \code{RotOption} from {\tiltalign}. Default=1.\\
-- \code{rot\_option\_local} & \code{LocalRotOption} from {\tiltalign}. Default=1.\\
-- \code{rot\_default\_grouping\_local} & \code{LocalRotDefaultGrouping} from {\tiltalign}. Default=3.\\

-- \code{mag\_option\_global} & \code{MagOption} from {\tiltalign}. Default=1.\\
-- \code{mag\_option\_local} & \code{LocalMagOption} from {\tiltalign}. Default=1.\\
-- \code{mag\_default\_grouping\_global} & \code{MagDefaultGrouping} from {\tiltalign}. Default=5.\\
-- \code{mag\_default\_grouping\_local} & \code{LocalMagDefaultGrouping} from {\tiltalign}. Default=5.\\

-- \code{tilt\_option\_global} & \code{TiltOption} from {\tiltalign}. Default=5.\\
-- \code{tilt\_option\_local} & \code{LocalTiltOption} from {\tiltalign}. Default=5.\\
-- \code{tilt\_default\_grouping\_global} & \code{TiltDefaultGrouping} from {\tiltalign}. Default=5.\\
-- \code{tilt\_default\_grouping\_local} & \code{LocalTiltDefaultGrouping} from {\tiltalign}. Default=5.\\

-- \code{min\_overlap} & \code{MinSizeOrOverlapXandY} from {\tiltalign}. Default=0.5.\\

-- \code{shift\_z\_to\_to\_centroid} & If true/1, use the \code{ShiftZFromOriginal} entry from {\tiltalign} and set \code{AxisZShift} to 0. Default=true.\\

\hline
\multicolumn{2}{|c|}{\textbf{Others}}\\
\hline

-- \code{subTomoMeta}\textcolor{myred}{\textbf{*}} & Project name. At this step, {\emClarity} expects to find the metadata \code{subTomoMeta}.mat in the project directory.\\

-- \code{Raw\_className}\textcolor{myred}{\textbf{*}} & Class ID. You should leave it set to zero.\\
-- \code{Raw\_classes\_odd}\textcolor{myred}{\textbf{*}} & This is only used to set the number of references. As the multi-reference alignment is currently disabled, you can leave it as whatever you used for the subtomogram average or set it to \code{[0;0]}.\\

-- \code{Raw\_classes\_eve}\textcolor{myred}{\textbf{*}} & It should be identical to \code{Raw\_classes\_odd}.\\

\hline
\end{longtable}


% rmsScale(sqrt(particleMass)) : idk about this one...
% whitenProjections there is a fixme on this one so... ignoring it for now
% use_MCF, false, ignoring this for now.
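A minimal, illustrative \code{tomoCPR} fragment; the 70S mass is an approximation and the other values simply restate the defaults listed above.

\begin{lstlisting}
% Illustrative fragment of a param file, not a complete file.
Ali_samplingRate=2
% roughly 2.5 MDa for a 70S ribosome; sets the particles per patch
particleMass=2.5
% -1: use all available particles as fiducials
tomoCPR_randomSubset=-1
% leave the experimental defocus refinement off
tomoCprDefocusRefine=0
\end{lstlisting}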
-------------------------------------------------------------------------------- /Figures_Tables/14_parameters.tex: --------------------------------------------------------------------------------

% Parameters for pca
\renewcommand{\arraystretch}{1.2}
\begin{longtable}[l]{| l || p{110mm} |}
\captionsetup{labelfont=bf}
\caption[\code{pca} parameters]{\code{pca} parameters. Your parameter file should have the following parameters. \textcolor{myred}{\textbf{*}} indicates the required parameters, \textcolor{blue}{\textbf{*}} indicates expert parameters. Expert parameters should not be changed unless you know what you are doing. The other parameters are optional.}\\

\hline
\multicolumn{2}{|c|}{\textbf{Sampling}}\\
\hline

-- \code{PIXEL\_SIZE}\textcolor{myred}{\textbf{*}} & Pixel size in meter per pixel (e.g. 1.8e-10). Must match the header of the stacks in \code{fixedStacks/*.fixed}.\\
-- \code{SuperResolution}\textcolor{myred}{\textbf{*}} & Whether or not the \code{fixedStacks/*.fixed} are super-sampled. Note that this should be the same value you used for \code{ctf estimate} in section \ref{sec:defocus_estimate}.\\
-- \code{Cls\_samplingRate}\textcolor{myred}{\textbf{*}} & Current binning factor (1 means no binning). The sub-region tomograms at this given binning must already be reconstructed in the \code{cache} directory. If they aren't, you'll need to run \code{ctf 3d} before running this step.\\

-- \code{Ali\_samplingRate}\textcolor{myred}{\textbf{*}} & Binning factor (1 means no binning) of the half-maps of the current cycle. If this is different from \code{Cls\_samplingRate}, the reconstructions will be resampled to match \code{Cls\_samplingRate}.\\


\hline
\multicolumn{2}{|c|}{\textbf{Masks}}\\
\hline

-- \code{Ali\_mType}\textcolor{myred}{\textbf{*}} & Type of mask used for the reconstruction; ``cylinder'', ``sphere'', ``rectangle''.\\
-- \code{Cls\_mType}\textcolor{myred}{\textbf{*}} & Type of mask to use for the PCA; ``cylinder'', ``sphere'', ``rectangle''.\\

-- \code{Ali\_Radius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] mask radius, in \r{A}, used for the reconstruction.\\
-- \code{Cls\_Radius}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] mask radius, in \r{A}, to use for the PCA.\\

-- \code{Ali\_mCenter}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] shifts, in \r{A}, used for the reconstruction. These are relative to the center of the reconstruction. Positive shifts translate the \code{Ali\_mType} mask to the right of the axis.\\
-- \code{Cls\_mCenter}\textcolor{myred}{\textbf{*}} & [$x,\ y,\ z$] shifts, in \r{A}, to use for the PCA. These are relative to the center of the reconstruction. Positive shifts translate the \code{Cls\_mType} mask to the right of the axis.\\

-- \code{flgPcaShapeMask} & Calculate and apply a molecular mask to the difference maps. This molecular mask is calculated using the combined reference. Default=true.\\

-- \code{test\_updated\_bandpass} & By default (0/false), low-pass filters are used to calculate the length scales. If true, use band-pass filters.
See section \ref{sec:algo:classification:resolution_bands} for more details.\\

\hline
\multicolumn{2}{|c|}{\textbf{PCA}}\\
\hline

-- \code{pcaScaleSpace}\textcolor{myred}{\textbf{*}} & Length scales, i.e. resolution bands, in \r{A}. If this is a vector, the PCA will be performed for each length scale and you will need to select the principal axes for each length scale.\\

-- \code{Pca\_randSubset}\textcolor{myred}{\textbf{*}} & For very large data sets (tens of thousands of particles), the principal axes describing the important variation can be obtained with a subset of the data, which saves a lot of computation. If 0, use the entire dataset; otherwise, specify the number of particles that should be randomly selected to perform the decomposition. Usually 25\%, or at least 3000--4000 particles, is a good rule of thumb.\\

-- \code{Pca\_maxEigs}\textcolor{myred}{\textbf{*}} & Most of the variance is usually explained within the first 20 to 30 directions, so it is usually not useful to save all of the directions. Use this parameter to select the number of principal directions to save.\\

\hline
\multicolumn{2}{|c|}{\textbf{Clustering}}\\
\hline

-- \code{Pca\_coeffs}\textcolor{myred}{\textbf{*}} & The selected principal axes, for each length scale. Each length scale is a row. You can select as many (or as few) axes from any length scale as you want, but the number of entries in each row must be constant. Use zeros to fill empty places.\\

-- \code{Pca\_clusters}\textcolor{myred}{\textbf{*}} & The number of clusters to find. If this is a vector, the clustering will be calculated for each registered value.\\

-- \code{Pca\_distMeasure} & This corresponds to the \code{Distance} entry of the \code{kmeans} function of {\MATLAB}. By default, this is the squared Euclidean distance metric, i.e. each centroid is the mean of the points in that cluster.\\

-- \code{Pca\_nReplicates} & This corresponds to the \code{Replicates} entry of the \code{kmeans} function of {\MATLAB}, i.e. the number of times to repeat the clustering using new initial cluster centroid positions. Default=128.\\

\hline
\multicolumn{2}{|c|}{\textbf{Others}}\\
\hline

-- \code{PcaGpuPull}\textcolor{myred}{\textbf{*}} & The decomposition is calculated on the CPU, but the difference maps are calculated on the GPU, which is much more efficient. This parameter controls how many difference maps should be held on the GPU at any given time.\\

-- \code{flgClassify}\textcolor{myred}{\textbf{*}} & Specifies that this cycle is a classification cycle. Must be set to 1/true.\\

-- \code{subTomoMeta}\textcolor{myred}{\textbf{*}} & Project name. At this step, {\emClarity} expects to find the metadata \code{subTomoMeta}.mat in the project directory.\\

-- \code{flgCutOutVolumes} & Whether or not each transformed particle (rotated and shifted) used to calculate the difference maps should be saved to the \code{cache} directory. Note that the subtomograms have an extra padding of 20 pixels. This makes the pre-processing for the PCA much slower if activated. Default=0.\\

-- \code{scaleCalcSize}\textcolor{blue}{\textbf{*}} & Scale the box size used to calculate the difference maps by this number. Default=1.5.\\

-- \code{use\_v2\_SF3D} & Whether or not the new per-particle sampling function procedure should be used, as opposed to the older ``grouped'' sampling functions. This is the default since {\emClarity} 1.5.1.0. Default=1.\\

\hline
\end{longtable}
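A minimal, illustrative \code{pca} fragment showing how the rows of \code{Pca\_coeffs} line up with the \code{pcaScaleSpace} length scales and how rows are padded with zeros; the values are placeholders, not recommendations.

\begin{lstlisting}
% Illustrative fragment of a param file, not a complete file.
% three length scales, in Angstrom
pcaScaleSpace=[8;14;21]
% 0: use the entire data set for the decomposition
Pca_randSubset=0
% number of principal directions to save
Pca_maxEigs=36
% one row per length scale; zeros pad rows to a constant length
Pca_coeffs=[2,3,4,0; 1,2,3,4; 1,2,0,0]
% run the clustering for k=2 and k=3
Pca_clusters=[2;3]
\end{lstlisting}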
% SuperResolution
% Pca_symMask, only supports cylinder for some reason, so don't talk about it.
% Pca_flattenEigs, 0

-------------------------------------------------------------------------------- /Figures_Tables/16_cluster.tex: --------------------------------------------------------------------------------

\begin{figure}[!htb] % Stay within section
\captionsetup{labelfont=bf}
\centering

\begin{tikzpicture}[every node/.style={minimum width=1.4cm,minimum height=7mm}]

\node (pcr) {${[\bm{S}_r \bm{V}^{T}_{r}]}_{best}$};
\draw[dashed](pcr.west)--($(pcr.west)+(-3.1,0)$);

\node (pc2) at ($(pcr)+(-1.3,-1.94)$) {${[\bm{S}_2 \bm{V}^{T}_{2}]}_{best}$};
\draw[dashed](pc2.west)--($(pc2.west)+(-1.8,0)$);

\node (pc1) at ($(pc2)+(-1.3,-1.94)$) {${[\bm{S}_1 \bm{V}^{T}_{1}]}_{best}$};
\draw[dashed](pc1.west)--($(pc1.west)+(-0.5,0)$);

% pc all
\matrix (pcall) [draw,matrix of math nodes,fill=white] at ($(pc1)+(+4,-4)$)
{
c_{1,1} & \cdots & c_{1,a+1} & \cdots & c_{1,b+1} & \cdots\\
c_{2,1} & \cdots & c_{2,a+1} & \cdots & c_{2,b+1} & \cdots\\
\cdots & \ddots & \cdots & \ddots & \cdots & \ddots\\
c_{p,1} & \cdots & c_{p,a+1} & \cdots & c_{p,b+1} & \cdots\\
};

\draw[arrow](pcall.south west)--(pcall.south east) node[midway,sloped,below] {$a+b+c$ axes};
\draw[arrow](pcall.north west)--(pcall.south west) node[midway,sloped,below] {$p$ particles};

\node (dima) at ($(pcall)+(-2.75,0)$) [draw,minimum width=2.5cm,minimum height=3.1cm] {};
\draw[arrow](pc1.east)-|($(dima.north)+(0,0)$) node[near end,sloped,above] {$a$ axes};

\node (dimb) at ($(pcall)+(0,0)$) [draw,minimum width=2.5cm,minimum height=3.1cm] {};
\draw[arrow](pc2.east)-|($(dimb.north)+(0,0)$) node[near end,sloped,above] {$b$ axes};

\node (dimc) at ($(pcall)+(2.75,0)$) [draw,minimum width=2.5cm,minimum height=3.1cm] {};
\draw[arrow](pcr.east)-|($(dimc.north)+(0,0)$) node[near end,sloped,above] {$c$ axes};


% kmeans
\matrix (kmeans) [draw,matrix of math nodes,fill=white] at ($(pcall)+(+8,0)$)
{
1 & c(1)\\
2 & c(2)\\
\vdots & \vdots\\
p & c(p)\\
};

\draw[arrow](pcall.east)--(kmeans.west) node[midway,sloped,above] {$k$-means};

\node (title_particle) at ($(kmeans)+(-0.75,2.5)$) [rotate=90] {particles};
\node (title_class) at ($(kmeans)+(0.64,2.19)$) [rotate=90] {class};


\end{tikzpicture}

\caption[Clustering]{The data is projected onto the principal axes for each of the $r$ length scales and stacked into a single $\bm{SV}^T$ matrix of principal components. For visualization, the principal components are transposed to have the axes as columns and the particles as rows. The projected data is then clustered, usually with $k$-means.
As a result, each particle is assigned to a class.}
\label{fig:cluster}
\end{figure}

-------------------------------------------------------------------------------- /Figures_Tables/16_naming_convention.tex: --------------------------------------------------------------------------------

% \renewcommand{\arraystretch}{1.2}
\begin{longtable}[c]{| l || p{120mm} |}
\captionsetup{labelfont=bf}
\caption{Symbols often used} \label{tab:symbols}\\
% I need to check the diff between first and last column. It is useful when removing images.

\hline
\textbf{Symbol} & \textbf{Description}\\ \hhline{|=#=|}
$\bm{S}$ & A reference, i.e. a subtomogram average or a template.\\ \hline
$\bm{s}$ & A particle in 3D, i.e. a subtomogram.\\ \hhline{|=#=|}

$\bm{V}$ & A tomogram. This usually refers to the full tomogram or a sub-region tomogram.\\ \hline
$\bm{I}$ & An image. This can be an entire image, a strip or a tile.\\ \hhline{|=#=|}

$\bm{W}$ & A weight in Fourier space. This can be a low-, high- or band-pass filter, a total 3D sampling function, 1D or 2D CTFs, an exposure or ``B-factor'' filter, etc. 3D sampling functions are referred to as $\bm{w}$.\\ \hline
$\bm{M}$ & A real space mask. This can be any shape mask, molecular mask, evaluation mask, etc.\\ \hhline{|=#=|}

$\bm{\mathrm{z}}$ & A defocus value.\\ \hline
$\bm{\Delta \mathrm{z}}$ & A defocus shift. $\bm{\Delta \mathrm{z}}_{ast}$ is the astigmatic shift.\\ \hline
$\bm{\phi}$ & The azimuthal angle. $\bm{\phi}_{ast}$ is the astigmatic angle.\\ \hhline{|=#=|}

$\bm{R}$ & A rotation matrix.\\ \hline
$\bm{T}$ & A translation.\\ \hline
$\bm{\alpha}$ & A tilt angle.\\ \hline

\end{longtable}

-------------------------------------------------------------------------------- /Figures_Tables/16_svd.tex: --------------------------------------------------------------------------------

\begin{figure}[!htb] % Stay within section
\captionsetup{labelfont=bf}
\centering

\begin{tikzpicture}[every node/.style={anchor=north east,minimum width=1.4cm,minimum height=7mm}]
\matrix (mA) [draw,matrix of math nodes, fill=white]
{
x_{1,1,r} & x_{1,2,r} & \cdots & x_{1,p,r} \\
x_{2,1,r} & x_{2,2,r} & \cdots & x_{2,p,r} \\
\vdots & \vdots & \ddots & \vdots \\
x_{v,1,r} & x_{v,2,r} & \cdots & x_{v,p,r} \\
};

\matrix (mB) [draw,matrix of math nodes, fill=white] at ($(mA.south west)+(4.6,1.3)$)
{
x_{1,1,2} & x_{1,2,2} & \cdots & x_{1,p,2} \\
x_{2,1,2} & x_{2,2,2} & \cdots & x_{2,p,2} \\
\vdots & \vdots & \ddots & \vdots \\
x_{v,1,2} & x_{v,2,2} & \cdots & x_{v,p,2} \\
};

\matrix (mC) [draw,matrix of math nodes, fill=white] at ($(mB.south west)+(4.6,1.3)$)
{
x_{1,1,1} & x_{1,2,1} & \cdots & x_{1,p,1} \\
x_{2,1,1} & x_{2,2,1} & \cdots & x_{2,p,1} \\
\vdots & \vdots & \ddots & \vdots \\
x_{v,1,1} & x_{v,2,1} & \cdots & x_{v,p,1} \\
};

\draw[dashed](mA.north east)--(mC.north east);
\draw[dashed](mA.south east)--(mC.south east);

% Basis
\draw[arrow](mC.north west)--(mA.north west) node[midway,sloped,above] {$r$ length scales};
\draw[arrow](mC.north west)--(mC.south west) node[midway,sloped,below] {$v$ voxels};
\draw[arrow](mA.north west)--(mA.north east) node[midway,sloped,above] {$p$ particles};

% fill opacity=0
% Example of difference maps
\node (rect1) at ($(mC)+(-1.5,1.42)$)
[draw,minimum width=1.2cm,minimum height=2.85cm] {};
\node[below=0.5cm of rect1] (txt1) {$\bm{X}_{1,1}$};
\draw[arrow3](rect1.south)--(txt1.north);

\node (rect2) at ($(mC)+(-0.1,1.42)$) [draw,minimum width=1.2cm,minimum height=2.85cm] {};
\node[below=0.5cm of rect2] (txt2) {$\bm{X}_{2,1}$};
\draw[arrow3](rect2.south)--(txt2.north);

% draw arrow for SVD
\node[right=3.8cm of mA] (svdr) {$\bm{U}_r \bm{S}_r \bm{V}_{r}^{T}$};
\draw[arrow3](mA.east)--(svdr.west) node[below, midway] {decompose} (svdr);
\draw[dashed](svdr.east)--($(svdr.east)+(1.8,0)$);

\node[right=3.8cm of mB] (svd2) {$\bm{U}_2 \bm{S}_2 \bm{V}^{T}_{2}$};
\draw[arrow3](mB.east)--(svd2.west) node[below, midway] {decompose} (svd2);
\draw[dashed](svd2.east)--($(svd2.east)+(3.1,0)$);

\node[right=3.8cm of mC] (svd1) {$\bm{U}_1 \bm{S}_1 \bm{V}^{T}_{1}$};
\draw[arrow3](mC.east)--(svd1.west) node[below, midway] {decompose} (svd1);
\draw[dashed](svd1.east)--($(svd1.east)+(4.4,0)$);

% \draw[arrow3]($(mC.south east)+(0.2,0)$)--($(mC.south east)+(0.2,1.94)$);

\end{tikzpicture}

\caption[Singular Value Decomposition]{Singular Value Decomposition of the $p$ difference maps, for each length scale $r$. The features, i.e. the voxels, are organized in rows, with one particle per column. As such, $\bm{U}$ contains the left singular vectors, i.e. the principal directions/axes, and $\bm{V}^T$ contains the right singular vectors. Consequently, $\bm{SV}^{T}$ are the principal components.}
\label{fig:svd}
\end{figure}
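For readers who prefer code to diagrams, here is a minimal {\MATLAB} sketch of the operations illustrated in figures \ref{fig:svd} and \ref{fig:cluster}, for a single length scale. The variable names are ours and this is \emph{not} the {\emClarity} implementation; \code{X} is assumed to be the $v \times p$ matrix of difference maps (voxels in rows, particles in columns).

\begin{lstlisting}
% X: v-by-p matrix of difference maps at one length scale.
[U, S, V] = svd(X, 'econ');  % U holds the principal axes (left singular vectors)
PC = S * V';                 % principal components, one column per particle

% Keep, e.g., the first 3 axes and transpose so particles are rows,
% then cluster with the defaults listed in the pca parameter table.
coeffs = PC(1:3, :)';
labels = kmeans(coeffs, 2, ...
    'Distance', 'sqeuclidean', ...  % cf. Pca_distMeasure
    'Replicates', 128);             % cf. Pca_nReplicates
\end{lstlisting}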
-------------------------------------------------------------------------------- /Sections/01_tutorial.tex: --------------------------------------------------------------------------------

\section{How to use this guide} \label{sec:tutorial}

\subsection{Run the jobs}

Our main objective in writing this tutorial is to help you get started using {\emClarity} for processing sub-tomogram data as quickly as possible. To begin, we will not introduce all of the methods that {\emClarity} puts at your disposal, instead focusing on core features and concepts. If at any point you are confused, or something seems to not work as you expect, you might find more information on the \href{https://github.com/bHimes/emClarity/wiki}{wiki}; feel free to also search the mailing list archive, or post new questions to the community forum, hosted on \href{https://groups.google.com/forum/#!forum/emclarity}{google groups}, should you be unable to resolve an issue on your own.

\begin{tip}To display every procedure available, run \code{emClarity help} from the command line.
\end{tip}

\subsection{Algorithms}

The {\emClarity} source code is available on \href{https://github.com/bHimes/emClarity/tree/LTS_version_1_5_0}{github}, and we encourage you to go through the code to look at the algorithms directly. Because {\emClarity} is frequently updated, this can be a great way to see what's going on behind the scenes.

Section \ref{sec:algo} contains descriptions of the algorithms for each section presented in this tutorial. Please keep in mind that these are simplified descriptions of what {\emClarity} is actually doing, as we often don't mention the details that were implemented to make the code more efficient.

\subsection{Installation and Requirements}

Information about the \href{https://github.com/bHimes/emClarity/wiki/Installation}{installation} and the software and hardware \href{https://github.com/bHimes/emClarity/wiki/Requirements}{requirements} is available \href{https://github.com/bHimes/emClarity/wiki}{online}.

% \subsubsection{Build}
% Compiled versions of {\emClarity} are available on the \href{https://github.com/bHimes/emClarity/wiki}{wiki}. These require the {\MATLAB} Compiler Runtime (MCR), which is available for free. However, if you want to compile {\emClarity} yourself:

% \begin{enumerate}
% \item Install \href{https://www.mathworks.com/help/install/}{{\MATLAB} 2019a} and the \href{https://developer.nvidia.com/cuda-10.0-download-archive}{CUDA toolkit 10.0} if they are not already installed on your system. Note that the versions matter: this version of {\emClarity} requires {\MATLAB} 2019a, which itself requires CUDA 10.0. Installing this software does \emph{not} require administrator privileges, however you will need a {\MATLAB} licence.

% \item Download the repository:
% \begin{lstlisting}
% git clone https://github.com/bHimes/emClarity.git --branch vs1.5.3 --single-branch emClarity_1_5_3_08
% \end{lstlisting}
% The name of the output directory does not matter. We'll refer to it as \code{emClarity\_path}.

% \item Download the emClarity dependencies on the \href{https://github.com/bHimes/emClarity/wiki}{wiki}. Select the correct version and download the entire compressed directory from Google Drive. Then copy the `deps' directory to \code{emClarity\_path/bin/deps}.
% \end{enumerate}

% We might want to add a section about installation though. I've heard it is not that easy for some people, specially for large clusters. Also for installing the MCR locally, and cuda I realize it probably isn't trivial

\subsection{Parameter files}

{\emClarity} currently uses a parameter file to manage inputs. You can find an example \href{https://github.com/bHimes/emClarity/blob/master/docs/exampleParametersAndRunScript/param0.m}{here}.

\subsection{System parameters}

% Parameters for autoAlign
\begin{longtable}[l]{| l || p{123mm} |}
\captionsetup{labelfont=bf}
\caption[GPU and CPU parameters]{GPU and CPU parameters. Your parameter files should have the following parameters.\\ \textcolor{myred}{\textbf{*}} indicates the required parameters.} \label{tab:system_param}\\

\hline

-- \code{nGPUs}\textcolor{myred}{\textbf{*}} & The number of visible GPUs. {\emClarity} will try to use them in parallel as much as possible. If this number doesn't correspond to the actual number of GPUs available, {\emClarity} will ask you to either adjust this number to match the number of GPUs, or modify the environment variable \code{CUDA\_VISIBLE\_DEVICES} to make some GPUs invisible to {\MATLAB}.\\

-- \code{nCpuCores}\textcolor{myred}{\textbf{*}} & The maximum number of processes to run simultaneously. In most {\emClarity} programs, the number of processes launched in parallel on a single GPU is equal to \code{nCpuCores}$/$\code{nGPUs}. If your devices run out of memory, it is likely that you will have to decrease the number of processes per device, thus decreasing this parameter.\\

-- \code{fastScratchDisk}\textcolor{myred}{\textbf{*}} & Path of the optional temporary cache directory, used by \code{ctf 3d} and \code{tomoCPR}. This directory is only temporary and is moved back inside the project directory at the end of the execution. We recommend setting this to the fastest storage you have available. If left empty, \code{ctf 3d} and \code{tomoCPR} will directly use the project cache directory (c.f. \code{<projectDir>/cache} from section \ref{sec:project_directory}).\\

\hline
\end{longtable}
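A minimal sketch of the corresponding block of a parameter file; the values and the scratch path are hypothetical and should be adapted to your machine. To hide GPUs from {\MATLAB}, you can combine this with, e.g., \code{export CUDA\_VISIBLE\_DEVICES=0,1} in your shell.

\begin{lstlisting}
% Illustrative fragment of a param file, not a complete file.
nGPUs=2
% here, roughly 4 processes per GPU
nCpuCores=8
% hypothetical fast local storage; leave empty to use <projectDir>/cache
fastScratchDisk=/scratch/emc
\end{lstlisting}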
% I don't mention them in the rest of the tutorial because, in my experience, I set them at the beginning of the project, find the best nCpus and change them only between binnings (except in rare cases with memory issues during classification). Usually the closer I get to bin1, the smaller nCpus should be. Anyway, that is more technical and I don't want them mixed with the other parameters.

-------------------------------------------------------------------------------- /Sections/02_project_directory.tex: --------------------------------------------------------------------------------

\section{The project directory} \label{sec:project_directory}

{\emClarity} should be run from the ``project directory'', referred to as \code{<projectDir>}. Every output will be saved in this directory, apart from the temporary cache set by the \code{fastScratchDisk} parameter (table \ref{tab:system_param}). As we go along, we will present each sub-directory and its content in more detail.

\begin{itemize}
\item \code{<projectDir>}:
\begin{itemize}
\item \textbf{description}: it contains every input file and directory {\emClarity} needs, and every output. As most {\emClarity} programs are project based, you should run {\emClarity} from this directory.
\item \textbf{name}: anything you like.
\item \textbf{created by}: you.
\end{itemize}

\item \code{<projectDir>/fixedStacks}:
\begin{itemize}
\item \textbf{description}: it contains the raw (\textit{not} aligned) tilt-series (\code{*.fixed}) and the initial tilt-series alignment files (\code{*.xf}, \code{*.tlt} and optionally \code{*.local} and \code{*.erase}). See section \ref{sec:tilt_series_alignment:ETomo} for more details.
\item \textbf{name}: fixed.
\item \textbf{created by}: you or {\emClarity} \code{autoAlign} (\ref{sec:tilt_series_alignment:emClarity}).
\end{itemize}

\item \code{<projectDir>/fixedStacks/ctf}:
\begin{itemize}
\item \textbf{description}: created by \code{ctf estimate} (section \ref{sec:defocus_estimate}) and updated after tilt-series refinements by \code{ctf update} (section \ref{sec:tomoCPR}). It contains the radial averages (\code{*\_psRadial1.pdf}) and stretched power spectra (\code{*\_PS2.mrc}) computed by \code{ctf estimate}, as well as the tilt-series metadata (\code{*\_ctf.tlt}), used throughout the entire workflow and containing in particular the dose-scheme and defocus estimate of each view.
\item \textbf{name}: fixed.
\item \textbf{created by}: {\emClarity} \code{ctf estimate}.
\end{itemize}

\item \code{<projectDir>/aliStacks}:
\begin{itemize}
\item \textbf{description}: created by \code{ctf estimate} (section \ref{sec:defocus_estimate}) and updated after tilt-series refinement by \code{ctf update} (section \ref{sec:tomoCPR}). It contains the aligned, bead-erased tilt-series. These stacks are mostly used by \code{ctf 3d} to compute the tomograms at different binnings.
\item \textbf{name}: fixed.
\item \textbf{created by}: {\emClarity} \code{ctf estimate}.
\end{itemize}

\item \code{<projectDir>/cache}:
\begin{itemize}
\item \textbf{description}: created and updated by {\emClarity} when needed, usually during \code{ctf 3d}. It stores stacks and reconstructions at the current binning. If a reconstruction (\code{*.rec}) is present at the current binning, \code{ctf 3d} will skip its reconstruction. This also means that if a reconstruction is aborted, you may need to manually delete the partially complete reconstruction. For older versions of {\emClarity} ($<$1.5.1), if the sampling functions are already calculated (\code{*.wgt}), {\emClarity} will re-use them.
\item \textbf{name}: fixed.
\item \textbf{created by}: {\emClarity}.

\end{itemize}
\item \code{<projectDir>/convmap}:
\begin{itemize}
\item \textbf{description}: when creating a project with \code{init} (section \ref{sec:init}), {\emClarity} will look in this directory to grab outputs from \code{templateSearch} (section \ref{sec:picking}). If you pick your particles with {\emClarity}, the content of this directory is generated by \code{templateSearch}.
\item \textbf{name}: fixed.
\item \textbf{created by}: you / {\emClarity}.
\end{itemize}

\item \code{<projectDir>/recon}:
\begin{itemize}
\item \textbf{description}: it holds the information for each reconstructed sub-region in a given tilt-series (section \ref{sec:subregions}). The \code{*\_recon.coords} files are read into the metadata created by \code{init} and are used whenever a tomogram is made or whenever the coordinates of a sub-region are needed. Any change to these files is ignored after \code{init}.
\item \textbf{name}: fixed.
\item \textbf{created by}: \code{recScript2.sh}
\end{itemize}

\item \code{<projectDir>/bin10}:
\begin{itemize}
\item \textbf{description}: {\emClarity} does not directly use this directory, but it is used by \code{recScript2.sh} to define the sub-region boundaries (section \ref{sec:subregions}) and create \code{<projectDir>/recon}.
\item \textbf{name}: whatever is defined in \code{recScript2.sh}. By default, \code{bin10}.
\item \textbf{created by}: \code{recScript2.sh}
\end{itemize}

\item \code{<projectDir>/FSC}:
\begin{itemize}
\item \textbf{description}: created and updated during subtomogram averaging (section \ref{sec:avg}). It contains the spherical and conical FSCs for each cycle (\code{*fsc\_GLD.txt} and \code{*fsc\_GLD.pdf}), as well as the Figure-Of-Merit used for filtering (\code{*cRef\_GLD.pdf}) and the CTF-corrected volumes used for FSC calculations (\code{*noFilt\_EVE.mrc} and \code{*noFilt\_ODD.mrc}). For a full description of the FSC related outputs, see section \ref{sec:algo:avg}. If you run \code{emClarity fsc}, the molecular masks are saved in this directory as well.
\item \textbf{name}: fixed.
\item \textbf{created by}: {\emClarity} \code{avg}.
\end{itemize}

\item \code{<projectDir>/alignResume}:
\begin{itemize}
\item \textbf{description}: contains the results of the subtomogram alignments, for each cycle. {\emClarity} will look at this directory before aligning the particles from a given sub-region. If the results for this sub-region, at the current cycle, are already saved, it will skip the alignment. See section \ref{sec:align:run} for more details on why this is useful.
\item \textbf{name}: fixed.
\item \textbf{created by}: {\emClarity} \code{alignRaw}
\end{itemize}
\end{itemize}

-------------------------------------------------------------------------------- /Sections/03_get_data_ready.tex: --------------------------------------------------------------------------------

\section{Get your data ready} \label{sec:get_data_ready}

In this tutorial, we will use the 12 tilt-series of the fast-incremental single-exposure (FISE) data deposited on \href{https://www.ebi.ac.uk/pdbe/emdb/empiar/entry/10304/}{EMPIAR-10304} from \cite{eisenstein_2019}. You should be able to get a $\sim$7\r{A} map from this tutorial. To download the motion-corrected tilt-series, EMPIAR recommends the `Aspera Connect' option, but there are other options as well.
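For instance, if you prefer the command line, EMPIAR entries are also mirrored over plain FTP. A minimal sketch; the remote path is indicative only, so verify it on the EMPIAR-10304 entry before running:

\begin{lstlisting}
#!/bin/bash
# Indicative only: check the exact layout on the EMPIAR-10304 page.
wget -r -nH --cut-dirs=3 \
    ftp://ftp.ebi.ac.uk/empiar/world_availability/10304/data/
\end{lstlisting}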
\renewcommand{\arraystretch}{1.2}
\begin{longtable}[c]{| l || p{130mm} |}
\hline
-- \textbf{Sample}: & 70S ribosomes, 10nm colloidal gold fiducials.\\
\hline
-- \textbf{Tilt-scheme}: & Dose-symmetric starting from 0\textdegree, 3\textdegree\ increment, $\pm$60\textdegree, 120e/\r{A}$^2$ total exposure.\\
\hline
-- \textbf{Instruments}: & Krios with single-tilt axis holder equipped with a Gatan K3 direct electron detector. 2.1\r{A}/pix. $\sim$175\textdegree\ image rotation.\\
\hline
\captionsetup{labelfont=bf}
\caption{Tutorial data-set}
\end{longtable}

\begin{note}For this tutorial, we don't necessarily recommend aligning the 12 tilt-series manually, as it can be quite redundant, but for beginners, we do recommend at least trying to align one tilt-series with {\ETomo}. In any case, you should be able to use \code{emClarity autoAlign} for this dataset.
\end{note}

-------------------------------------------------------------------------------- /Sections/04_workflow.tex: --------------------------------------------------------------------------------

\section{Workflow} \label{sec:workflow}

\input{Figures_Tables/04_workflow}

You will often find that it is much easier to organize every {\emClarity} call into one script. This script has two main purposes. First, it keeps track of the jobs that have been run (you can also find this information in the \code{logFile} directory). This is often useful to visualize the global picture and it might help you remember how you got your final reconstruction. Second, it is a script, so you can use it directly to run {\emClarity}, making the workflow much simpler.

In the case of this tutorial, here is one example of such a script. To make things easier, each job has its own parameter file. This is just an example: you can modify the number of cycles, which samplings to use, the angular searches, the number of tilt-series refinements, the number of classifications, etc.

\begin{lstlisting}[basicstyle=\footnotesize\ttfamily]
#!/bin/bash

# Simple function to stop on *most* errors
check_error() {
  sleep 2
  if tail -n 30 ./logFile/emClarity.logfile |\
     grep -q "Error in emClarity" ; then
    echo "Critical error found. Stopping the script."
    exit
  else
    echo "No error detected. Continue."
  fi
}

# Change binning with tomoCPR
run_transition_tomoCPR() {
  emClarity removeDuplicates param${i}.m ${i}; check_error
  emClarity tomoCPR param${i}.m ${i}; check_error
  emClarity ctf update param$((${i}+1)).m; check_error
  emClarity ctf 3d param$((${i}+1)).m; check_error
}

# Basic alignment cycle
run_avg_and_align() {
  emClarity avg param${i}.m ${i} RawAlignment; check_error
  emClarity alignRaw param${i}.m ${i}; check_error
}

# autoAlign
# ctf estimate
# templateSearch

# Create metadata and reconstruct the tomograms
emClarity init param0.m; check_error
emClarity ctf 3d param0.m; check_error

# First reconstruction - check if that looks OK.
emClarity avg param0.m 0 RawAlignment; check_error
emClarity alignRaw param0.m 0; check_error

# Bin 3
for i in 1 2 3 4; do run_avg_and_align; done

# Run tomoCPR at bin3 using cycle 4 and then switch to bin2
run_transition_tomoCPR

# Bin 2
for i in 5 6 7 8 9; do run_avg_and_align; done

# Run tomoCPR at bin2 using cycle 9 and then switch to bin1
run_transition_tomoCPR

# Bin 1
for i in 10 11 12 13 14; do run_avg_and_align; done


# Last cycle: merge the datasets
emClarity avg param15.m 15 RawAlignment; check_error
emClarity avg param15.m 15 FinalAlignment; check_error
emClarity reconstruct param15.m 15;
\end{lstlisting}

This example doesn't have a classification, but as explained in section \ref{sec:classification}, classifications are encapsulated in their own cycles, so you can run them anytime you want between two cycles.

In our experience, it is usually good practice to keep a close eye on how the half-maps and FSC evolve throughout the workflow, especially before deciding to change the sampling. Moreover, as mentioned in Figure \ref{fig:emClarity_workflow}, the tilt-series refinement is completely optional and you can simply change the binning by running \code{ctf 3d}, as opposed to \code{run\_transition\_tomoCPR}.

The following sections of the tutorial will give you more details on the commands, on the parameters you need to run them and on the generated outputs.

Finally, if everything is set correctly and once the picking is done, you can technically run this script and wait for {\emClarity} to output the final reconstruction. If you are new to {\emClarity}, we do recommend running the commands manually, at least for the first few cycles, in order to get a better grasp of the software.

\begin{tip}It is best practice to work the whole way through the workflow with as small a data-set as possible and, once you have checked that everything holds, then process your full data. The same approach may be taken with this tutorial; it should be possible to obtain a low-resolution but recognizable 70S ribosome with only two or three of the tilt-series. This will confirm that everything ``runs'' (producing some output) in your hands and on your gear. Only after this does it make sense to confirm that you are able to produce the correct output, i.e. a 5.9\r{A} map.
\end{tip}
-------------------------------------------------------------------------------- /Sections/05_tilt_series_alignment.tex: --------------------------------------------------------------------------------

% Tilt-series alignment
\section{Initial tilt-series alignment} \label{sec:tilt_series_alignment}

\subsection{Objectives}

The first step of the workflow consists of finding an initial alignment for the raw tilt-series, that is, the tilt, rotation and shift of each image within the series. After the alignment, the tilt-axis must be parallel to the $y$-axis. This alignment can be refined later on using the particle positions (section \ref{sec:tomoCPR}).


\subsection{With {\emClarity}} \label{sec:tilt_series_alignment:emClarity}

{\emClarity} can align the tilt-series for you using its \code{autoAlign} procedure. This procedure is based on the {\IMOD} programs {\tilt} and {\tiltalign} and offers an automatic way of aligning tilt-series, with or without gold beads.


\subsubsection{Parameters}

\input{Figures_Tables/05_parameters}


\subsubsection{Run}

As with every {\emClarity} program, you should run the next commands from the project directory. The \code{autoAlign} routine has the following signature:
\begin{lstlisting}
>> emClarity autoAlign <param> <stack> <rawtlt> <rotation>
\end{lstlisting}
where \code{<param>} is the name of the parameter file, \code{<stack>} is the tilt-series to align (e.g. \code{tilt1.st}), \code{<rawtlt>} is a text file containing the raw tilt-angles (e.g. \code{tilt1.rawtlt}), in degrees (one line per image, in the same order as in the \code{.st} file), and \code{<rotation>} is the image rotation (tilt-axis angle from the vertical), in degrees, as specified in {\ETomo}. This angle will vary based on the microscope and magnification, but for this tutorial it is about 175\textdegree.

For example, to run \code{autoAlign} on the first tilt-series of the tutorial:
\begin{lstlisting}
>> emClarity autoAlign param.m tilt1.st tilt1.rawtlt 175
\end{lstlisting}

You may have noticed that all of the tilt-series have their first image blank. Fortunately, \code{autoAlign} can remove images from the series before alignment, as shown in the loop below. For instance, to remove the first view of the first tilt-series:
\begin{lstlisting}
>> emClarity autoAlign param.m tilt1.st tilt1.rawtlt 175 1
\end{lstlisting}
and to remove the first and last view of tilt11, run:
\begin{lstlisting}
>> emClarity autoAlign param.m tilt11.st tilt11.rawtlt 175 [1,41]
\end{lstlisting}
\begin{note}For the tutorial dataset, you should remove the first view of tilt1 to tilt10, and the first and last views from tilt11 and tilt12. Also, the refinement on fiducial beads doesn't produce satisfying results for tilt5, tilt6 and tilt8; as such, the \code{autoAli\_refine\_on\_beads} option was turned off for these tilt-series.
\end{note}
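Aligning all twelve series by hand is tedious, so a small loop helps. A minimal sketch, using the view-removal scheme from the note above and assuming your parameter file is named \code{param.m}:

\begin{lstlisting}[basicstyle=\footnotesize\ttfamily]
#!/bin/bash
# Remove the first view for tilt1-10, the first and last for tilt11-12.
for i in $(seq 1 12); do
  if [ ${i} -le 10 ]; then views="1"; else views="[1,41]"; fi
  emClarity autoAlign param.m tilt${i}.st tilt${i}.rawtlt 175 ${views}
done
\end{lstlisting}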
\subsubsection{Outputs}

{\emClarity} creates and organizes the necessary files it needs to run the next step of the workflow. You can find a description of these files in section \ref{sec:tilt_series_alignment:ETomo}. The goal here is to check whether or not the alignment is good enough to start with, and the easiest way is to look at \code{fixedStacks/<prefix>\_3dfind.ali} or \code{fixedStacks/<prefix>\_binX.ali}.
If you are familiar with {\ETomo}, then you can of course also look at the log files saved in \code{emC\_autoAlign\_<prefix>}. For instance, to visually check the fiducial beads:
\begin{lstlisting}
>> 3dmod \
emC_autoAlign_<prefix>/<prefix>_X_3dfind.ali \
emC_autoAlign_<prefix>/<prefix>_X_fitbyResid_X.fid
\end{lstlisting}

If for some reason you want to reconstruct the aligned tilt-series now, you can run the following command:
\begin{lstlisting}
>> newstack -xform <prefix>.xf <prefix>.fixed <prefix>.ali
\end{lstlisting}


\subsection{With {\ETomo}} \label{sec:tilt_series_alignment:ETomo}

If you don't want to use \code{autoAlign}, we do recommend using the (fiducial) alignment procedure from the {\ETomo} pipeline (\href{https://bio3d.colorado.edu/imod/doc/man/autofidseed.html}{autofidseed}, {\tiltalign}, etc.). One powerful option of this pipeline is the ability to solve for a series of local alignments using subsets of fiducial points, which can then be used by {\emClarity}, via the IMOD {\tilt} program, to reconstruct the tomograms.

During the alignments, you should keep an eye on the ratio of known measurements (fiducials) to unknown parameters (shifts, rotations, tilts and magnifications) you are trying to solve for. If this ratio is too low, the program is likely to accommodate random errors to solve for the model you are asking for. When faced with such a low ratio, you can simplify the alignment model during the refinement by grouping and/or fixing the variables (see \href{https://bio3d.colorado.edu/imod/doc/man/restrictalign.html}{restrictalign} ``OrderOfRestrictions'' for more details).

You are free to use other software for the initial alignment, but it is likely to require more effort to integrate your data into {\emClarity} (see the list of required files below). We do not plan to support imports from tomography software other than IMOD in the near future.

\begin{note}The \href{https://bio3d.colorado.edu/imod/betaDoc/man/batchruntomo.html}{\textbf{IMOD batchruntomo}} interface encapsulates the \href{https://bio3d.colorado.edu/imod/doc/etomoTutorial.html}{\textbf{ETomo}} pipeline and runs the operations required to align the tilt-series. To deal with a low ratio, \href{https://bio3d.colorado.edu/imod/betaDoc/man/batchruntomo.html}{\textbf{batchruntomo}} applies successive restrictions to the alignment. The default behavior is to group the rotations, then group the magnifications, then fix the tilt angles, then solve for only one rotation and then fix the magnification.
\end{note}

\begin{tip}You might have noticed that some images from the tutorial data-set are (mostly) blank. These images can be ignored during alignment (\href{https://bio3d.colorado.edu/imod/doc/etomoTutorial.html}{\textbf{ETomo}}: ``views to skip'' option). We don't recommend removing these images from the stack, as it is possible to remove them with \href{https://github.com/bHimes/emClarity}{\textbf{emClarity}} while accounting for the cumulative electron dose (section \myref{sec:defocus_estimate}).
\end{tip}


Once the tilt-series are aligned, you have to transfer the alignment files to {\emClarity}. Please pay careful attention to the naming conventions in this section, as they are used throughout the pipeline.

For each tilt-series, {\emClarity} needs:

\begin{itemize}
\item \code{<prefix>.fixed}: the raw (\textit{not} aligned) tilt-series.
These should \textit{not} be exposure-filtered nor phase flipped. They correspond to the original \code{tilt1.mrc} to \code{tilt12.mrc} stacks available on EMPIAR. If you did any other pre-processing (X-ray removal, etc.), you should use those stacks.

\begin{note}While it will produce substandard results, if you have no option but to use images which are already exposure-filtered, please add the following to your parameter file: \code{applyExposureFilter=0}.
\end{note}

\item \code{<prefix>.xf}: the file with the alignment transforms to apply to the \code{<prefix>.fixed} stacks. This file should contain one line per view, each with a linear transformation specified by six numbers. The first 4 numbers are the 2$\times$2 rotation matrix (in-plane rotation, scaling/magnification) and the last 2 numbers are the X and Y shifts (in un-binned pixels). See section \ref{sec:algo:defocus_estimate} for more details.

If you did the alignment with {\ETomo}, it corresponds to ``OutputTransformFile'' from {\tiltalign}, which is set by default to \code{<prefix>\_fid.xf} (fiducial alignment) or \code{<prefix>.xf} (fiducialless alignment).

\item \code{<prefix>.tlt}: the file with the solved tilt angles. One line per view, angles in degrees.

If you did the alignment with {\ETomo}, it corresponds to the ``OutputTiltFile'' from {\tiltalign}, which is by default \code{<prefix>\_fid.tlt} (fiducial alignment) or \code{<prefix>.tlt} (fiducialless alignment).

\item \textbf{(optional)} \code{<prefix>.local}: the file of local alignments. This file is similar to the \code{<prefix>.xf} file, but contains one transformation per view and per patch, plus an additional header per patch. See the {\tiltalign} documentation for more details.

If you did the alignment with {\ETomo}, it corresponds to the ``OutputLocalFile'' from {\tiltalign}, which is by default \code{<prefix>local.xf}.

\item \textbf{(optional)} \code{<prefix>.erase}: the file with the coordinates (in pixels) of the fiducial beads to erase before CTF estimation. These coordinates must correspond to the aligned stack. One line should contain the x, y and z (view) coordinates of one bead you wish to remove. Alternatively, you can remove the beads from the raw \code{<prefix>.fixed} before importing them into {\emClarity}.

If you did the alignment with {\ETomo}, it corresponds to the \code{<prefix>\_erase.fid} file.
\end{itemize}

These files should be copied to \code{<projectDir>/fixedStacks} (see section \ref{sec:project_directory}).

\begin{tip}You don't necessarily need to copy the tilt-series to the \code{fixedStacks} directory; use soft links: \code{ln -s <...>/<prefix>.mrc <...>/fixedStacks/<prefix>.fixed}\end{tip}

-------------------------------------------------------------------------------- /Sections/06_defocus_estimate.tex: --------------------------------------------------------------------------------

\section{Defocus estimate} \label{sec:defocus_estimate}

\subsection{Objectives}

There are two main objectives. First, create the aligned, optionally bead-erased, weighted stacks. ``Weighted'' refers to the per-view weighting applied by {\emClarity} to take into account the frequency-dependent drop in SNR due to radiation damage, the isotropic drop in SNR due to the increased thickness at higher tilt-angles causing inelastic scattering losses, and optionally the cosine dose-scheme, also referred to as the Saxton scheme. These stacks will then be used to compute the tomograms at later stages.
The second objective is to estimate the defocus of each view of the stack (two defoci and the astigmatism angle, per view).

\subsection{Parameters}

% table
\input{Figures_Tables/06_parameters}

\subsection{Run}

The \code{ctf estimate} routine has the following signature:
\begin{lstlisting}
>> emClarity ctf estimate <param> <prefix>
\end{lstlisting}
\code{<param>} is the name of the parameter file (e.g. \code{param\_ctf.m}), and \code{<prefix>} is the base-name of the tilt-series in \code{<projectDir>/fixedStacks} you wish to process.

For example, to run \code{ctf estimate} on the first tilt-series of the tutorial:
\begin{lstlisting}
>> emClarity ctf estimate param_ctf.m tilt1
\end{lstlisting}

If you have many tilt-series and you don't want to run all of them individually, you can do the following, which loops over every available stack and runs \code{ctf estimate} on each one of them.
\newpage
\begin{lstlisting}
#!/bin/bash
for stack in fixedStacks/*.fixed; do
  prefix=${stack#fixedStacks/}
  emClarity ctf estimate param_ctf.m ${prefix%.fixed}
done
\end{lstlisting}

If you haven't removed the bad images by now, \code{ctf estimate} can remove images from the stack. For instance, to remove the first view of tilt1, run:
\begin{lstlisting}
>> emClarity ctf estimate param_ctf.m tilt1 1
\end{lstlisting}

and to remove the first and last view of tilt11, run:
\begin{lstlisting}
>> emClarity ctf estimate param_ctf.m tilt11 [1,41]
\end{lstlisting}

\begin{note}You should remove the first view from the tilt-series tilt1 to tilt10, and the first and last views from tilt11 and tilt12.
\end{note}

\subsection{Outputs}

You should make sure the tilt-series ``looks aligned'' and that the average defocus (at the tilt axis) was correctly estimated. The best way to check:
\begin{enumerate}
\item Open \code{aliStacks/<prefix>\_ali1.fixed} and go through the views. The views should be aligned to the tilt-axis, which must be parallel to the Y axis (so vertical if you use {\threedmod}). If an \code{*.erase} file was available for this tilt-series, the beads should be removed.
\item Open \code{fixedStacks/ctf/<prefix>\_ali1\_psRadial\_1.pdf} and check that the theoretical CTF estimate (green) matches the radial average of the power spectrum of the tilt-series (black). Note that the amplitude doesn't matter here; the phase, on the other hand, does.
\item If they don't match, it is likely that you will need to adjust the \code{defEstimate} and \code{defWindow} parameters. Open \code{fixedStacks/ctf/*\_ccFIT.pdf}, which plots the cross-correlation score as a function of defocus. There is often an obvious correct peak, smoothly rising and falling. If you don't see this peak, try to change the sampled defoci with \code{defEstimate} $\pm$ \code{defWindow} and re-run \code{ctf estimate}.
\end{enumerate}
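For instance, if you expect a defocus of roughly 3.5e-6 m, the following (hypothetical) values would sample defoci between 2e-6 and 5e-6 m; this is a sketch, assuming the same meter units used by the other defocus parameters:

\begin{lstlisting}
% Illustrative fragment of a param file, not a complete file.
defEstimate=3.5e-6
defWindow=1.5e-6
\end{lstlisting}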
-------------------------------------------------------------------------------- /Sections/07_subregions.tex: -------------------------------------------------------------------------------- 1 | \section{Select sub-regions} \label{sec:subregions} 2 | 3 | \subsection{Objectives} 4 | Quite often, the regions of interest don't take up the entire field of view. To speed things up and save memory, you can select the sub-regions you would like {\emClarity} to focus on. These sub-regions can be changed up until the project is initialized (section \ref{sec:init}). 5 | 6 | You may notice as we go through this tutorial that we often think in terms of sub-regions and not in terms of full tomograms. For example, during picking (section \ref{sec:picking}), each sub-region is processed independently from the others, and if you decide to ignore one sub-region of a tomogram, you can. 7 | 8 | \begin{note} When the tilt-series alignment is later refined using tomogram-constrained projection refinement (tomoCPR), the full imaged area is reconstructed on the fly, and all sub-tomograms from all sub-regions are inserted into this reconstruction to generate references. This allows a weighted contribution from all particles of all species to be considered, related to what ``M'' \cite{Tegunov2020} refers to as multi-particle refinement. 9 | \end{note} 10 | 11 | \subsection{Create the boundaries for each sub-region} 12 | \begin{itemize} 13 | \item Download the \href{https://github.com/bHimes/emClarity}{recScript2.sh} script from the repository and copy it into the project directory. 14 | 15 | \item Prior to selecting the sub-regions, we need to create the tomograms containing the entire field of view. Running the following command from the project directory will do exactly that: 16 | \begin{lstlisting} 17 | ./recScript2.sh -1 18 | \end{lstlisting} 19 | By default, this will create a new directory, called \code{/bin10}, which will have a bin 10 tomogram for every stack created by \code{ctf estimate} (i.e. a tomogram for every\\ \code{aliStacks/$*$\_ali1.fixed}). 20 | 21 | \item Now that we have the tomograms, we can start thinking about the sub-regions we want to select. The goal is to define 6 points which define the boundaries of one sub-region ($x_{min},\ x_{max},\ y_{min},\ y_{max},\ z_{min}$ and $z_{max}$). So if you have 3 sub-regions you are interested in for a particular tomogram, you will need to define $6\times3=18$ points. 22 | 23 | This is most easily done by creating an \href{https://bio3d.colorado.edu/imod/doc/binspec.html}{\textcolor{myred}{IMOD model}}: 24 | \begin{itemize} 25 | \item Go into \code{/bin10} and open the first tomogram with {\threedmod}: 26 | \begin{lstlisting} 27 | 3dmod tilt1_bin10.rec 28 | \end{lstlisting} 29 | \item Select the ``Model'' mode and place the 6 points in the order specified above. Each point must be in its own contour. For the tutorial data-set, as the ribosomes are homogeneously spread across the entire tomogram, we recommend dividing the tomograms into 2 sub-regions of equal size. In this case, you'll need to create a sequence of 12 points. 30 | 31 | \item Save the model (\code{File} $\rightarrow$ \code{Save model}) with the same name as the tomogram but with the \code{.mod} extension. 32 | 33 | \item Repeat for each tomogram. 34 | 35 | \end{itemize} 36 | At the end of this step, you should have in \code{/bin10} one \code{*.mod} file per tilt-series you wish to process. 37 |
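If you want to double-check a model at this point, IMOD's \code{model2point} program can dump it to a plain-text file. This is an optional sanity check, not a step of the tutorial, and it assumes the IMOD programs are in your \code{PATH}:
\begin{lstlisting}
model2point tilt1_bin10.mod tilt1_bin10_points.txt
\end{lstlisting}
Each line of the output contains the $x,\ y,\ z$ coordinates of one point, so a tomogram divided into 2 sub-regions should give exactly 12 lines, in the order the points were placed.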
41 | \begin{note}Each sub-region is kept on disk, and only individual sub-tomograms are read in from it, reducing the amount of GPU memory needed. emClarity parallelizes jobs at the sub-region level. As such, dividing a tomogram into multiple smaller sub-regions might be more efficient down the line. This is especially true for sub-tomogram alignment (section \myref{sec:align}) and tilt-series refinement (section \myref{sec:tomoCPR}), since emClarity can distribute jobs across multiple servers. 42 | \end{note} 43 | 44 | \item This \code{bin10} directory is not directly used by {\emClarity}. You'll need to convert the \code{*.mod} files you just created into something {\emClarity} understands. You can do this for the first stack by running: 45 | \begin{lstlisting} 46 | ./recScript2.sh tilt1 47 | \end{lstlisting} 48 | Or, to convert every sub-region of every tomogram, run: 49 | \begin{lstlisting} 50 | #!/bin/bash 51 | for stack in bin10/*.mod; do 52 | prefix=${stack#bin10/} 53 | ./recScript2.sh ${prefix%_bin10.mod} 54 | done 55 | \end{lstlisting} 56 | 57 | This will create a directory called \code{/recon} with the \code{*.coords} files {\emClarity} needs to extract the sub-regions you selected: 58 | \input{Figures_Tables/07_recon_coords} 59 | 60 | \begin{note}If you change the coordinates of the sub-regions, you must do it before running \code{init} (section \myref{sec:init}), and you always need to refresh the \code{/recon} directory by re-running \code{./recScript2.sh <prefix>}. 61 | \end{note} 62 | 63 | \begin{note}In this last step, \code{recScript2.sh} needs Python 2 in the \code{PATH}. 64 | \end{note} 65 | \end{itemize} 66 | -------------------------------------------------------------------------------- /Sections/08_picking.tex: -------------------------------------------------------------------------------- 1 | \section{Picking} \label{sec:picking} 2 | 3 | \subsection{Objectives} 4 | 5 | It's time to pick the particles, i.e. the sub-tomograms. There are many ways to pick particles, but they usually all rely on the tomograms. Each particle can be described by its $x,\ y,\ z$ coordinates and $\phi,\ \theta,\ \psi$ Euler angles. {\emClarity} has a template-matching routine that can pick the sub-tomograms for you, but it requires a template. 6 | 7 | \subsection{Parameters} 8 | 9 | \input{Figures_Tables/08_parameters} 10 | 11 | \subsection{Run} \label{sec:picking:run} 12 | 13 | \subsubsection{Preparing the template} 14 | Before running \code{templateSearch}, you need to prepare a template. This template should have the same pixel size as specified by the \code{PIXEL\_SIZE} parameter. It doesn't need to be low-pass filtered, as {\emClarity} will do this internally. If you want to re-scale a map, you can run: 15 | \begin{lstlisting} 16 | >> emClarity rescale <fileIn> <fileOut> <pixelSizeIn> <pixelSizeOut> <method> 17 | \end{lstlisting} 18 | \code{<fileIn>} and \code{<fileOut>} are the name of your template and the output name of the re-scaled template, respectively. \code{<pixelSizeIn>} is the pixel size of your template and \code{<pixelSizeOut>} is the desired pixel size. \code{<method>} can be ``GPU'' or ``cpu''. A hypothetical example is given after the note below. 19 | 20 | 21 | \begin{note}We do provide a \href{https://github.com/bHimes/emClarity}{template} for this tutorial, but any 70S ribosome map should work. 22 | \end{note} 23 |
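As an illustration, here is a hypothetical \code{rescale} invocation, bringing a map from 1.5~\AA/pixel to 3.0~\AA/pixel on the GPU (the file names and pixel sizes are placeholders, not values from the tutorial):
\begin{lstlisting}
>> emClarity rescale myTemplate.mrc myTemplate_rescaled.mrc 1.5 3.0 GPU
\end{lstlisting}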
24 | \subsubsection{Generating the tomograms} 25 | 26 | The tomograms used for the template matching are CTF-multiplied, as described in sections \ref{sec:ctf_3d} and \ref{sec:algo:ctf_3d}. To generate them, simply run: 27 | \begin{lstlisting} 28 | >> emClarity ctf 3d <param> templateSearch 29 | \end{lstlisting} 30 | This will generate a tomogram for every sub-region defined in the \code{recon/*.coords} files (table \ref{tab:recon_coords}). 31 | 32 | \subsubsection{Template matching} 33 | 34 | The \code{templateSearch} routine has the following signature: 35 | \begin{lstlisting} 36 | >> emClarity templateSearch