├── .gitignore
├── .readthedocs.yaml
├── AGPL3.txt
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    ├── _templates
    │   └── breadcrumbs.html
    ├── coding.png
    ├── coding_google_docs.png
    ├── coding_vscode.png
    ├── conf.py
    ├── index.rst
    ├── make.bat
    ├── manuscript.rst
    ├── memo.png
    └── requirements.txt
├── paper.bib
├── paper.md
├── poetry.lock
├── pyproject.toml
├── qc_lockup.v0.png
└── qualitative_coding
    ├── cli
        ├── __init__.py
        ├── check.py
        ├── click_aliases.py
        ├── code.py
        ├── codebook.py
        ├── coders.py
        ├── codes
        │   ├── __init__.py
        │   ├── crosstab.py
        │   ├── find.py
        │   ├── list.py
        │   ├── rename.py
        │   └── stats.py
        ├── corpus
        │   ├── __init__.py
        │   ├── anonymize.py
        │   ├── import_media.py
        │   ├── list.py
        │   ├── move.py
        │   ├── remove.py
        │   └── update.py
        ├── decorators.py
        ├── export.py
        ├── init.py
        ├── memo.py
        ├── options.py
        ├── upgrade.py
        └── version.py
    ├── codebook.py
    ├── corpus.py
    ├── database
        ├── errors.py
        └── models.py
    ├── demo.qdpx
    ├── diff.py
    ├── editors.py
    ├── exceptions.py
    ├── helpers.py
    ├── logs.py
    ├── media_importers
        ├── __init__.py
        ├── base.py
        ├── pandoc.py
        ├── verbatim.py
        └── vtt.py
    ├── migrations
        ├── __init__.py
        ├── migration.py
        ├── migration_0_2_3.py
        ├── migration_1_0_0.py
        └── migration_1_4_0.py
    ├── refi_qda
        ├── nvivo_project.qdpx
        ├── reader.py
        ├── schema.xsd
        └── writer.py
    ├── tests
        ├── __init__.py
        ├── fixtures.py
        ├── mock_editor.py
        ├── test_check.py
        ├── test_code.py
        ├── test_code_parsing.py
        ├── test_codebook.py
        ├── test_coders.py
        ├── test_codes_crosstab.py
        ├── test_codes_find.py
        ├── test_codes_list.py
        ├── test_codes_rename.py
        ├── test_codes_stats.py
        ├── test_corpus_anonymize.py
        ├── test_corpus_import.py
        ├── test_corpus_move.py
        ├── test_corpus_remove.py
        ├── test_corpus_update.py
        ├── test_export.py
        ├── test_init.py
        ├── test_init_import.py
        ├── test_logs.py
        ├── test_memo.py
        ├── test_read_diff_offsets.py
        ├── test_refi_qda_writer.py
        ├── test_tree_node.py
        ├── test_upgrade.py
        └── test_version.py
    ├── tree_node.py
    ├── user_input.py
    └── views
        ├── coding_ui.py
        ├── styles.py
        └── viewer.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.swp
 2 | *.pyc
 3 | .DS_Store
 4 | *.swo
 5 | qualitative_coding.egg-info/*
 6 | dist/*
 7 | NOTES.md
 8 | TODO.md
 9 | docs/_build/*
10 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file for Sphinx projects
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | build:
 8 |   os: ubuntu-22.04
 9 |   tools:
10 |     python: "3.12"
11 | 
12 | sphinx:
13 |   configuration: docs/conf.py
14 | 
15 | python:
16 |   install:
17 |     - requirements: docs/requirements.txt
18 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | [Chris Proctor](mailto:chrisp@buffalo.edu), the project lead, would be delighted to hear about your experience 
 4 | using `qc`. Bug reports, feature requests, and discussion of the future directions of `qc` take place 
 5 | on the [project repository’s issues page](https://github.com/cproctor/qualitative-coding/issues).
 6 | Code contributions to this project should be via pull requests on this repository.
 7 | 
 8 | ## How you can help
 9 | 
10 | `qc` is a young project; it's not yet clear how (or if) the project should grow to best support 
11 | its users. Probably the best way to contribute is to use `qc` in your own research, and to share
12 | limitations you discover or new features you wish existed. This 
13 | [repository’s issues page](https://github.com/cproctor/qualitative-coding/issues) would be a great
14 | place to iterate on ideas for new features. 
15 | 
16 | ## Code of conduct
17 | 
18 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md)
19 | 
20 | This project adopts the [Contributor Covenant](https://www.contributor-covenant.org/). 
21 | Please contact [Chris Proctor](mailto:chrisp@buffalo.edu) if you experience treatment which makes you 
22 | feel unsafe or unwelcome. 
23 | 
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | QC: Qualitative coding for computer scientists
 2 | Copyright (C) 2019 Chris Proctor 
 3 | chrisproctor.net
 4 | pypi.org@accounts.chrisproctor.net
 5 | 
 6 | This program is free software: you can redistribute it and/or modify
 7 | it under the terms of the GNU Affero General Public License as published
 8 | by the Free Software Foundation, either version 3 of the License, or
 9 | (at your option) any later version.
10 | 
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | GNU Affero General Public License for more details.
15 | 
16 | You should have received a copy of the GNU Affero General Public License
17 | along with this program (AGPL3.txt).  If not, see 
18 | <https://www.gnu.org/licenses/>.
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![QC logo](qc_lockup.v0.png)
 2 | 
 3 | [![status](https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729/status.svg)](https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729)
 4 | 
 5 | `qc` is a free, open-source command-line-based tool for qualitative data 
 6 | analysis designed to support computational thinking. In addition to making the 
 7 | qualitative data analysis process more efficient, computational thinking can 
 8 | contribute to the richness of subjective interpretation. The typical workflow
 9 | in qualitative research is an iterative cycle of "notice things," "think about 
10 | things," and "collect things" (Seidel, 1998). `qc` provides
11 | computational affordances for each of these practices, including the ability to 
12 | integrate manual coding with automated coding, a tree-based hierarchy of codes
13 | stored in a YAML file, allowing versioning of thematic analysis, and a powerful
14 | query interface for viewing code statistics and snippets of coded documents. 
15 | 
16 | Qualitative data analysis, in its various forms, is a core methodology for 
17 | qualitative, mixed methods, and some quantitative research in the social 
18 | sciences. Although there are a variety of well-known commercial QDA software 
19 | packages such as NVivo, Dedoose, ATLAS.ti, and MAXQDA, they are generally 
20 | designed to protect users from complexity rather than providing 
21 | affordances for engaging with complexity via algorithms and data structures. 
22 | The central design hypothesis of `qc` is that a closer partnership between
23 | the researcher and the computational tool can enhance the quality of QDA.
24 | `qc` adopts the "unix philosophy" (McIlroy, 1978) of building tools which do 
25 | one thing well while being composable into flexible workflows, and the 
26 | values of "plain-text social science" (Healy, 2020), emphasizing 
27 | reproducibility, transparency, and collaborative open science. 
28 | 
29 | `qc` was used in [a prior paper](https://chrisproctor.net/research/proctor_2019_defining/)
30 | and the author's doctoral dissertation; `qc` is currently a core tool supporting a large 
31 | NSF-funded Delphi study involving multiple interviews 
32 | with forty participant experts, open coding with over a thousand distinct 
33 | codes, four separate coders, and several custom machine learning tools 
34 | supporting the research team with clustering and synthesizing emergent themes.
35 | `qc` is a free, open-source command-line-based tool for qualitative data analysis
36 | designed to support computational thinking. In addition to making the qualitative data 
37 | analysis process more efficient, computational thinking can contribute to the richness 
38 | of subjective interpretation. Although numerous powerful software packages exist 
39 | for qualitative data analysis, they are generally designed to protect users from complexity 
40 | rather than providing affordances for engaging with complexity via algorithms and 
41 | data structures. 
42 | 
43 | ## Installation
44 | 
45 | `qc` is distributed via the Python Package Index (PyPI), and can be
46 | installed on any POSIX system (Linux, Unix, macOS, or Windows Subsystem
47 | for Linux) which has Python 3.11 or 3.12 installed. If you want to install
48 | `qc` globally on your system, the cleanest approach is to use 
49 | [pipx](https://pipx.pypa.io/stable/). 
50 | 
51 |     pipx install qualitative-coding
52 | 
53 | If your research project
54 | is already contained within a Python package and you want to install `qc` 
55 | as a local dependency, simply add `qualitative-coding` to `pyproject.toml`
56 | or `requirements.txt`.
57 | 
58 | `qc` relies on [Pandoc](https://pandoc.org/) for converting between file formats, 
59 | so make sure that is installed as well. `qc` uses a text editor for coding; 
60 | you should install Visual Studio Code, the default editor, unless you prefer
61 | a different editor such as emacs or vim.
62 | 
63 | ## Usage
64 | 
65 | Please see the [package documentation](https://qualitative-coding.readthedocs.io) 
66 | for details on the design of `qc`, a vignette illustrating its usage, and full 
67 | documentation of `qc`'s commands. 
68 | 
69 | ## Acknowledgements
70 | 
71 | Partial support for development of `qc` was provided by UB's Digital Studio Scholarship
72 | Network. Logo design by Blessed Mhungu. 
73 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/_templates/breadcrumbs.html:
--------------------------------------------------------------------------------
1 | {%- extends "sphinx_rtd_theme/breadcrumbs.html" %}
2 | 
3 | {% block breadcrumbs %}
4 | {% endblock %}
5 | 
6 | {% block breadcrumbs_aside %}
7 | {% endblock %}
8 | 


--------------------------------------------------------------------------------
/docs/coding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding.png


--------------------------------------------------------------------------------
/docs/coding_google_docs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding_google_docs.png


--------------------------------------------------------------------------------
/docs/coding_vscode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding_vscode.png


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # For the full list of built-in configuration values, see the documentation:
 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 5 | 
 6 | # -- Project information -----------------------------------------------------
 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 8 | 
 9 | project = 'Qualitative Coding'
10 | copyright = '2024, Chris Proctor'
11 | author = 'Chris Proctor'
12 | release = '1.7.3'
13 | 
14 | # -- General configuration ---------------------------------------------------
15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
16 | 
17 | extensions = []
18 | 
19 | templates_path = ['_templates']
20 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
21 | 
22 | 
23 | 
24 | # -- Options for HTML output -------------------------------------------------
25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
26 | 
27 | html_theme = 'sphinx_rtd_theme'
28 | html_static_path = ['_static']
29 | html_logo = '../qc_lockup.v0.png'
30 | html_theme_options = {
31 |     'logo_only': True,
32 |     'display_version': False,
33 | }
34 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. Qualitative Coding documentation master file, created by
 2 |    sphinx-quickstart on Tue May 28 09:51:22 2024.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | `qc`: A tool for qualitative data analysis designed to support computational thinking
 7 | =====================================================================================
 8 | 
 9 | .. image:: https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729/status.svg
10 |    :alt: Journal of Open Source Software
11 |    :target: https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729
12 | 
13 | ``qc`` is a free, open-source command-line-based tool for qualitative
14 | data analysis designed to support computational thinking. In addition to
15 | making the qualitative data analysis process more efficient, computational
16 | thinking can contribute to the richness of subjective interpretation.
17 | Although numerous powerful software packages exist for qualitative data
18 | analysis, they are generally designed to protect users from complexity
19 | rather than providing affordances for engaging with complexity via
20 | algorithms and data structures. 
21 | 
22 | .. _installation:
23 | 
24 | Installation
25 | ------------
26 | 
27 | Prerequisites
28 | ~~~~~~~~~~~~~
29 | 
30 | ``qc`` runs on the command line (also called a shell or a terminal), so you will 
31 | need to be familiar with using a command line in order to use ``qc``.
32 | 
33 | * `Python <https://www.python.org/downloads/>`__ 3.11 or 3.12.
34 | * `Pandoc <https://pandoc.org/>`__. ``qc`` relies on Pandoc for converting between
35 |   file formats.
36 | * A code editor. You should install 
37 |   `Visual Studio Code <https://code.visualstudio.com/>`__, the default
38 |   editor, unless you prefer a different editor such as emacs or vim.
39 | * Terminal bindings for the code editor, allowing the code editor to be 
40 |   opened from Terminal. `Here are instructions for Visual Studio Code <https://code.visualstudio.com/docs/setup/mac#_launching-from-the-command-line>`_.
41 | * The `Sync Scroll <https://marketplace.visualstudio.com/items?itemName=dqisme.sync-scroll>`__
42 |   extension for Visual Studio Code.
43 | 
44 | Install with pip or pipx
45 | ~~~~~~~~~~~~~~~~~~~~~~~~
46 | 
47 | ``qc`` is distributed via the Python Package Index (PyPI). If you want to
48 | install ``qc`` globally on your system, the cleanest approach is to use
49 | `pipx <https://pipx.pypa.io/stable/>`__.
50 | 
51 | .. note::
52 | 
53 |    The command below (and others throughout this documentation)
54 |    is intended to be entered into a terminal. 
55 |    The ``%`` character is the command prompt indicating that the
56 |    terminal is ready for input; don't type it into your terminal.
57 |    Don't worry if your terminal uses a different command prompt 
58 |    such as ``$``.
59 | 
60 | .. code-block:: console
61 | 
62 |    % pipx install qualitative-coding
63 | 
64 | Install as a dependency
65 | ~~~~~~~~~~~~~~~~~~~~~~~
66 | 
67 | If your research project is already contained within a Python package
68 | and you want to install ``qc`` as a local dependency, simply add
69 | ``qualitative-coding`` to ``pyproject.toml`` or ``requirements.txt``.
70 | 
71 | Stuck?
72 | ~~~~~~
73 | 
74 | If you get stuck installing ``qc``, feel free to email 
75 | Chris Proctor (chrisp@buffalo.edu), the project lead.
76 | 
77 | .. toctree::
78 |    :maxdepth: 1
79 |    :caption: Contents:
80 | 
81 |    manuscript
82 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/memo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/memo.png


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==7.3.7
2 | sphinx_rtd_theme==2.0.0
3 | 


--------------------------------------------------------------------------------
/paper.bib:
--------------------------------------------------------------------------------
  1 | @article{dhakal2022nvivo,
  2 |   title={NVivo},
  3 |   author={Dhakal, Kerry},
  4 |   journal={Journal of the Medical Library Association: JMLA},
  5 |   volume={110},
  6 |   number={2},
  7 |   pages={270},
  8 |   year={2022},
  9 |   publisher={Medical Library Association}
 10 | }
 11 | @article{engelbart1962,
 12 |   title = {Augmenting Human Intellect: A Conceptual Framework},
 13 |   author = {Engelbart, Douglas C},
 14 |   year = {1962},
 15 |   doi = {10.21236/ad0289565},
 16 |   pages = {64--90}
 17 | }
 18 | @techreport{healy2020,
 19 |   title = {The {{Plain Person}}'s {{Guide}} to {{Plain Text Social Science}}},
 20 |   author = {Healy, Kieran},
 21 |   year = {2020}
 22 | }
 23 | @article{kidder1987,
 24 |   title = {Qualitative and Quantitative Methods: {{When}} Stories Converge},
 25 |   shorttitle = {Qualitative and Quantitative Methods},
 26 |   author = {Kidder, Louise H. and Fine, Michelle},
 27 |   year = {1987},
 28 |   month = sep,
 29 |   journal = {New Directions for Program Evaluation},
 30 |   volume = {1987},
 31 |   number = {35},
 32 |   pages = {57--75},
 33 |   issn = {0164-7989, 1551-2371},
 34 |   doi = {10.1002/ev.1459},
 35 |   urldate = {2024-09-15},
 36 |   abstract = {Abstract             The use of qualitative measures in a quantitative framework results in a reasonable likelihood of triangulation; in contrast, the independent conduct of qualitative and quantitative evaluations is a greater challenge for triangulation, but it also holds promise for greater discovery.},
 37 |   copyright = {http://onlinelibrary.wiley.com/termsAndConditions\#vor},
 38 |   langid = {english}
 39 | }
 40 | @article{kuckartz2010realizing,
 41 |   title={Realizing mixed-methods approaches with MAXQDA},
 42 |   author={Kuckartz, Udo},
 43 |   journal={Philipps-Universit{\"a}t, Marburg},
 44 |   year={2010}
 45 | }
 46 | @article{mcilroy1978,
 47 |   title = {{{UNIX}} Time-Sharing System},
 48 |   author = {McIlroy, Doug and Pinson, E and Tague, B},
 49 |   year = {1978},
 50 |   journal = {The Bell System Technical Journal},
 51 |   pages = {1902--1903}
 52 | }
 53 | @article{ogbeifun2016delphi,
 54 |   title={The {Delphi} technique: A credible research methodology},
 55 |   author={Ogbeifun, E and Agwa-Ejon, J and Mbohwa, Charles and Pretorius, JH},
 56 |   year={2016}
 57 | }
 58 | @incollection{pea1997,
 59 |   title = {Practices of Distributed Intelligence and Designs for Education},
 60 |   booktitle = {Distributed Cognitions: Psychological and Educational Considerations},
 61 |   author = {Pea, Roy D},
 62 |   editor = {Salomon, Gavriel},
 63 |   year = {1997},
 64 |   series = {Learning in Doing},
 65 |   pages = {47--87},
 66 |   publisher = {Cambridge University Press},
 67 |   address = {New York},
 68 |   isbn = {978-0-521-57423-5 978-0-521-41406-7},
 69 |   langid = {english},
 70 |   lccn = {BF311 .D538 1997},
 71 |   keywords = {{Learning, Psychology of},Distributed cognition,Social aspects}
 72 | }
 73 | @inproceedings{proctor2019,
 74 |   title = {Defining and Designing Computer Science Education in a {{K12}} Public School District},
 75 |   booktitle = {Proceedings of the 50th {{ACM}} Technical Symposium on Computer Science Education},
 76 |   author = {Proctor, Chris and Bigman, Maxwell and Blikstein, Paulo},
 77 |   year = {2019},
 78 |   series = {{{SIGCSE}} '19},
 79 |   pages = {314--320},
 80 |   publisher = {Association for Computing Machinery},
 81 |   address = {New York, NY, USA},
 82 |   doi = {10.1145/3287324.3287440},
 83 |   isbn = {978-1-4503-5890-3},
 84 |   keywords = {computational thinking,equity,k12 cs}
 85 | }
 86 | @book{salmona2019qualitative,
 87 |   title={Qualitative and mixed methods data analysis using {Dedoose}: A practical approach for research across the social sciences},
 88 |   author={Salmona, Michelle and Lieber, Eli and Kaczynski, Dan},
 89 |   year={2019},
 90 |   publisher={Sage Publications}
 91 | }
 92 | 
 93 | @article{seidel1998qualitative,
 94 |   title = {Qualitative Data Analysis},
 95 |   author = {Seidel, John V},
 96 |   year = {1998}
 97 | }
 98 | @article{smit2002atlas,
 99 |   title={{ATLAS.ti} for qualitative data analysis},
100 |   author={Smit, Brigitte},
101 |   journal={Perspectives in education},
102 |   volume={20},
103 |   number={3},
104 |   pages={65--75},
105 |   year={2002},
106 |   publisher={University of the Free State}
107 | }
108 | @article{wing2011research,
109 |   title = {Research Notebook: {{Computational}} Thinking---{{What}} and Why},
110 |   author = {Wing, Jeanette},
111 |   year = {2011},
112 |   journal = {The link magazine},
113 |   volume = {6},
114 |   pages = {20--23}
115 | }
116 | 


--------------------------------------------------------------------------------
/paper.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "A tool for qualitative data analysis designed to support computational thinking"
 3 | tags: 
 4 |  - qualitative data analysis
 5 |  - qualitative coding
 6 |  - computational thinking
 7 |  - computational social science
 8 |  - python
 9 | authors: 
10 |  - name: Chris Proctor
11 |    orcid: 0000-0003-3492-9590
12 |    affiliation: 1
13 | affiliations: 
14 |  - name: Graduate School of Education, University at Buffalo (SUNY), United States
15 |    index: 1
16 | date: 28 May 2024
17 | bibliography: paper.bib
18 | ---
19 | 
20 | # Summary
21 | 
22 | `qc` is a free, open-source command-line-based tool for qualitative data 
23 | analysis designed to support computational thinking. In addition to making the 
24 | qualitative data analysis process more robust and efficient, computational thinking can 
25 | contribute to the richness of subjective interpretation. The typical workflow
26 | in qualitative research is an iterative cycle of "notice things," "think about 
27 | things," and "collect things" [@seidel1998qualitative, p. 2]. `qc` provides
28 | computational affordances for each of these practices, including the ability to 
29 | integrate manual coding with automated coding, a tree-based hierarchy of codes
30 | stored in a YAML file, allowing versioning of thematic analysis, and a powerful
31 | query interface for viewing code statistics and snippets of coded documents. 
32 | 
33 | # Background
34 | 
35 | `qc` is designed to support the application of computational thinking
36 | (CT) to qualitative data analysis (QDA). In the social
37 | sciences, QDA is a method of applying codes to text, images, video, and
38 | other artifacts, then analyzing the resulting patterns of codes and
39 | using the codes to more deeply understand the text. 
40 | When QDA is used in quantitative or mixed-methods research, it is
41 | typically used to transform loosely-structured data such
42 | as an interview transcript into categories or codes which can then be
43 | used in downstream quantitative analysis answering predefined research
44 | questions. In contrast, when QDA is used in qualitative research, 
45 | it is typically part of an interpretive sensemaking process. These two uses
46 | of QDA have been referred to as *little-q* ("looking for answers") and
47 | *big-Q* ("looking for questions") qualitative research [@kidder1987].
48 | 
49 | The central design hypothesis of `qc` is that a closer partnership
50 | between the researcher and the computational tool can enhance the
51 | quality of QDA. This partnership, which could be characterized as 
52 | augmented [@engelbart1962] or distributed cognition [@pea1997], depends on
53 | the researcher's ability to conceptualize the data and the process in
54 | computational terms, becoming immersed in the matrices, trees, and other
55 | computational structures inherent to QDA rather than remaining "outside"
56 | at the level of user interface. Such practices can be identified as *computational
57 | thinking* (CT), "the thought processes involved in
58 | formulating problems and their solutions so that the solutions are
59 | represented in a form that can effectively be carried out by an
60 | information-processing agent" [@wing2011research]. The application of CT to
61 | QDA would mean conceptualizing the goal and the process of QDA in
62 | computational terms, keeping a mental model of the work the computer is
63 | doing for you.
64 | 
65 | # Statement of need
66 | 
67 | Although there are numerous well-known commercial QDA software 
68 | packages such as NVivo [@dhakal2022nvivo], Dedoose [@salmona2019qualitative], 
69 | ATLAS.ti [@smit2002atlas], and MAXQDA [@kuckartz2010realizing], they do not 
70 | provide affordances for users desiring more active engagement with the data and 
71 | processes underlying QDA. `qc` better supports such users, providing a scriptable 
72 | command-line interface with powerful and flexible queries, with data stored in simple 
73 | and standardized formats. `qc` adopts the "unix philosophy" [@mcilroy1978] 
74 | of building tools which do one thing well while being composable into 
75 | flexible workflows, and the values of "plain-text social science" [@healy2020], emphasizing 
76 | reproducibility, transparency, and collaborative open science. 
77 | 
78 | `qc` was used in [@proctor2019] (described but not cited) and the author's 
79 | doctoral dissertation; `qc` is currently a core tool supporting a large 
80 | NSF-funded Delphi study [@ogbeifun2016delphi] involving multiple interviews 
81 | with forty participant experts, open coding with over a thousand distinct 
82 | codes, four separate coders, and several custom machine learning tools 
83 | supporting the research team with clustering and synthesizing emergent themes.
84 | 
85 | # Acknowledgements
86 | 
87 | Development of `qc` was funded in part by a grant from the University at Buffalo's 
88 | Digital Scholarship Studio Network. 
89 | 
90 | # References
91 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "qualitative-coding"
 3 | version = "1.7.3"
 4 | description = "Qualitative coding tools to support computational thinking"
 5 | authors = [
 6 |     {name = "Chris Proctor",email = "chris@chrisproctor.net"}
 7 | ]
 8 | license = {text = "AGPL3"}
 9 | readme = "README.md"
10 | requires-python = ">=3.11,<3.13"
11 | dependencies = [
12 |     "tqdm (>=4.67.1,<5.0.0)",
13 |     "tabulate (>=0.9.0,<0.10.0)",
14 |     "numpy (>=2.2.1,<3.0.0)",
15 |     "pyyaml (>=6.0.2,<7.0.0)",
16 |     "click (>=8.1.8,<9.0.0)",
17 |     "sqlalchemy (>=2.0.37,<3.0.0)",
18 |     "semver (>=3.0.2,<4.0.0)",
19 |     "xmlschema (>=3.4.3,<4.0.0)",
20 |     "structlog (>=25.1.0,<26.0.0)",
21 |     "rich (>=13.9.4,<14.0.0)",
22 |     "more-itertools (>=10.6.0,<11.0.0)",
23 |     "spacy (>=3.8.4,<4.0.0)",
24 |     "webvtt-py (>=0.5.1,<0.6.0)"
25 | ]
26 | 
27 | [project.urls]
28 | homepage = "https://qualitative-coding.readthedocs.io"
29 | repository = "https://github.com/cproctor/qualitative-coding"
30 | 
31 | [project.scripts]
32 | qc = "qualitative_coding.cli:cli"
33 | 
34 | [build-system]
35 | requires = ["poetry-core>=2.0.0,<3.0.0"]
36 | build-backend = "poetry.core.masonry.api"
37 | 
38 | [tool.poetry.group.docs]
39 | optional = true
40 | 
41 | [tool.poetry.group.docs.dependencies]
42 | sphinx = "^7.3.7"
43 | sphinx-rtd-theme = "^2.0.0"
44 | 


--------------------------------------------------------------------------------
/qc_lockup.v0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/qc_lockup.v0.png


--------------------------------------------------------------------------------
/qualitative_coding/cli/__init__.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | from qualitative_coding.cli.click_aliases import ClickAliasedGroup
 3 | from qualitative_coding.cli.init import init
 4 | from qualitative_coding.cli.export import export
 5 | from qualitative_coding.cli.corpus import corpus_group
 6 | from qualitative_coding.cli.codes import codes_group
 7 | from qualitative_coding.cli.version import version
 8 | from qualitative_coding.cli.check import check
 9 | from qualitative_coding.cli.codebook import codebook
10 | from qualitative_coding.cli.code import code
11 | from qualitative_coding.cli.coders import coders
12 | from qualitative_coding.cli.memo import memo
13 | from qualitative_coding.cli.upgrade import upgrade
14 | import logging
15 | import structlog
16 | 
@click.group(cls=ClickAliasedGroup)
def cli():
    "Qualitative coding for computer scientists"

# Attach every top-level subcommand to the root group, in the same order as
# before. Only `codebook` carries an alias ("cb").
for _command in (init, export, corpus_group, codes_group, version, check):
    cli.add_command(_command)
cli.add_command(codebook, aliases=["cb"])
for _command in (code, coders, memo, upgrade):
    cli.add_command(_command)
del _command
32 | 
33 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/check.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.logs import configure_logger
 5 | from qualitative_coding.cli.decorators import (
 6 |     handle_qc_errors,
 7 | )
 8 | 
 9 | @click.command()
10 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
11 | @handle_qc_errors
12 | def check(settings):
13 |     "Check project for errors"
14 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
15 |     log = configure_logger(settings_path)
16 |     log.info("check")
17 |     corpus = QCCorpus(settings_path)
18 |     with corpus.session():
19 |         corpus.validate_corpus_paths()
20 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/click_aliases.py:
--------------------------------------------------------------------------------
  1 | """
  2 |     Copied from https://github.com/click-contrib/click-aliases.
  3 |     The version released on PyPI lacks a needed recent feature
  4 |     (ClickAliasedGroup.add_command)
  5 | """
  6 | 
  7 | import click
  8 | 
  9 | _click7 = click.__version__[0] >= '7'
 10 | 
 11 | 
 12 | class ClickAliasedGroup(click.Group):
 13 |     def __init__(self, *args, **kwargs):
 14 |         super(ClickAliasedGroup, self).__init__(*args, **kwargs)
 15 |         self._commands = {}
 16 |         self._aliases = {}
 17 | 
 18 |     def add_command(self, *args, **kwargs):
 19 |         aliases = kwargs.pop('aliases', [])
 20 |         super(ClickAliasedGroup, self).add_command(*args, **kwargs)
 21 |         if aliases:
 22 |             cmd = args[0]
 23 |             name = args[1] if len(args) > 1 else None
 24 |             name = name or cmd.name
 25 |             if name is None:
 26 |                 raise TypeError('Command has no name.')
 27 | 
 28 |             self._commands[name] = aliases
 29 |             for alias in aliases:
 30 |                 self._aliases[alias] = cmd.name
 31 | 
 32 |     def command(self, *args, **kwargs):
 33 |         aliases = kwargs.pop('aliases', [])
 34 |         decorator = super(ClickAliasedGroup, self).command(*args, **kwargs)
 35 |         if not aliases:
 36 |             return decorator
 37 | 
 38 |         def _decorator(f):
 39 |             cmd = decorator(f)
 40 |             if aliases:
 41 |                 self._commands[cmd.name] = aliases
 42 |                 for alias in aliases:
 43 |                     self._aliases[alias] = cmd.name
 44 |             return cmd
 45 | 
 46 |         return _decorator
 47 | 
 48 |     def group(self, *args, **kwargs):
 49 |         aliases = kwargs.pop('aliases', [])
 50 |         decorator = super(ClickAliasedGroup, self).group(*args, **kwargs)
 51 |         if not aliases:
 52 |             return decorator
 53 | 
 54 |         def _decorator(f):
 55 |             cmd = decorator(f)
 56 |             if aliases:
 57 |                 self._commands[cmd.name] = aliases
 58 |                 for alias in aliases:
 59 |                     self._aliases[alias] = cmd.name
 60 |             return cmd
 61 | 
 62 |         return _decorator
 63 | 
 64 |     def resolve_alias(self, cmd_name):
 65 |         if cmd_name in self._aliases:
 66 |             return self._aliases[cmd_name]
 67 |         return cmd_name
 68 | 
 69 |     def get_command(self, ctx, cmd_name):
 70 |         cmd_name = self.resolve_alias(cmd_name)
 71 |         command = super(ClickAliasedGroup, self).get_command(ctx, cmd_name)
 72 |         if command:
 73 |             return command
 74 | 
 75 |     def format_commands(self, ctx, formatter):
 76 |         rows = []
 77 | 
 78 |         sub_commands = self.list_commands(ctx)
 79 | 
 80 |         max_len = 0
 81 |         if len(sub_commands) > 0:
 82 |             max_len = max(len(cmd) for cmd in sub_commands)        
 83 |             
 84 |         limit = formatter.width - 6 - max_len
 85 | 
 86 |         for sub_command in sub_commands:
 87 |             cmd = self.get_command(ctx, sub_command)
 88 |             if cmd is None:
 89 |                 continue
 90 |             if hasattr(cmd, 'hidden') and cmd.hidden:
 91 |                 continue
 92 |             if sub_command in self._commands:
 93 |                 aliases = ','.join(sorted(self._commands[sub_command]))
 94 |                 sub_command = '{0} ({1})'.format(sub_command, aliases)
 95 |             if _click7:
 96 |                 cmd_help = cmd.get_short_help_str(limit)
 97 |             else:
 98 |                 cmd_help = cmd.short_help or ''
 99 |             rows.append((sub_command, cmd_help))
100 | 
101 |         if rows:
102 |             with formatter.section('Commands'):
103 |                 formatter.write_dl(rows)
104 | 
105 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/code.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import yaml
 3 | import os
 4 | from pathlib import Path
 5 | from qualitative_coding.corpus import QCCorpus
 6 | from qualitative_coding.exceptions import QCError, IncompatibleOptions
 7 | from qualitative_coding.views.viewer import QCCorpusViewer
 8 | from qualitative_coding.cli.decorators import handle_qc_errors
 9 | from qualitative_coding.helpers import read_file_list
10 | from qualitative_coding.logs import configure_logger
11 | 
@click.command()
@click.argument("coder")
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern",
              help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames",
              help="File path containing a list of filenames to use")
@click.option("-u", "--uncoded", is_flag=True, help="Select uncoded files")
@click.option("-1", "--first", is_flag=True, help="Select first uncoded file")
@click.option("-r", "--random", is_flag=True, help="Select random uncoded file")
@click.option("--recover", is_flag=True, help="Recover incomplete coding session")
@click.option("--abandon", is_flag=True, help="Abandon incomplete coding session")
@handle_qc_errors
def code(coder, settings, pattern, filenames, uncoded, first, random, recover, abandon):
    "Open a file for coding"
    if first and random:
        raise IncompatibleOptions("--first and --random cannot both be used.")

    # An explicit --settings value wins; otherwise use $QC_SETTINGS, and
    # finally fall back to "settings.yaml".
    if settings:
        config_file = settings
    else:
        config_file = os.environ.get("QC_SETTINGS", "settings.yaml")
    logger = configure_logger(config_file)
    logger.info(
        "code",
        coder=coder,
        pattern=pattern,
        filenames=filenames,
        uncoded=uncoded,
        first=first,
        random=random,
        recover=recover,
        abandon=abandon,
    )
    viewer = QCCorpusViewer(QCCorpus(config_file))

    # --recover takes precedence over --abandon when both are given.
    if recover:
        viewer.recover_incomplete_coding_session(coder)
        return
    if abandon:
        viewer.abandon_incomplete_coding_session()
        return

    # Refuse to start a new session while an unfinished one is pending.
    if viewer.incomplete_coding_session_exists():
        raise QCError(
            "An incomplete coding session exists. "
            "Run qc code coder --recover to recover this coding session or "
            "qc code coder --abandon to abandon it."
        )
    selected = viewer.select_file(
        coder,
        pattern=pattern,
        file_list=read_file_list(filenames),
        uncoded=uncoded,
        first=first,
        random=random,
    )
    viewer.open_editor(selected, coder)
56 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codebook.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.cli.decorators import handle_qc_errors
 5 | from qualitative_coding.logs import configure_logger
 6 | 
 7 | @click.command()
 8 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
 9 | @handle_qc_errors
10 | def codebook(settings):
11 |     "Update the codebook"
12 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
13 |     log = configure_logger(settings_path)
14 |     log.info("codebook")
15 |     corpus = QCCorpus(settings_path)
16 |     with corpus.session():
17 |         corpus.update_codebook()
18 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/coders.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.cli.decorators import handle_qc_errors
 5 | from qualitative_coding.logs import configure_logger
 6 | 
 7 | @click.command()
 8 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
 9 | @click.option("-d", "--delete", help="Delete a coder")
10 | @handle_qc_errors
11 | def coders(settings, delete):
12 |     "List all coders"
13 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
14 |     log = configure_logger(settings_path)
15 |     log.info("coders", delete=delete)
16 |     corpus = QCCorpus(settings_path)
17 |     with corpus.session():
18 |         if delete:
19 |             corpus.delete_coder(delete)
20 |         else:
21 |             for coder in corpus.get_all_coders():
22 |                 print(coder.name)
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/__init__.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | from qualitative_coding.cli.click_aliases import ClickAliasedGroup
 3 | from qualitative_coding.cli.codes.stats import stats
 4 | from qualitative_coding.cli.codes.list import _list
 5 | from qualitative_coding.cli.codes.rename import rename
 6 | from qualitative_coding.cli.codes.crosstab import crosstab
 7 | from qualitative_coding.cli.codes.find import find
 8 | 
 9 | @click.group(name="codes", cls=ClickAliasedGroup)
10 | def codes_group():
11 |     "Codes commands"
12 | 
13 | codes_group.add_command(crosstab, aliases=["ct"])
14 | codes_group.add_command(find)
15 | codes_group.add_command(_list, aliases=['ls'])
16 | codes_group.add_command(rename, aliases=["rn"])
17 | codes_group.add_command(stats)
18 | 
19 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/crosstab.py:
--------------------------------------------------------------------------------
  1 | import click
  2 | import os
  3 | import yaml
  4 | from pathlib import Path
  5 | from tabulate import tabulate_formats
  6 | from qualitative_coding.corpus import QCCorpus
  7 | from qualitative_coding.views.viewer import QCCorpusViewer
  8 | from qualitative_coding.cli.decorators import handle_qc_errors
  9 | from qualitative_coding.exceptions import IncompatibleOptions
 10 | from qualitative_coding.helpers import read_file_list
 11 | from qualitative_coding.logs import configure_logger
 12 | 
 13 | @click.command()
 14 | @click.argument("codes", nargs=-1)
 15 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
 16 | @click.option("-p", "--pattern", 
 17 |         help="Pattern to filter corpus filenames (glob-style)")
 18 | @click.option("-f", "--filenames", 
 19 |         help="File path containing a list of filenames to use")
 20 | @click.option("-c", "--coders", help="Coders", multiple=True)
 21 | @click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
 22 | @click.option("-n", "--unit", default="line", help="Unit of analysis",
 23 |         type=click.Choice(['line', 'paragraph', 'document']))
 24 | @click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True, 
 25 |         help="Include child codes")
 26 | @click.option("-a", "--recursive-counts", is_flag=True,
 27 |         help="Counts for codes include child codes")
 28 | @click.option("-e", "--expanded", is_flag=True,
 29 |         help="Show names of codes in expanded form")
 30 | @click.option("-m", "--format", "_format", type=click.Choice(tabulate_formats),
 31 |         metavar="[tabulate.tabulate_formats]", help="Output format.")
 32 | @click.option("-o", "--outfile", help="Filename for CSV export")
 33 | @click.option("-0", "--probs", is_flag=True, 
 34 |         help="Probabilities instead of counts")
 35 | @click.option("-z", "--compact", help="Compact display", is_flag=True)
 36 | @click.option("-y", "--tidy", help="Return tidy format", is_flag=True)
 37 | @click.option("-u", "--max", "_max", help="Maximum count value to show", type=int)
 38 | @click.option("-l", "--min", "_min", help="Minimum count value to show", type=int)
 39 | @handle_qc_errors
 40 | def crosstab(codes, settings, pattern, filenames, coders, depth, unit, recursive_codes,
 41 |         recursive_counts, expanded, _format, outfile, probs, compact, tidy, _max, _min):
 42 |     "Cross-tabulate code occurrences"
 43 |     if depth and not recursive_codes:
 44 |         msg = "--depth requires --recursive-codes"
 45 |         raise IncompatibleOptions(msg)
 46 |     if tidy and compact:
 47 |         msg = "--tidy and --compact are incompatible"
 48 |         raise IncompatibleOptions(msg)
 49 |     if tidy and probs:
 50 |         msg = "--tidy and --probs are incompatible"
 51 |         raise IncompatibleOptions(msg)
 52 |     if _min and not tidy:
 53 |         msg = "--min requires --tidy"
 54 |         raise IncompatibleOptions(msg)
 55 |     if _max and not tidy:
 56 |         msg = "--max requires --tidy"
 57 |         raise IncompatibleOptions(msg)
 58 | 
 59 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
 60 |     log = configure_logger(settings_path)
 61 |     log.info("codes crosstab", codes=codes, pattern=pattern, filenames=filenames, 
 62 |              coders=coders, depth=depth, unit=unit, recursive_codes=recursive_codes, 
 63 |              recursive_counts=recursive_counts, expanded=expanded, _format=_format,
 64 |              outfile=outfile, probs=probs, compact=compact, tidy=tidy, _max=_max,
 65 |              _min=_min)
 66 |     corpus = QCCorpus(settings_path)
 67 |     viewer = QCCorpusViewer(corpus)
 68 |     if tidy:
 69 |         viewer.tidy_codes(
 70 |             codes, 
 71 |             depth=depth, 
 72 |             recursive_codes=recursive_codes,
 73 |             recursive_counts=recursive_counts,
 74 |             expanded=expanded, 
 75 |             format=_format, 
 76 |             pattern=pattern,
 77 |             file_list=read_file_list(filenames),
 78 |             coders=coders,
 79 |             unit=unit,
 80 |             outfile=outfile,
 81 |             minimum=_min,
 82 |             maximum=_max,
 83 |         )
 84 |     else:
 85 |         viewer.crosstab(
 86 |             codes, 
 87 |             depth=depth, 
 88 |             recursive_codes=recursive_codes,
 89 |             recursive_counts=recursive_counts,
 90 |             expanded=expanded, 
 91 |             format=_format, 
 92 |             pattern=pattern,
 93 |             file_list=read_file_list(filenames),
 94 |             coders=coders,
 95 |             unit=unit,
 96 |             outfile=outfile,
 97 |             probs=probs,
 98 |             compact=compact,
 99 |         )
100 | 
101 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/find.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.views.viewer import QCCorpusViewer
 5 | from qualitative_coding.cli.decorators import handle_qc_errors
 6 | from qualitative_coding.helpers import read_file_list
 7 | from qualitative_coding.exceptions import IncompatibleOptions
 8 | from qualitative_coding.logs import configure_logger
 9 | 
@click.command()
@click.argument("codes", nargs=-1)
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern", 
        help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames", 
        help="File path containing a list of filenames to use")
@click.option("-c", "--coders", help="Coders", multiple=True)
@click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
@click.option("-n", "--unit", default="line", help="Unit of analysis",
        type=click.Choice(['line', 'paragraph', 'document']))
@click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True, 
        help="Include child codes")
@click.option("-B", "--before", default=2, type=int, 
        help="Number of lines before the code to show")
@click.option("-C", "--after", default=2, type=int, 
        help="Number of lines after the code to show")
@click.option("-o", "--no-codes", "no_codes", is_flag=True,
        help="Do not show matching codes")
@click.option("-l", "--no-line-numbers", "no_line_numbers", is_flag=True,
        help="Do not show line numbers")
@click.option("-j", "--json", is_flag=True, help="Export as JSON")
@handle_qc_errors
def find(codes, settings, pattern, filenames, coders, depth, unit, recursive_codes, 
         before, after, no_codes, no_line_numbers, json):
    "Find all coded text"
    # The display-only flags are rejected in combination with --json.
    if no_codes and json:
        raise IncompatibleOptions("--no-codes and --json are incompatible")
    if no_line_numbers and json:
        # The message names the option exactly as it is spelled on the CLI.
        raise IncompatibleOptions("--no-line-numbers and --json are incompatible")

    # An explicit --settings value wins; otherwise use $QC_SETTINGS, and
    # finally fall back to "settings.yaml".
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    log.info("codes find", codes=codes, pattern=pattern, filenames=filenames, coders=coders,
             depth=depth, unit=unit, recursive_codes=recursive_codes, before=before, 
             after=after, no_codes=no_codes, no_line_numbers=no_line_numbers, json=json)
    corpus = QCCorpus(settings_path)
    viewer = QCCorpusViewer(corpus)

    # Keyword arguments common to both output modes.
    shared_options = dict(
        before=before,
        after=after,
        recursive_codes=recursive_codes,
        depth=depth,
        unit=unit,
        pattern=pattern,
        file_list=read_file_list(filenames),
        coders=coders,
    )
    if json:
        viewer.show_coded_text_json(codes, **shared_options)
    else:
        viewer.show_coded_text(
            codes,
            show_codes=not no_codes,
            show_line_numbers=not no_line_numbers,
            **shared_options,
        )
73 | 
74 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/list.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.views.viewer import QCCorpusViewer
 5 | from qualitative_coding.logs import configure_logger
 6 | 
 7 | @click.command(name="list")
 8 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
 9 | @click.option("-e", "--expanded", is_flag=True, help="Show names of parent codes")
10 | @click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
11 | def _list(settings, expanded, depth):
12 |     "List all codes"
13 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
14 |     log = configure_logger(settings_path)
15 |     log.info("codes list", expanded=expanded, depth=depth)
16 |     corpus = QCCorpus(settings_path)
17 |     viewer = QCCorpusViewer(corpus)
18 |     viewer.list_codes(expanded=expanded, depth=depth)
19 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/rename.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.helpers import read_file_list
 5 | from qualitative_coding.logs import configure_logger
 6 | 
 7 | @click.command()
 8 | @click.argument("old_codes", nargs=-1)
 9 | @click.argument("new_code")
10 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
11 | @click.option("-c", "--coders", help="Coders", multiple=True)
12 | @click.option("-p", "--pattern", 
13 |         help="Pattern to filter corpus filenames (glob-style)")
14 | @click.option("-f", "--filenames", 
15 |         help="File path containing a list of filenames to use")
16 | def rename(old_codes, new_code, settings, coders, pattern, filenames):
17 |     "Rename one or more codes"
18 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
19 |     log = configure_logger(settings_path)
20 |     log.info("rename", old_codes=old_codes, new_code=new_code, coders=coders, 
21 |              pattern=pattern, filenames=filenames)
22 |     corpus = QCCorpus(settings_path)
23 |     with corpus.session():
24 |         corpus.rename_codes(
25 |             old_codes=old_codes, 
26 |             new_code=new_code, 
27 |             pattern=pattern,
28 |             file_list=read_file_list(filenames),
29 |             coders=coders,
30 |         )
31 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/codes/stats.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | import yaml
 4 | from pathlib import Path
 5 | from qualitative_coding.corpus import QCCorpus
 6 | from qualitative_coding.views.viewer import QCCorpusViewer
 7 | from qualitative_coding.cli.decorators import handle_qc_errors
 8 | from qualitative_coding.exceptions import IncompatibleOptions
 9 | from qualitative_coding.helpers import read_file_list
10 | from qualitative_coding.logs import configure_logger
11 | from tabulate import tabulate_formats
12 | 
@click.command()
@click.argument("codes", nargs=-1)
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern",
              help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames",
              help="File path containing a list of filenames to use")
@click.option("-c", "--coders", help="Coders", multiple=True)
@click.option("-C", "--by-coder", is_flag=True,
              help="Report stats separately for each coder")
@click.option("-D", "--by-document", is_flag=True,
              help="Report stats separately for each document")
@click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
@click.option("-n", "--unit", default="line", help="Unit of analysis",
              type=click.Choice(['line', 'paragraph', 'document']))
@click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True,
              help="Include child codes")
@click.option("-a", "--recursive-counts", is_flag=True,
              help="Counts for codes include child codes")
@click.option("-e", "--expanded", is_flag=True,
              help="Show names of codes in expanded form")
@click.option("-m", "--format", "_format", type=click.Choice(tabulate_formats),
              metavar="[tabulate.tabulate_formats]", help="Output format.")
@click.option("-o", "--outfile", help="Filename for CSV export")
@click.option("-u", "--max", "_max", help="Maximum count value to show", type=int)
@click.option("-l", "--min", "_min", help="Minimum count value to show", type=int)
@click.option("-z", "--zeros", is_flag=True, help="Include codes with zero occurrences")
@click.option("-t", "--total-only", is_flag=True,
              help="Show total but not count")
@handle_qc_errors
def stats(codes, settings, pattern, filenames, coders, by_coder, by_document, depth, unit, recursive_codes, 
        recursive_counts, expanded, _format, outfile, _max, _min, zeros, total_only):
    "Show statistics about codes"
    if depth and not recursive_codes:
        raise IncompatibleOptions("--depth requires --recursive-codes")

    # An explicit --settings value wins; otherwise use $QC_SETTINGS, and
    # finally fall back to "settings.yaml".
    if settings:
        config_file = settings
    else:
        config_file = os.environ.get("QC_SETTINGS", "settings.yaml")
    logger = configure_logger(config_file)
    logger.info(
        "codes stats",
        codes=codes,
        pattern=pattern,
        filenames=filenames,
        coders=coders,
        by_coder=by_coder,
        by_document=by_document,
        depth=depth,
        unit=unit,
        recursive_codes=recursive_codes,
    )
    viewer = QCCorpusViewer(QCCorpus(config_file))

    # Splitting by both coder and document is rendered as a pivot table;
    # every other combination goes through the regular stats view.
    if not (by_coder and by_document):
        viewer.show_stats(
            codes,
            max_count=_max,
            min_count=_min,
            depth=depth,
            recursive_codes=recursive_codes,
            recursive_counts=recursive_counts,
            expanded=expanded,
            format=_format,
            pattern=pattern,
            file_list=read_file_list(filenames),
            coders=coders,
            by_coder=by_coder,
            by_document=by_document,
            unit=unit,
            outfile=outfile,
            total_only=total_only,
            zeros=zeros,
        )
        return
    viewer.show_document_coders_pivot_table(
        codes=codes,
        recursive=recursive_codes or recursive_counts,
        format=_format,
        pattern=pattern,
        file_list=read_file_list(filenames),
        coders=coders,
        unit=unit,
        outfile=outfile,
    )
85 | 
86 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/__init__.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | from qualitative_coding.cli.click_aliases import ClickAliasedGroup
 3 | from qualitative_coding.cli.corpus.list import list_corpus_paths
 4 | from qualitative_coding.cli.corpus.import_media import import_media
 5 | from qualitative_coding.cli.corpus.move import move
 6 | from qualitative_coding.cli.corpus.remove import remove
 7 | from qualitative_coding.cli.corpus.update import update
 8 | from qualitative_coding.cli.corpus.anonymize import anonymize
 9 | 
@click.group(name="corpus", cls=ClickAliasedGroup)
def corpus_group():
    "Corpus commands"

# Attach each `corpus` subcommand together with its aliases (an empty list
# means the command has none).
for _command, _aliases in (
    (list_corpus_paths, ["ls"]),
    (move, ["mv"]),
    (remove, ["rm"]),
    (import_media, []),
    (update, []),
    (anonymize, []),
):
    corpus_group.add_command(_command, aliases=_aliases)
del _command, _aliases
20 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/anonymize.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import click
  3 | import spacy
  4 | import yaml
  5 | from tqdm import tqdm
  6 | from pathlib import Path
  7 | from collections import defaultdict
  8 | from qualitative_coding.corpus import QCCorpus
  9 | from qualitative_coding.exceptions import QCError, IncompatibleOptions
 10 | from qualitative_coding.helpers import read_file_list
 11 | from qualitative_coding.cli.decorators import handle_qc_errors
 12 | from qualitative_coding.logs import configure_logger
 13 | 
# Maps entity-label codes to the human-readable category used for them.
# Several codes (FAC, GPE, LOC) collapse into the single "Location" category.
# NOTE(review): these look like spaCy named-entity labels, presumably consumed
# when generating the key file -- confirm against generate_key_file.
LABELS = {
    "PERSON": "Person",
    "FAC": "Location",
    "ORG": "Organization",
    "GPE": "Location",
    "LOC": "Location",
}
 21 | 
 22 | @click.command()
 23 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
 24 | @click.option("-p", "--pattern", help="Pattern to filter corpus filenames (glob-style)")
 25 | @click.option("-f", "--filenames", help="File path containing a list of filenames to use")
 26 | @click.option("-k", "--key", default="key.yaml", help="Path to key file")
 27 | @click.option("-r", "--reverse", is_flag=True, help="Un-anonymize documents")
 28 | @click.option("-o", "--out-dir", default="anonymized", help="location for anonymized documemts")
 29 | @click.option("-u", "--update", is_flag=True, help="Update documents in place")
 30 | @click.option("-d", "--dryrun", is_flag=True, help="Show diff instead of performing update")
 31 | @handle_qc_errors
 32 | def anonymize(settings, pattern, filenames, key, reverse, out_dir, update, dryrun):
 33 |     "Anonymize corpus files"
 34 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
 35 |     key_file = Path(key)
 36 |     out_path = Path(out_dir)
 37 |     log = configure_logger(settings_path)
 38 |     log.info("corpus anonymize", pattern=pattern, filenames=filenames, key=key, 
 39 |              reverse=reverse, out_dir=out_dir, update=update, dryrun=dryrun)
 40 |     corpus = QCCorpus(settings_path)
 41 |     with corpus.session():
 42 |         docs = corpus.get_documents(pattern=pattern, file_list=read_file_list(filenames))
 43 | 
 44 |     if key_file.exists():
 45 |         keys = yaml.safe_load(key_file.read_text())
 46 |         if reverse:
 47 |             keys = reverse_keys(keys)
 48 |         out_path.mkdir(exist_ok=True, parents=True)
 49 |         with corpus.session():
 50 |             for doc in docs:
 51 |                 source = corpus.corpus_dir / doc.file_path
 52 |                 dest = out_path / doc.file_path
 53 |                 replace_keys(keys, source, dest)
 54 |                 if update:
 55 |                     corpus.update_document(source, dest, dryrun)
 56 |     else:
 57 |         if reverse:
 58 |             raise QCError("Cannot use --reverse unless key file exists")
 59 |         doc_paths = [corpus.corpus_dir / doc.file_path for doc in docs]
 60 |         generate_key_file(key, doc_paths, log)
 61 | 
 62 | def replace_keys(keys, source, dest):
 63 |     text = source.read_text()
 64 |     keys_by_length = [k for l, k in reversed(sorted((len(k), k) for k in keys.keys()))]
 65 |     for key in keys_by_length:
 66 |         text = text.replace(key, keys[key])
 67 |     dest.write_text(text)
 68 | 
 69 | def reverse_keys(keys):
 70 |     """Converts anonymization keys into de-anonymization keys.
 71 |     In a dict, each key has a single value, but there may be multiple 
 72 |     values with the same key. In this case, uses the first occurence. 
 73 |     """
 74 |     rkeys = {}
 75 |     for k, v in keys.items():
 76 |         if v not in rkeys:
 77 |             rkeys[v] = k
 78 |     return rkeys
 79 | 
 80 | def generate_key_file(key, file_paths, log):
 81 |     """Generates a YAML file containing keys for anonymization.
 82 |     A key file is required to anonymize a corpus. 
 83 |     """
 84 |     model_name = 'en_core_web_sm'
 85 |     if spacy.util.is_package(model_name):
 86 |         log.debug(f"Using spacy model {model_name}")
 87 |     else:
 88 |         log.info(f"Downloading spacy model {model_name}")
 89 |         spacy.cli.download(model_name)
 90 |     try:
 91 |         nlp = spacy.load('en_core_web_sm')
 92 |     except OSError:
 93 |         raise QCError(
 94 |             "A language model is required to run this task. " +
 95 |             f"Automatic downloading of spacy model {model_name} " +
 96 |             "failed. Please install the language model manually:\n" +
 97 |             "python -m spacy download en_core_web_sm"
 98 |         )
 99 |     entities = defaultdict(set)
100 |     for file_path in tqdm(file_paths, desc="Processing documents"):
101 |         text = file_path.read_text()
102 |         doc = nlp(text)
103 |         for ent in doc.ents:
104 |             if ent.label_ in LABELS:
105 |                 entities[ent.label_].add(ent)
106 |     placeholders = {}
107 |     for label, ents in entities.items():
108 |         placeholder = LABELS[label]
109 |         terms = sorted(e.text for e in ents)
110 |         for i, term in enumerate(terms):
111 |             placeholders[term] = f"{placeholder}_{i+1}"
112 |     Path(key).write_text(yaml.dump(placeholders))
113 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/import_media.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.cli.decorators import handle_qc_errors
 5 | from qualitative_coding.media_importers import media_importers
 6 | from qualitative_coding.logs import configure_logger
 7 | 
 8 | @click.command(name="import")
 9 | @click.argument("file_path")
10 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
11 | @click.option("-r", "--recursive", is_flag=True, 
12 |         help="Recursively import from directory")
13 | @click.option("-c", "--corpus-root", 
14 |         help="Relative path to import dir within corpus_dir")
15 | @click.option("-i", "--importer", type=click.Choice(media_importers.keys()),
16 |         default="pandoc",
17 |         help="Importer class to use")
18 | @handle_qc_errors
19 | def import_media(file_path, settings, recursive, corpus_root, importer):
20 |     "Import corpus files"
21 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
22 |     log = configure_logger(settings_path)
23 |     log.info("corpus import", file_path=file_path, recursive=recursive, corpus_root=corpus_root,
24 |              importer=importer)
25 |     corpus = QCCorpus(settings_path)
26 |     with corpus.session():
27 |         corpus.import_media(
28 |             file_path, 
29 |             recursive=recursive, 
30 |             corpus_root=corpus_root, 
31 |             importer=importer
32 |         )
33 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/list.py:
--------------------------------------------------------------------------------
import os
from pathlib import Path

import click

from qualitative_coding.cli.decorators import handle_qc_errors
from qualitative_coding.corpus import QCCorpus
from qualitative_coding.helpers import read_file_list
from qualitative_coding.logs import configure_logger
 7 | 
 8 | @click.command(name="list")
 9 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
10 | @click.option("-p", "--pattern", 
11 |         help="Pattern to filter corpus filenames (glob-style)")
12 | @click.option("-f", "--filenames", 
13 |         help="File path containing a list of filenames to use")
14 | def list_corpus_paths(settings, pattern, filenames):
15 |     "List corpus paths"
16 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
17 |     log = configure_logger(settings_path)
18 |     log.info("corpus list", pattern=pattern, filenames=filenames)
19 |     corpus = QCCorpus(settings_path)
20 |     paths = []
21 |     for dir_path, dirs, fns in os.walk(corpus.corpus_dir):
22 |         for fn in fns:
23 |             paths.append(str(Path(dir_path).relative_to(corpus.corpus_dir) / fn))
24 |     if pattern:
25 |         paths = [path for path in paths if pattern in path]
26 |     if filenames:
27 |         file_list = read_file_list(filenames)
28 |         paths = [path for path in paths if path in file_list]
29 |     for path in sorted(paths):
30 |         print(path)
31 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/move.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.logs import configure_logger
 5 | from qualitative_coding.cli.decorators import handle_qc_errors
 6 | 
 7 | @click.command()
 8 | @click.argument("target")
 9 | @click.argument("destination")
10 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
11 | @click.option("-r", "--recursive", is_flag=True, 
12 |         help="Recursively import from directory")
13 | @handle_qc_errors
14 | def move(target, destination, settings, recursive):
15 |     "Move a file in the corpus"
16 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
17 |     log = configure_logger(settings_path)
18 |     log.info("corpus move", target=target, destination=destination, recursive=recursive)
19 |     corpus = QCCorpus(settings_path)
20 |     with corpus.session():
21 |         corpus.move_document(target, destination, recursive=recursive)
22 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/remove.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.logs import configure_logger
 5 | from qualitative_coding.cli.decorators import handle_qc_errors
 6 | 
 7 | @click.command()
 8 | @click.argument("target")
 9 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
10 | @click.option("-r", "--recursive", is_flag=True, 
11 |         help="Recursively remove from directory")
12 | @handle_qc_errors
13 | def remove(target, settings, recursive):
14 |     "Remove a file from the corpus"
15 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
16 |     log = configure_logger(settings_path)
17 |     log.info("corpus remove", target=target, recursive=recursive)
18 |     corpus = QCCorpus(settings_path)
19 |     with corpus.session():
20 |         corpus.remove_document(target, recursive=recursive)
21 | 
22 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/corpus/update.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import click
 3 | from pathlib import Path
 4 | from subprocess import run
 5 | from collections import defaultdict
 6 | from qualitative_coding.corpus import QCCorpus
 7 | from qualitative_coding.exceptions import IncompatibleOptions, QCError, InvalidParameter
 8 | from qualitative_coding.cli.decorators import handle_qc_errors
 9 | from qualitative_coding.logs import configure_logger
10 | 
@click.command()
@click.argument("file_path", type=click.Path(exists=True))
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-n", "--new", type=click.Path(exists=True), help="Path to new version")
@click.option("-d", "--dryrun", is_flag=True, 
        help="Show simulated results")
@handle_qc_errors
def update(file_path, settings, new, dryrun):
    "Update the content of corpus files"
    # The docstring above is what click shows as the command's help text.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    # Log the target file as well, as the other corpus commands do.
    log.info("corpus update", file_path=file_path, new=new, dryrun=dryrun)
    corpus = QCCorpus(settings_path)
    # The update runs inside a corpus session.
    with corpus.session():
        corpus.update_document(file_path, new, dryrun)
26 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/decorators.py:
--------------------------------------------------------------------------------
 1 | from functools import update_wrapper
 2 | import click
 3 | import sys
 4 | from qualitative_coding.exceptions import QCError
 5 | from qualitative_coding.views.styles import error
 6 | from qualitative_coding.exceptions import IncompatibleOptions
 7 | 
 8 | def handle_qc_errors(f):
 9 |     """Decorator declaring a click command. 
10 |     Wraps execution in a try/catch block, so that QCErrors can be handled with 
11 |     graceful output.
12 |     """
13 |     def command(*args, **kwargs):
14 |         try:
15 |             return f(*args, **kwargs)
16 |         except QCError as e:
17 |             click.echo(error(str(e), preformatted=True), err=True)
18 |             sys.exit(1)
19 |     return update_wrapper(command, f)
20 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/export.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from pathlib import Path
 4 | from qualitative_coding.corpus import QCCorpus
 5 | from qualitative_coding.refi_qda.writer import REFIQDAWriter
 6 | from qualitative_coding.cli.decorators import (
 7 |     handle_qc_errors,
 8 | )
 9 | 
@click.command()
@click.argument("export_path")
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@handle_qc_errors
def export(export_path, settings):
    "Export project as REFI-QDA"
    # The docstring above is what click shows as the command's help text.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    corpus = QCCorpus(settings_path)
    # Refresh the codebook before writing the export.
    with corpus.session():
        corpus.update_codebook()
    # Force the ".qdpx" suffix. This path was previously computed but ignored,
    # and the raw export_path was handed to the writer instead.
    path = Path(export_path).with_suffix(".qdpx")
    writer = REFIQDAWriter(settings_path)
    # Passed as str, matching the type the writer received before.
    writer.write(str(path))
23 | 
24 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/init.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.cli.decorators import handle_qc_errors
 4 | from qualitative_coding.logs import configure_logger
 5 | from os import getcwd
 6 | from pathlib import Path
 7 | 
 8 | @click.command()
 9 | @click.option("-s", "--settings", type=click.Path(), help="Settings file")
10 | @click.option("-w", "--write-settings-file", is_flag=True, help="Create a settings file but do not create directories")
11 | @click.option("-i", "--import", "_import", help="Import an existing qdpx project")
12 | @handle_qc_errors
13 | def init(settings, write_settings_file, _import):
14 |     "Initialize a qc project"
15 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
16 |     if _import:
17 |         from qualitative_coding.refi_qda.reader import REFIQDAReader
18 |         reader = REFIQDAReader(_import)
19 |         reader.unpack_project(Path.cwd())
20 |     else:
21 |         log = configure_logger(settings_path)
22 |         log.info("init", write_settings_file=write_settings_file)
23 |         from qualitative_coding.corpus import QCCorpus
24 |         QCCorpus.initialize(settings_path, write_settings_file)
25 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/memo.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | from qualitative_coding.corpus import QCCorpus
 4 | from qualitative_coding.views.viewer import QCCorpusViewer
 5 | from qualitative_coding.cli.decorators import handle_qc_errors
 6 | from qualitative_coding.logs import configure_logger
 7 | 
 8 | @click.command()
 9 | @click.argument("coder")
10 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
11 | @click.option("-m", "--message", help="short message, title of memo file")
12 | @click.option("-l", "--list", "list_memos", is_flag=True,
13 |         help="list all memos in order")
14 | @handle_qc_errors
15 | def memo(coder, settings, message, list_memos):
16 |     "Write a memo"
17 |     settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
18 |     log = configure_logger(settings_path)
19 |     log.info("memo", coder=coder, message=message, list_memos=list_memos)
20 |     corpus = QCCorpus(settings_path)
21 |     viewer = QCCorpusViewer(corpus)
22 |     if list_memos:
23 |         click.echo(viewer.list_memos())
24 |     else:
25 |         viewer.memo(coder, message)
26 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/options.py:
--------------------------------------------------------------------------------
# NOTE: This module may no longer be used and is a candidate for deletion.
 2 | from qualitative_coding.exceptions import IncompatibleOptions
 3 | 
 4 | class Truthy:
 5 |     "Like True, but when used in comparison, coerces the other object to bool."
 6 |     val = True
 7 |     def __eq__(self, other):
 8 |         return bool(other) == self.val
 9 | 
10 |     def __bool__(self):
11 |         return self.val
12 | 
13 |     def __str__(self):
14 |         return str(self.val) 
15 | 
class Falsy(Truthy):
    """Counterpart of Truthy whose fixed truth value is False."""
    val = False
19 | 
20 | def _fmt(opts, _and=True):
21 |     if len(opts) == 1:
22 |         return opts[0]
23 |     else:
24 |         return "{} {} {}".format(", ".join(opts[:-1]), "and" if _and else "or", opts[-1])
25 | 
def check_incompatible(args, **conditions):
    """Raise IncompatibleOptions when `args` matches a forbidden combination.

    Each keyword in `conditions` names an attribute of `args` and the value
    that attribute has in the forbidden combination. Missing attributes are
    read as None. If every attribute equals its condition value, an error is
    raised whose wording depends on the condition values:

    * all truthy: the options may not be used together;
    * all falsy: one of the options is required;
    * mixed: the falsy-valued options are required when the truthy-valued
      ones are used.

    Raises:
        IncompatibleOptions: when all conditions match.
    """
    problem = all(val == getattr(args, opt, None) for opt, val in conditions.items())
    if not problem:
        return
    opts = ["--{}".format(k) for k in conditions.keys()]
    if all(conditions.values()):
        quantifier = "both" if len(conditions) == 2 else "all"
        message = f"{_fmt(opts)} may not {quantifier} be used."
    elif not any(conditions.values()):
        # This was a plain string, so the "{_fmt(...)}" placeholder was
        # emitted literally; it is now an f-string.
        message = f"One of {_fmt(opts, _and=False)} is required."
    else:
        present = ["--{}".format(o) for o, req in conditions.items() if req]
        absent = ["--{}".format(o) for o, req in conditions.items() if not req]
        message = "{}{} must be used when {} {} used.".format(
            "One of " if len(absent) > 1 else "",
            _fmt(absent), 
            _fmt(present), 
            "is" if len(present) == 1 else "are"
        )
    raise IncompatibleOptions(message)
45 | 
46 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/upgrade.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | import os
 3 | import yaml
 4 | from pathlib import Path
 5 | from qualitative_coding.cli.decorators import handle_qc_errors
 6 | from qualitative_coding.migrations import migrations, migrate
 7 | from qualitative_coding.helpers import read_settings
 8 | from qualitative_coding.logs import configure_logger
 9 | import shutil
10 | 
@click.command()
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option(
    "-v", "--version",
    type=click.Choice([m._version for m in migrations]),
    default=migrations[-1]._version,
    help="Target upgrade or downgrade version",
)
@handle_qc_errors
def upgrade(settings, version):
    "Upgrade project to new version of qc"
    # The docstring above is what click shows as the command's help text.
    # --version defaults to the last entry in `migrations`.
    settings_path = settings if settings else os.environ.get("QC_SETTINGS", "settings.yaml")
    logger = configure_logger(settings_path)
    logger.info("upgrade", version=version)
    migrate(settings_path, version)
23 | 


--------------------------------------------------------------------------------
/qualitative_coding/cli/version.py:
--------------------------------------------------------------------------------
1 | import click
2 | from importlib.metadata import metadata
3 | 
@click.command()
def version():
    "Show version number"
    # The docstring above is what click shows as the command's help text.
    # The version is read from the installed distribution's metadata.
    package_info = metadata('qualitative-coding')
    click.echo("qualitative-coding {}".format(package_info['version']))
9 | 


--------------------------------------------------------------------------------
/qualitative_coding/codebook.py:
--------------------------------------------------------------------------------
1 | 
class QCCodebook:
    """Minimal codebook object that only records the filename it was given."""

    def __init__(self, filename):
        # Stored as given; nothing is read from disk here.
        self.filename = filename
6 |         
7 | 


--------------------------------------------------------------------------------
/qualitative_coding/database/errors.py:
--------------------------------------------------------------------------------
1 | from qualitative_coding.exceptions import QCError
2 | 


--------------------------------------------------------------------------------
/qualitative_coding/database/models.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | from sqlalchemy import (
 3 |     ForeignKey,
 4 |     UniqueConstraint,
 5 |     CheckConstraint,
 6 |     Table,
 7 |     Column,
 8 | )
 9 | from sqlalchemy.orm import (
10 |     DeclarativeBase,
11 |     Mapped,
12 |     mapped_column, 
13 |     relationship,
14 | )
15 | from qualitative_coding.exceptions import QCError
16 | 
class Base(DeclarativeBase):
    """Declarative base shared by the ORM models and the association table below."""
    pass
19 | 
class Document(Base):
    """ORM model mapped to the ``document`` table, keyed by ``file_path``.

    ``indices`` holds the related DocumentIndex rows; the
    ``all, delete-orphan`` cascade ties their lifetime to the document.
    """
    __tablename__ = "document"
    # Primary key: the document's file path.
    file_path: Mapped[str] = mapped_column(primary_key=True)
    # presumably a hash of the file contents; it is computed outside this module.
    file_hash: Mapped[str] 
    indices: Mapped[List["DocumentIndex"]] = relationship(back_populates="document",
            cascade="all, delete-orphan")

    class AlreadyExists(QCError):
        """QCError carrying the conflicting document in ``doc``."""
        def __init__(self, doc):
            # Keep the document so handlers can inspect it.
            self.doc = doc
            err = f"A Document with file path {doc.file_path} already exists"
            super().__init__(err)
32 | 
class DocumentIndex(Base):
    """ORM model mapped to the ``document_index`` table.

    Each row belongs to one Document and has a ``name`` that is unique within
    that document. ``locations`` holds the related Location rows, which are
    cascaded with ``all, delete-orphan``.
    """
    __tablename__ = "document_index"
    # (document_id, name) pairs must be unique.
    __table_args__ = (
        UniqueConstraint(
            "document_id",
            "name", 
        ),
)
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    name: Mapped[str]
    # Flag defaulting to False; its interpretation is up to code outside this module.
    time_series: Mapped[bool] = mapped_column(default=False)
    # Foreign key to Document.file_path.
    document_id: Mapped[str] = mapped_column(ForeignKey(Document.file_path))
    document: Mapped["Document"] = relationship(back_populates="indices")
    locations: Mapped[List["Location"]] = relationship(back_populates="document_index",
            cascade="all, delete-orphan")
48 | 
# Association table for the many-to-many link between CodedLine and Location.
# The two foreign keys together form a composite primary key, and both model
# classes reference this table via ``secondary=``.
coded_line_location_association_table = Table(
    "coded_line_location_association",
    Base.metadata,
    Column("coded_line_id", ForeignKey("coded_line.id"), primary_key=True),
    Column("location_id", ForeignKey("location.id"), primary_key=True),
)
55 | 
class Location(Base):
    """ORM model mapped to the ``location`` table: a line span in a DocumentIndex.

    A check constraint enforces ``start_line <= end_line``. Locations are
    linked many-to-many with CodedLine through the association table.
    """
    __tablename__ = "location"
    __table_args__ = (
        CheckConstraint("start_line <= end_line"),
    )
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    start_line: Mapped[int]
    end_line: Mapped[int]
    # NOTE(review): annotated Mapped[str] but references DocumentIndex.id,
    # which is Mapped[int]. Confirm whether Mapped[int] was intended; changing
    # it would affect the generated column type.
    document_index_id: Mapped[str] = mapped_column(ForeignKey(DocumentIndex.id))
    document_index: Mapped["DocumentIndex"] = relationship(back_populates="locations")
    coded_lines: Mapped[List["CodedLine"]] = relationship(
        secondary=coded_line_location_association_table,
        back_populates="locations",
    )
70 | 
class Code(Base):
    """ORM model mapped to the ``code`` table, keyed by the code's name.

    ``coded_lines`` holds the CodedLine rows using this code; they are
    cascaded with ``all, delete-orphan``.
    """
    __tablename__ = "code"
    name: Mapped[str] = mapped_column(primary_key=True)
    coded_lines: Mapped[List["CodedLine"]] = relationship(back_populates="code",
            cascade="all, delete-orphan")
76 | 
class Coder(Base):
    """ORM model mapped to the ``coder`` table, keyed by the coder's name.

    ``coded_lines`` holds the CodedLine rows attributed to this coder; they
    are cascaded with ``all, delete-orphan``.
    """
    __tablename__ = "coder"
    name: Mapped[str] = mapped_column(primary_key=True)
    coded_lines: Mapped[List["CodedLine"]] = relationship(back_populates="coder", 
            cascade="all, delete-orphan")
82 | 
class CodedLine(Base):
    """ORM model mapped to the ``coded_line`` table.

    Each row ties one Code and one Coder to an integer ``line`` and is linked
    many-to-many with Location through the association table.
    """
    __tablename__ = "coded_line"
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # presumably a line number within the coded document; confirm against callers.
    line: Mapped[int]
    # Foreign key to Coder.name.
    coder_id: Mapped[str] = mapped_column(ForeignKey(Coder.name))
    coder: Mapped["Coder"] = relationship(back_populates="coded_lines")
    # Foreign key to Code.name.
    code_id: Mapped[str] = mapped_column(ForeignKey(Code.name))
    code: Mapped["Code"] = relationship(back_populates="coded_lines")
    locations: Mapped[List["Location"]] = relationship(
        secondary=coded_line_location_association_table,
        back_populates="coded_lines"
    )
95 | 


--------------------------------------------------------------------------------
/qualitative_coding/demo.qdpx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/qualitative_coding/demo.qdpx


--------------------------------------------------------------------------------
/qualitative_coding/diff.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from more_itertools import peekable
  3 | from difflib import unified_diff
  4 | from subprocess import run
  5 | 
  6 | def get_git_diff(path):
  7 |     "Gits a diff between file state and HEAD"
  8 |     result = run(f"git diff {path}", shell=True, capture_output=True, text=True)
  9 |     return result.stdout
 10 | 
 11 | def get_diff(path0, path1):
 12 |     "Gets a diff between two file paths"
 13 |     with open(path0) as fh:
 14 |         doc0 = [line for line in fh]
 15 |     with open(path1) as fh:
 16 |         doc1 = [line for line in fh]
 17 |     return ''.join(unified_diff(doc0, doc1))
 18 | 
 19 | def reindex_coded_lines(coded_lines, diff):
 20 |     """Returns a new version of coded_lines, with line numbers updated to account for diff.
 21 |     Assumes coded_lines are sorted by line number.
 22 |     """
 23 |     offsets = peekable(read_diff_offsets(diff))
 24 |     current_offset_line = 0
 25 |     current_offset = 0
 26 |     cum_offset = 0
 27 |     reindexed_coded_lines = []
 28 |     for code, coder, line, path in coded_lines:
 29 |         try:
 30 |             if offsets.peek()[0] <= line:
 31 |                 current_offset_line, current_offset = next(offsets)
 32 |                 cum_offset += current_offset
 33 |         except StopIteration:
 34 |             pass
 35 | 
 36 | 
 37 |     return reindexed_coded_lines
 38 | 
 39 | def read_diff_offsets(diff):
 40 |     """Reads a unified diff and returns a list of (line, offset) tuples.
 41 |     For example, (6, 2) represents an insertion of 2 lines at line 6. 
 42 |     Adjacent deletions and insertions are assumed to be edited versions
 43 |     of the same line, so if 4 lines were deleted and 3 lines inserted at
 44 |     line 10, this would be represented as (13, -1).
 45 |     """
 46 |     offsets = []
 47 |     lines = peekable(diff.split('\n'))
 48 |     try:
 49 |         read_preamble(lines)
 50 |         while True:
 51 |             offsets += read_hunk(lines)
 52 |     except StopIteration:
 53 |         return offsets
 54 | 
 55 | def read_preamble(lines):
 56 |     line = next(lines)
 57 |     while not line.startswith('---'):
 58 |         line = next(lines)
 59 |     line = next(lines)
 60 |     assert(line.startswith('+++'))
 61 | 
def read_hunk(lines):
    """Read one hunk from `lines` and return its net line-count changes.

    `lines` must support ``next()`` and ``peek()``. The first line consumed is
    the hunk header, from which the starting line number is parsed. Following
    lines are consumed until the next one starts with ``@``. A run of
    consecutive ``-``/``+`` lines is one operation; when it ends, a
    ``(line, offset)`` tuple is recorded if the numbers of added and removed
    lines differ.

    Returns:
        A list of ``(line, offset)`` tuples, where offset is lines added minus
        lines removed.
    """
    # Not inside the try block: StopIteration here (no lines left) propagates
    # to the caller.
    line = next(lines)
    line_number = read_line_number(line)
    minus = 0
    plus = 0
    in_op = False
    op_start_line_number = 1
    ops = []
    try:
        while not lines.peek().startswith('@'):
            line = next(lines)
            if in_op:
                if line[0] == '-':
                    minus += 1
                elif line[0] == '+':
                    plus += 1
                else:
                    # Any other line closes the operation; record its net
                    # effect (nothing is recorded when plus == minus).
                    in_op = False
                    if plus - minus > 0:
                        ops.append((op_start_line_number, plus - minus))
                    elif plus - minus < 0:
                        ops.append((op_start_line_number + minus - plus - 1, plus - minus))
            else:
                # A '-' or '+' line opens a new operation at the current line.
                if line[0] == '-':
                    in_op = True
                    op_start_line_number = line_number
                    minus, plus = 1, 0
                elif line[0] == '+':
                    in_op = True
                    op_start_line_number = line_number
                    minus, plus = 0, 1
            # NOTE(review): the counter advances on every consumed line,
            # including '+' lines. Confirm that is the intended numbering.
            line_number += 1
    finally:
        # Flush an operation still open when the loop ended or was interrupted.
        if in_op:
            if plus - minus > 0:
                ops.append((op_start_line_number, plus - minus))
            elif plus - minus < 0:
                ops.append((op_start_line_number + minus - plus - 1, plus - minus))
        # Returning from `finally` discards any exception raised in the try
        # block, e.g. an IndexError from `line[0]` on an empty line.
        return ops
101 | 
def read_line_number(hunk_preamble):
    """Return the starting old-file line number from a hunk header.

    For ``"@@ -12,3 +14,4 @@"`` this returns ``12``. Leading whitespace is
    allowed. As before, a string that does not match raises AttributeError.
    """
    # Raw string: the old literal used "\s", "\-" and "\d" in a normal string,
    # which are invalid escape sequences and trigger a warning on modern Python.
    match = re.match(r'\s*@@ \-(\d+)', hunk_preamble)
    return int(match.group(1))
105 | 
def in_git_repo():
    """Report whether ``git status`` succeeds in the current working directory."""
    status = run("git status", shell=True, capture_output=True)
    succeeded = status.returncode == 0
    return succeeded
109 | 
110 |     
111 | 
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 


--------------------------------------------------------------------------------
/qualitative_coding/editors.py:
--------------------------------------------------------------------------------
# Table of supported editors, keyed by a short identifier.
# Each entry has a display "name" and two command templates:
#   code_command: opens a corpus file and a codes file together; uses the
#                 {corpus_file_path} and {codes_file_path} placeholders.
#   memo_command: opens a single memo file; uses {memo_file_path}.
# The placeholders are presumably filled with str.format by the caller.
editors = {
    "code": {
        "name": "Visual Studio Code",
        "code_command": 'code "{corpus_file_path}" "{codes_file_path}" --wait',
        "memo_command": 'code "{memo_file_path}"',
    },
    "vim": {
        "name": "Vim",
        # -O opens the files in vertical splits; the -c command toggles
        # scrollbind in every window.
        "code_command": 'vim -O "{corpus_file_path}" "{codes_file_path}" -c \'windo set scb!\'',
        "memo_command": 'vim "{memo_file_path}"',
    },
    "nvim": {
        "name": "Neovim",
        "code_command": 'nvim -O "{corpus_file_path}" "{codes_file_path}" -c \'windo set scb!\'',
        "memo_command": 'nvim "{memo_file_path}"',
    },
    "emacs": {
        "name": "Emacs",
        # NOTE(review): the --eval expression is not quoted as one argument.
        # Confirm how the caller runs this command; a shell may reject or
        # split the parentheses.
        "code_command": "emacs -Q --eval (progn (find-file \"{corpus_file_path}\") (split-window-right) (other-window 1) (find-file \"{codes_file_path}\") (scroll-all-mode))",
        "memo_command": 'emacs "{memo_file_path}"',
    },
}
23 | 


--------------------------------------------------------------------------------
/qualitative_coding/exceptions.py:
--------------------------------------------------------------------------------
 1 | class QCError(Exception):
 2 |     pass
 3 | 
 4 | class InvalidParameter(QCError):
 5 |     pass
 6 | 
 7 | class IncompatibleOptions(QCError):
 8 |     pass
 9 | 
class SettingsError(QCError):
    """QCError subclass; presumably signals a problem with project settings."""
12 | 
class CodeFileParseError(QCError):
    """QCError subclass; presumably signals a code file that cannot be parsed."""
15 | 
class CodebookParseError(QCError):
    """QCError subclass; presumably signals a codebook that cannot be parsed."""
18 | 
19 | 


--------------------------------------------------------------------------------
/qualitative_coding/helpers.py:
--------------------------------------------------------------------------------
  1 | from textwrap import fill
  2 | from pathlib import Path
  3 | from subprocess import run
  4 | from qualitative_coding.exceptions import QCError
  5 | import yaml
  6 | 
  7 | def read_settings(path):
  8 |     if not Path(path).exists():
  9 |         raise QCError(f"Settings file {path} not found.")
 10 |     try:
 11 |         settings_text = Path(path).read_text()
 12 |     except:
 13 |         raise QCError(f"Error reading settings file {path}")
 14 |     try:
 15 |         return yaml.safe_load(settings_text)
 16 |     except:
 17 |         raise QCError(f"Error parsing settings file {path}")
 18 | 
 19 | def read_file_list(filename):
 20 |     """Many cli commands accept `--filenames`, a path to a file 
 21 |     containing a list of files. 
 22 |     """
 23 |     if filename:
 24 |         return Path(filename).read_text().split("\n")
 25 | 
 26 | def iter_paragraph_lines(fh):
 27 |     p_start = 0
 28 |     in_whitespace = False
 29 |     for i, line in enumerate(fh):
 30 |         if line.strip() == "":
 31 |             in_whitespace = True
 32 |         elif in_whitespace:
 33 |             yield p_start, i
 34 |             p_start = i
 35 |             in_whitespace = False
 36 |     yield p_start, i + 1
 37 | 
 38 | def merge_ranges(ranges, clamp=None):
 39 |     "Overlapping ranges? Let's fix that. Optionally supply clamp=[0, 100]"
 40 |     if any(filter(lambda r: r.step != 1, ranges)): raise ValueError("Ranges must have step=1")
 41 |     endpoints = [(r.start, r.stop) for r in sorted(ranges, key=lambda r: r.start)]
 42 |     results = []
 43 |     if any(endpoints):
 44 |         a, b = endpoints[0]
 45 |         for start, stop in endpoints:
 46 |             if start <= b:
 47 |                 b = max(b, stop)
 48 |             else:
 49 |                 results.append(range(a, b))
 50 |                 a, b = start, stop
 51 |         results.append(range(a, b))
 52 |     if clamp is not None:
 53 |         lo, hi = clamp
 54 |         results = [range(max(lo, r.start), min(hi, r.stop)) for r in results]
 55 |     return results
 56 | 
 57 | def prepare_corpus_text(text, width=80, preformatted=False):
 58 |     "Splits corpus text at blank lines and wraps it."
 59 |     if preformatted:
 60 |         outlines = []
 61 |         lines = text.split("\n")
 62 |         for line in lines:
 63 |             while True:
 64 |                 outlines.append(line[:width])
 65 |                 if len(line) < 80:
 66 |                     break
 67 |                 line = line[width:]
 68 |         return "\n".join(outlines)
 69 |     else:
 70 |         paragraphs = text.split("\n\n")
 71 |         return "\n\n".join(fill(p, width=width) for p in paragraphs)
 72 | 
 73 | def prompt_for_choice(prompt, options):
 74 |     "Asks for a prompt, returns an index"
 75 |     print(prompt)
 76 |     for i, opt in enumerate(options):
 77 |         print(f"{i+1}. {opt}")
 78 |     while True:
 79 |         raw_choice = input("> ")
 80 |         if raw_choice.isdigit() and int(raw_choice) in range(1, len(options)+1):
 81 |             return int(raw_choice)
 82 |         print("Sorry, that's not a valid choice.")
 83 | 
 84 | 
 85 | def _fmt(opts, _and=True):
 86 |     if len(opts) == 1:
 87 |         return opts[0]
 88 |     else:
 89 |         return "{} {} {}".format(", ".join(opts[:-1]), "and" if _and else "or", opts[-1])
 90 | 
 91 | class IncompatibleOptions(ValueError):
 92 |     pass
 93 | 
 94 | class Truthy:
 95 |     "Like True, but when used in comparison, coerces the other object to bool."
 96 |     val = True
 97 |     def __eq__(self, other):
 98 |         return bool(other) == self.val
 99 | 
100 |     def __bool__(self):
101 |         return self.val
102 | 
103 |     def __str__(self):
104 |         return str(self.val) 
105 | 
class Falsy(Truthy):
    "The False counterpart of Truthy: compares equal to anything whose bool() is False."
    val = False
109 | 
def check_incompatible(args, **conditions):
    """Raise IncompatibleOptions when ``args`` matches every given condition.

    Each keyword names an attribute of ``args`` and the value it must equal
    for the combination to count as a problem (a missing attribute is read
    as None). The condition value is the left operand of ``==``, so a
    Truthy/Falsy instance can be used to match on truthiness. If all
    conditions match, an error message is built from the option names:

    * every condition value truthy     -> the options may not be combined
    * every condition value falsy      -> one of the options is required
    * a mix                            -> the falsy-valued options must be
                                          used when the truthy-valued ones are
    """
    if not conditions:
        # Nothing to check; also avoids formatting an empty option list.
        return
    problem = all(val == getattr(args, opt, None) for opt, val in conditions.items())
    if not problem:
        return
    opts = ["--{}".format(k) for k in conditions.keys()]
    if all(conditions.values()):
        message = "{} may not {} be used.".format(
            _fmt(opts),
            "both" if len(conditions) == 2 else "all",
        )
    elif not any(conditions.values()):
        message = "One of {} is required.".format(_fmt(opts, _and=False))
    else:
        present = ["--{}".format(o) for o, req in conditions.items() if req]
        absent = ["--{}".format(o) for o, req in conditions.items() if not req]
        message = "{}{} must be used when {} {} used.".format(
            "One of " if len(absent) > 1 else "",
            _fmt(absent),
            _fmt(present),
            "is" if len(present) == 1 else "are",
        )
    raise IncompatibleOptions(message)
129 | 


--------------------------------------------------------------------------------
/qualitative_coding/logs.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.helpers import read_settings
 2 | from pathlib import Path
 3 | import structlog
 4 | import logging
 5 | import sys
 6 | 
 7 | DEFAULT_LOG_FILE = "qualitative_coding.log"
 8 | 
 9 | def configure_logger(settings_path):
10 |     """Configures logging and structlog so that future calls to 
11 |     structlog.get_logger() will return a properly-behaved logger.
12 |     The logger logs JSON to a file (specified in settings) and, 
13 |     when settings.verbose is True, also log nicely to the console. 
14 | 
15 |     Custom log configuration can be stored in a log_config module 
16 |     (e.g. log_config.py).
17 |     """
18 |     try:
19 |         import log_config
20 |         return structlog.get_logger()
21 |     except ModuleNotFoundError:
22 |         pass
23 | 
24 |     if Path(settings_path).exists():
25 |         settings = read_settings(settings_path)
26 |         verbose = settings.get('verbose', False)
27 |         log_file_path = Path(settings.get('log_path', DEFAULT_LOG_FILE))
28 |         if not log_file_path.is_absolute():
29 |             log_file_path = Path(settings_path).parent / log_file_path
30 |     else:
31 |         log_file_path = DEFAULT_LOG_FILE
32 |         verbose = False
33 | 
34 |     root_logger = logging.getLogger()
35 |     root_logger.setLevel(logging.DEBUG if verbose else logging.INFO)
36 |     file_handler = logging.FileHandler(log_file_path, )
37 |     file_formatter = structlog.stdlib.ProcessorFormatter(
38 |         processors=[
39 |             structlog.stdlib.ProcessorFormatter.remove_processors_meta,
40 |             structlog.processors.JSONRenderer(),
41 |         ]
42 |     )
43 |     file_handler.setFormatter(file_formatter)
44 |     root_logger.addHandler(file_handler)
45 | 
46 |     if verbose:
47 |         console_handler = logging.StreamHandler()
48 |         console_formatter = structlog.stdlib.ProcessorFormatter(
49 |             processors=[
50 |                 structlog.stdlib.ProcessorFormatter.remove_processors_meta,
51 |                 structlog.dev.ConsoleRenderer(),
52 |             ],
53 |         )
54 |         console_handler.setFormatter(console_formatter)
55 |         root_logger.addHandler(console_handler)
56 | 
57 |     structlog.configure(
58 |         processors=[
59 |             structlog.stdlib.add_log_level,
60 |             structlog.processors.TimeStamper(fmt='iso'),
61 |             structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
62 |         ],
63 |         logger_factory=structlog.stdlib.LoggerFactory(),
64 |         cache_logger_on_first_use=True,
65 |     )
66 |     return structlog.get_logger()
67 | 


--------------------------------------------------------------------------------
/qualitative_coding/media_importers/__init__.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.exceptions import InvalidParameter
 2 | from qualitative_coding.media_importers.pandoc import PandocImporter
 3 | from qualitative_coding.media_importers.verbatim import VerbatimImporter
 4 | from qualitative_coding.media_importers.vtt import VTTImporter
 5 | 
# Registry of the available media importer classes, keyed by short name.
# NOTE(review): presumably these keys are the values accepted by the
# `importer=` argument of corpus.import_media (e.g. "verbatim") -- confirm.
media_importers = {
    "pandoc": PandocImporter,
    "verbatim": VerbatimImporter,
    "vtt": VTTImporter,
}
11 | 


--------------------------------------------------------------------------------
/qualitative_coding/media_importers/base.py:
--------------------------------------------------------------------------------
 1 | 
 2 | class BaseMediaImporter:
 3 |     """Base class for media importers.
 4 |     The API for MediaImporters is a single method, `import_media`, which 
 5 |     takes an input filename and an output filename.
 6 |     """
 7 |     def __init__(self, settings):
 8 |         self.settings = settings
 9 | 
10 |     def import_media(self, input_filename, output_filename):
11 |         raise NotImplementedError("Subclasses of BaseMediaImporter should be used.")
12 | 
13 |     def register_media_in_database(self, corpus_path):
14 |         with self.corpus.session():
15 |             self.corpus.register_document(corpus_path)
16 | 


--------------------------------------------------------------------------------
/qualitative_coding/media_importers/pandoc.py:
--------------------------------------------------------------------------------
 1 | from subprocess import run, CalledProcessError
 2 | from qualitative_coding.media_importers.base import BaseMediaImporter
 3 | from qualitative_coding.exceptions import QCError
 4 | 
 5 | class PandocImporter(BaseMediaImporter):
 6 |     def import_media(self, input_filename, output_filename):
 7 |         self.check_for_pandoc()
 8 |         cmd = f'pandoc -i "{input_filename}" -o "{output_filename}" --to plain --columns 80'
 9 |         run(cmd, shell=True, check=True)
10 | 
11 |     def check_for_pandoc(self):
12 |         try:
13 |             run("which pandoc", shell=True, check=True, capture_output=True)
14 |         except CalledProcessError:
15 |             raise QCError("pandoc is required but was not found. Please install pandoc.")
16 | 
17 | 


--------------------------------------------------------------------------------
/qualitative_coding/media_importers/verbatim.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | from qualitative_coding.media_importers.base import BaseMediaImporter
 3 | 
 4 | class VerbatimImporter(BaseMediaImporter):
 5 |     """Imports media without making any changes.
 6 |     """
 7 |     def import_media(self, input_filename, output_filename):
 8 |         if input_filename != output_filename:
 9 |             shutil.copyfile(input_filename, output_filename)
10 | 


--------------------------------------------------------------------------------
/qualitative_coding/media_importers/vtt.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.media_importers.base import BaseMediaImporter
 2 | from textwrap import fill
 3 | import webvtt
 4 | 
 5 | class VTTImporter(BaseMediaImporter):
 6 |     """Imports a VTT transcript file, stripping out timestamps and collapsing 
 7 |     adjacent talk turns from the same speaker.
 8 |     """
 9 |     def import_media(self, input_filename, output_filename):
10 |         turns = []
11 |         current_speaker = None
12 |         current_speech = ""
13 |         for caption in webvtt.read(input_filename):
14 |             speaker, speech = caption.text.split(':', 1)
15 |             if speaker == current_speaker: 
16 |                 current_speech += speech
17 |             else:
18 |                 if current_speech: 
19 |                     turns.append({'speaker': current_speaker, 'speech': current_speech})
20 |                 current_speaker = speaker
21 |                 current_speech = speech
22 |         turns.append({'speaker': current_speaker, 'speech': current_speech})
23 |         with open(output_filename, 'w') as fh:
24 |             for i, turn in enumerate(turns):
25 |                 if i > 0:
26 |                     fh.write('\n\n')
27 |                 fh.write(fill(turn['speaker'] + ': ' + turn['speech'], width=80))
28 | 


--------------------------------------------------------------------------------
/qualitative_coding/migrations/__init__.py:
--------------------------------------------------------------------------------
 1 | import click
 2 | from pathlib import Path
 3 | import yaml
 4 | from semver import Version
 5 | from qualitative_coding.views.styles import info
 6 | from qualitative_coding.exceptions import QCError
 7 | from qualitative_coding.migrations.migration_0_2_3 import Migrate_0_2_3
 8 | from qualitative_coding.migrations.migration_1_0_0 import Migrate_1_0_0
 9 | from qualitative_coding.migrations.migration_1_4_0 import Migrate_1_4_0
10 | from qualitative_coding.helpers import read_settings
11 | 
# All known migrations, listed in ascending version order. The order matters:
# migrate() walks this list forwards to apply and in reverse to revert, and
# latest_migration() reads the last entry.
migrations = [
    Migrate_0_2_3(),
    Migrate_1_0_0(),
    Migrate_1_4_0(),
]
17 | 
def migrate(settings_path, target=None):
    """Moves a project from its current qc version to a target version.

    The current version is read from `qc_version` in the settings file; a
    QCError is raised when that key is missing. `target` is a version string
    and defaults to the latest migration's version. It must match the version
    of one of the entries in `migrations`, otherwise QCError is raised.

    Moving up applies, in list order, each migration whose version is above
    the current version and no higher than the target. Moving down reverts,
    in reverse list order, each migration whose version is above the target
    and no higher than the current version.
    """
    settings = read_settings(settings_path)
    if 'qc_version' not in settings:
        raise QCError("qc_version not specified in settings.")
    current_version = Version.parse(settings['qc_version'])
    if target:
        target_version = Version.parse(target)
    else:
        target_version = latest_migration()
    known_versions = [m.version for m in migrations]
    if target_version not in known_versions:
        raise QCError(f"{target} is not a recognized migration")
    if current_version < target_version:
        for migration in migrations:
            if not (current_version < migration.version <= target_version):
                continue
            click.echo(info(f"Applying migration {migration.version}"))
            migration.apply(settings_path)
    elif target_version < current_version:
        for migration in reversed(migrations):
            if not (target_version < migration.version <= current_version):
                continue
            click.echo(info(f"Reverting migration {migration.version}"))
            migration.revert(settings_path)
36 | 
def latest_migration():
    "Returns the version of the final entry in `migrations`."
    newest = migrations[-1]
    return newest.version
39 | 


--------------------------------------------------------------------------------
/qualitative_coding/migrations/migration.py:
--------------------------------------------------------------------------------
 1 | from semver import Version
 2 | from qualitative_coding.helpers import read_settings
 3 | from pathlib import Path
 4 | import yaml
 5 | 
 6 | class QCMigration:
 7 |     """A migration specifies how to move between versions of qc.
 8 |     When migrating between version X up to version Y, all migrations
 9 |     whose semantic versions are greater than X and at least Y will be applied
10 |     in order. 
11 |     """
12 | 
13 |     _version = "0.0.0"
14 | 
15 |     @property
16 |     def version(self):
17 |         return Version.parse(self._version)
18 | 
19 |     def apply(self, settings_path):
20 |         "Forward migration"
21 | 
22 |     def revert(self, settings_path):
23 |         "Revert migration"
24 |         return settings
25 | 
26 |     def set_setting(self, settings_path, key, default_value):
27 |         """Writes a value to settings. 
28 |         By default, only writes the value if the key is not set. 
29 |         When force is True, always writes the value.
30 |         """
31 |         settings = read_settings(settings_path)
32 |         settings[key] = default_value
33 |         Path(settings_path).write_text(yaml.dump(settings))
34 |         return settings
35 | 
36 |     def delete_setting(self, settings_path, key):
37 |         """Deletes a value in settings.
38 |         """
39 |         settings = read_settings(settings_path)
40 |         del settings[key]
41 |         Path(settings_path).write_text(yaml.dump(settings))
42 |         return settings
43 | 


--------------------------------------------------------------------------------
/qualitative_coding/migrations/migration_0_2_3.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.migrations.migration import QCMigration
 2 | 
 3 | class Migrate_0_2_3(QCMigration):
 4 |     _version = "0.2.3"
 5 | 
 6 |     def apply(self, settings):
 7 |         return settings
 8 | 
 9 |     def revert(self, settings):
10 |         return settings
11 | 


--------------------------------------------------------------------------------
/qualitative_coding/migrations/migration_1_0_0.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import shutil
  3 | from sqlalchemy import (
  4 |     create_engine,
  5 | )
  6 | from qualitative_coding.migrations.migration import QCMigration
  7 | from qualitative_coding.corpus import QCCorpus
  8 | from qualitative_coding.media_importers import media_importers
  9 | from qualitative_coding.helpers import read_settings
 10 | from qualitative_coding.database.models import (
 11 |     Base,
 12 |     Document,
 13 |     CodedLine
 14 | )
 15 | from qualitative_coding.views.styles import (
 16 |     address, 
 17 |     question, 
 18 |     debug,
 19 |     info,
 20 |     warn,
 21 |     confirm,
 22 |     error,
 23 |     success
 24 | )
 25 | import os
 26 | 
 27 | class Migrate_1_0_0(QCMigration):
 28 |     _version = "1.0.0"
 29 | 
 30 |     def apply(self, settings_path):
 31 |         self.set_setting(settings_path, "qc_version", self._version)
 32 |         self.set_setting(settings_path, "database", 'qualitative_coding.sqlite3')
 33 |         self.set_setting(settings_path, "editor", 'vim')
 34 |         QCCorpus.initialize(settings_path)
 35 |         corpus = QCCorpus(settings_path)
 36 |         corpus_v0 = QCCorpusV0(settings_path)
 37 |         with corpus.session():
 38 |             for filepath in corpus.corpus_dir.iterdir():
 39 |                 if filepath.is_dir():
 40 |                     corpus.import_media(filepath, recursive=True, importer="verbatim")
 41 |                 else:
 42 |                     corpus.import_media(filepath, importer="verbatim")
 43 |             for dir_path, dir_names, filenames in os.walk(corpus.corpus_dir):
 44 |                 for fn in filenames:
 45 |                     file_path = Path(dir_path) / fn
 46 |                     corpus_path = str(corpus.get_corpus_path(file_path))
 47 |                     coded_lines = []
 48 |                     for coder_name, code_data in corpus_v0.get_codes(file_path).items():
 49 |                         coder = corpus.get_or_create_coder(coder_name)
 50 |                         for line_num, code_name in code_data:
 51 |                             coded_lines.append({
 52 |                                 "line": line_num,
 53 |                                 "code_id": corpus.get_or_create_code(code_name).name
 54 |                             })
 55 |                     corpus.update_coded_lines(corpus_path, coder_name, coded_lines)
 56 |         shutil.rmtree(corpus_v0.codes_dir)
 57 | 
 58 |     def revert(self, settings_path):
 59 |         self.delete_setting(settings_path, "qc_version")
 60 |         self.delete_setting(settings_path, "database")
 61 |         self.delete_setting(settings_path, "editor")
 62 | 
 63 | class QCCorpusV0:
 64 |     def __init__(self, settings_file="settings.yaml"):
 65 |         self.settings_file = Path(settings_file)
 66 |         self.settings = read_settings(settings_file)
 67 |         self.corpus_dir = Path(self.settings['corpus_dir']).resolve()
 68 |         self.codes_dir = Path(self.settings['codes_dir']).resolve()
 69 | 
 70 |     def get_codes(self, corpus_text_path, coder=None, merge=False, unit='line'):
 71 |         """
 72 |         Returns codes pertaining to a corpus text.
 73 |         Returns a dict like {coder_id: [(line_num, code)...]}. 
 74 |         If merge or coder, there is no ambiguity;instead returns a list of [(line_num, code)...]
 75 |         If unit is 'document', returns a set of codes when coder or merge is given, otherwise
 76 |         returns a dict mapping coders to sets of codes.
 77 |         """
 78 |         codes = {}
 79 |         for f in self.get_code_files_for_corpus_file(corpus_text_path, coder=coder):
 80 |             codes[self.get_coder_from_code_path(f)] = self.read_codes(f)
 81 |         if coder:
 82 |             return codes.get(coder, {})
 83 |         elif merge:
 84 |             if unit == 'line': 
 85 |                 return sum(codes.values(), [])
 86 |             elif unit == 'document': 
 87 |                 return set().union(*codes.values())
 88 |             else:
 89 |                 raise NotImplementedError("Unit must be 'line' or 'document'.")
 90 |         else:
 91 |             return codes
 92 | 
 93 |     def get_code_files_for_corpus_file(self, corpus_text_path, coder=None):
 94 |         "Returns an iterator over code files pertaining to a corpus file"
 95 |         text_path = corpus_text_path.relative_to(self.corpus_dir)
 96 |         name_parts = text_path.name.split('.')
 97 |         return self.codes_dir.glob(str(text_path) + '.' + (coder or '*') + '.codes')
 98 | 
 99 |     def get_coder_from_code_path(self, code_file_path):
100 |         "Maps Path('some_interview.txt.cp.codes') -> 'cp'"
101 |         parts = code_file_path.name.split('.')
102 |         return parts[-2]
103 | 
104 |     def read_codes(self, code_file_path):
105 |         """When passed a file object, returns a list of (line_num, code) if unit is 'line'. 
106 |         When unit is 'document', Returns a set of codes.
107 |         """
108 |         codes = []
109 |         with open(code_file_path) as inf:
110 |             for line_num, line in enumerate(inf):
111 |                 codes += [(line_num, code.strip()) for code in line.split(",") if code.strip()]
112 |         return codes
113 | 


--------------------------------------------------------------------------------
/qualitative_coding/migrations/migration_1_4_0.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.migrations.migration import QCMigration
 2 | from pathlib import Path
 3 | 
 4 | class Migrate_1_4_0(QCMigration):
 5 |     _version = "1.4.0"
 6 | 
 7 |     def apply(self, settings_path):
 8 |         self.set_setting(settings_path, "qc_version", "1.4.0")
 9 |         self.set_setting(settings_path, "verbose", False)
10 |         self.set_setting(settings_path, "log_file", 'qc.log')
11 |         self.delete_setting(settings_path, "logs_dir")
12 | 
13 |     def revert(self, settings_path):
14 |         self.set_setting(settings_path, "qc_version", "1.0.0")
15 |         self.set_setting(settings_path, "logs_dir", "logs")
16 |         self.delete_setting(settings_path, "log_file")
17 |         self.delete_setting(settings_path, "verbose")
18 |         logs_dir = Path(settings_path).parent / "logs_dir"
19 |         if not logs_dir.exists():
20 |             logs_dir.mkdir(parents=True)
21 | 


--------------------------------------------------------------------------------
/qualitative_coding/refi_qda/nvivo_project.qdpx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/qualitative_coding/refi_qda/nvivo_project.qdpx


--------------------------------------------------------------------------------
/qualitative_coding/refi_qda/reader.py:
--------------------------------------------------------------------------------
  1 | from qualitative_coding.exceptions import QCError
  2 | from qualitative_coding.corpus import QCCorpus
  3 | from qualitative_coding.tree_node import TreeNode
  4 | from xmlschema.validators.exceptions import XMLSchemaValidationError
  5 | from collections import defaultdict
  6 | from subprocess import run
  7 | from xmlschema import validate
  8 | from pathlib import Path
  9 | import re
 10 | import xml.etree.ElementTree as ET
 11 | import importlib.resources
 12 | import shutil
 13 | import zipfile
 14 | import structlog
 15 | 
 16 | log = structlog.get_logger()
 17 | 
 18 | class REFIQDAReader:
 19 |     """Imports an existing REFI-QDA project.
 20 |     NOTE: Currently does not support importing memos.
 21 |     """
 22 |     default_coder = "default"
 23 | 
 24 |     def __init__(self, qdpxfile):
 25 |         self.qdpxfile = qdpxfile
 26 |         self.validate(qdpxfile)
 27 | 
 28 |     def unpack_project(self, destination):
 29 |         self.dest_path = Path(destination)
 30 |         if not self.dest_path.exists():
 31 |             raise QCError(f"Cannot import project to {dest_path}; no such directory.")
 32 |         if len(list(self.dest_path.iterdir())) > 0:
 33 |             raise QCError("You can only import a project into an empty directory.")
 34 |         QCCorpus.initialize()
 35 |         self.corpus = QCCorpus(self.dest_path / "settings.yaml")
 36 |         (self.dest_path / "source").mkdir()
 37 |         (self.dest_path / "source" / "import").mkdir()
 38 |         with zipfile.ZipFile(self.qdpxfile, 'r', zipfile.ZIP_DEFLATED) as zf:
 39 |             zf.extractall((self.dest_path / "source"))
 40 |         tree = ET.parse(self.dest_path / "source" / "project.qde")
 41 |         with self.corpus.session():
 42 |             self.unpack_xml(tree.getroot())
 43 | 
 44 |     def unpack_xml(self, root):
 45 |         self.coder_guids = {}
 46 |         for child in root:
 47 |             if child.tag.endswith("Users"):
 48 |                 self.unpack_coders(child)
 49 |         for child in root:
 50 |             if child.tag.endswith("CodeBook"):
 51 |                 self.unpack_codebook(child)
 52 |         for child in root:
 53 |             if child.tag.endswith("Variables"):
 54 |                 log.warning("{self.qdpxfile} contains Variables, which are not supported by qc.")
 55 |         self.unpack_unsupported(root, "Variables")
 56 |         self.unpack_unsupported(root, "Cases")
 57 |         for child in root:
 58 |             if child.tag.endswith("Sources"):
 59 |                 self.unpack_sources(child)
 60 |         self.unpack_unsupported(root, "Notes")
 61 |         self.unpack_unsupported(root, "Links")
 62 |         self.unpack_unsupported(root, "Graphs")
 63 |         self.unpack_unsupported(root, "Description")
 64 |         self.unpack_unsupported(root, "NoteRef")
 65 | 
 66 |     def unpack_unsupported(self, root, tagname):
 67 |         for child in root:
 68 |             if child.tag.endswith(tagname):
 69 |                 log.warning(f"{self.qdpxfile} contains {tagname}, which are not supported by qc.")
 70 | 
 71 |     def unpack_coders(self, users):
 72 |         for user in users:
 73 |             name = user.attrib['name']
 74 |             guid = user.attrib['guid']
 75 |             self.corpus.get_or_create_coder(name)
 76 |             self.coder_guids[guid] = name
 77 | 
 78 |     def create_default_coder_if_none_defined(self):
 79 |         if not hasattr(self, "coder_guids"):
 80 |             self.corpus.get_or_create_coder("default")
 81 | 
 82 |     def unpack_codebook(self, codebook):
 83 |         for child in codebook:
 84 |             self.unpack_codes(child)
 85 | 
 86 |     def unpack_codes(self, codes):
 87 |         self.code_guids = {}
 88 |         self.code_tree = TreeNode(TreeNode.root)
 89 | 
 90 |         def unpack_code(code, parent):
 91 |             name = code.attrib['name']
 92 |             guid = code.attrib['guid']
 93 |             self.corpus.get_or_create_code(name)
 94 |             self.code_guids[guid] = name
 95 |             node = TreeNode(name, parent=parent)
 96 |             parent.children.append(node)
 97 |             for child in code:
 98 |                 if child.tag.endswith("Code"):
 99 |                     unpack_code(child, node)
100 | 
101 |         for code in codes:
102 |             unpack_code(code, self.code_tree)
103 | 
104 |         TreeNode.write_yaml(self.corpus.codebook_path, self.code_tree)
105 | 
106 |     def unpack_sources(self, sources):
107 |         self.document_guids = {}
108 |         for source in sources:
109 |             if not source.attrib.get('plainTextPath'):
110 |                 log.warning(
111 |                     f"Skipping import of source {source['name']}; " + 
112 |                     "only text sources are supported."
113 |                 )
114 |                 continue
115 |             guid = source.attrib['guid']
116 |             plain_text_path = source.attrib['plainTextPath'].replace("internal://", "")
117 |             qdpx_path = self.dest_path / "source" / "sources" / plain_text_path
118 |             importable_path = (self.dest_path / "source" / "import" / source.attrib['name']).with_suffix(
119 |                 qdpx_path.suffix
120 |             )
121 |             log.info(f"Copying {qdpx_path} -> {importable_path}")
122 |             shutil.copyfile(qdpx_path, importable_path)
123 |             self.document_guids[guid] = importable_path.name
124 |             self.corpus.import_media(importable_path, importer="verbatim")
125 |             line_positions = self.line_positions(importable_path)
126 |             coded_lines = defaultdict(list)
127 |             for selection in source:
128 |                 if selection.tag.endswith("PlainTextSelection"):
129 |                     match = re.match("line:(\d+)", selection.attrib.get("name", ""))
130 |                     if match:
131 |                         line = int(match.group(1))
132 |                     else:
133 |                         position = int(selection.attrib['startPosition'])
134 |                         line = self.get_line_for_position(position, line_positions)
135 |                     for coding in selection:
136 |                         if coding.tag.endswith("Coding"):
137 |                             coder_guid = coding.attrib['creatingUser']
138 |                             coder = self.coder_guids.get(coder_guid, self.default_coder)
139 |                             for coderef in coding:
140 |                                 if coderef.tag.endswith("CodeRef"):
141 |                                     code = self.code_guids[coderef.attrib['targetGUID']]
142 |                                     coded_lines[coder].append({'line': line, 'code_id': code})
143 |             for coder, cls in coded_lines.items():
144 |                 self.corpus.update_coded_lines(importable_path.name, coder, cls)
145 | 
146 |     def get_line_for_position(self, position, line_positions):
147 |         for line, (start, end) in enumerate(line_positions):
148 |             if position >= start:
149 |                 return line
150 | 
151 |     def validate(self, qdpxfile):
152 |         if not Path(qdpxfile).suffix == ".qdpx":
153 |             raise QCError(f"{qdpxfile} must end in .qdpx")
154 |         if not zipfile.is_zipfile(qdpxfile):
155 |             raise QCError(f"{qdpxfile} is not a zipfile")
156 |         with zipfile.ZipFile(qdpxfile, 'r', zipfile.ZIP_DEFLATED) as zf:
157 |             zroot = zipfile.Path(zf)
158 |             qde = zroot / "project.qde"
159 |             if not qde.exists():
160 |                 raise QCError("{qdpxfile} does not contain project.qde")
161 |             qcf = importlib.resources.files("qualitative_coding")
162 |             schema_path = qcf / "refi_qda" / "schema.xsd"
163 |             try:
164 |                 validate(qde.read_text(), schema_path)
165 |             except XMLSchemaValidationError as err:
166 |                 raise QCError(
167 |                     f"When reading {qdpxfile}, project.qde did not validate " + 
168 |                     f"against the REFI-QDA schema:\n" + 
169 |                     repr(err)
170 |                 )
171 | 
172 |     def line_positions(self, corpus_file_path):
173 |         """returns a list of (start, end) character positions for lines in doc.
174 |         """
175 |         text = (self.corpus.corpus_dir / corpus_file_path).read_text()
176 |         lines = []
177 |         index = 0
178 |         for line in text:
179 |             start = index
180 |             end = index + len(line)
181 |             lines.append((start, end))
182 |             index += len(line)
183 |         return lines
184 | 
185 |     def print_tree(self, project_path):
186 |         result = run("tree", cwd=project_path, capture_output=True, text=True, shell=True)
187 |         print(result.stdout)
188 | 
189 | 


--------------------------------------------------------------------------------
/qualitative_coding/refi_qda/schema.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <!-- edited with XMLSpy v2005 rel. 3 U (http://www.altova.com) by  Fred van Blommestein -->
  3 | <!--
  4 |   Library:           QDA-XML version 1.0
  5 |                      
  6 |   Release Date:      18 March 2019
  7 |   Module:            Project.xsd
  8 |   -->
  9 | <!-- ===== Copyright Notice ===== -->
 10 | <!--
 11 | The Rotterdam Exchange Format Initiative (REFI, www.qdasoftware.org) as the publisher of 
 12 | QDA-XML takes no position regarding the validity or scope of any 
 13 | intellectual property or other rights that might be claimed to pertain 
 14 | to the implementation or use of the technology described in this 
 15 | document or the extent to which any license under such rights 
 16 | might or might not be available; neither does it represent that it has 
 17 | made any effort to identify any such rights. Information on QDA-XML's 
 18 | procedures with respect to rights in QDA-XML specifications can be 
 19 | found at the QDA-XML website www.qdasoftware.org..
 20 | 
 21 | REFI invites any interested party to bring to its attention any 
 22 | copyrights, patents or patent applications, or other proprietary 
 23 | rights which may cover technology that may be required to 
 24 | implement this specification.
 25 | 
 26 | This specification is licensed under the MIT license.  
 27 | 
 28 | Copyright 2019 REFI www.qdasoftware.org.
 29 |  
 30 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 31 | 
 32 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 33 | 
 34 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 35 | -->
 36 | <xsd:schema xmlns="urn:QDA-XML:project:1.0" xmlns:xsd="http://www.w3.org/2001/XMLSchema" targetNamespace="urn:QDA-XML:project:1.0" elementFormDefault="qualified" attributeFormDefault="unqualified" version="1.0">
 37 | 	<!-- ===== Element Declarations ===== -->
 38 | 	<xsd:element name="Project" type="ProjectType">
 39 | 		<xsd:annotation>
 40 | 			<xsd:documentation>This element MUST be conveyed as the root element in any instance document based on this Schema expression</xsd:documentation>
 41 | 		</xsd:annotation>
 42 | 	</xsd:element>
 43 | 	<!-- ===== Type Definitions ===== -->
 44 | 	<xsd:complexType name="ProjectType">
 45 | 		<xsd:sequence>
 46 | 			<xsd:element name="Users" type="UsersType" minOccurs="0"/>
 47 | 			<xsd:element name="CodeBook" type="CodeBookType" minOccurs="0"/>
 48 | 			<xsd:element name="Variables" type="VariablesType" minOccurs="0"/>
 49 | 			<xsd:element name="Cases" type="CasesType" minOccurs="0"/>
 50 | 			<xsd:element name="Sources" type="SourcesType" minOccurs="0"/>
 51 | 			<xsd:element name="Notes" type="NotesType" minOccurs="0"/>
 52 | 			<xsd:element name="Links" type="LinksType" minOccurs="0"/>
 53 | 			<xsd:element name="Sets" type="SetsType" minOccurs="0"/>
 54 | 			<xsd:element name="Graphs" type="GraphsType" minOccurs="0"/>
 55 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
 56 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
 57 | 			<!-- Note(s) that apply to the project as a whole -->
 58 | 		</xsd:sequence>
 59 | 		<xsd:attribute name="name" type="xsd:string" use="required"/>
 60 | 		<xsd:attribute name="origin" type="xsd:string"/>
 61 | 		<xsd:attribute name="creatingUserGUID" type="GUIDType"/>
 62 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
 63 | 		<xsd:attribute name="modifyingUserGUID" type="GUIDType"/>
 64 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
 65 | 		<xsd:attribute name="basePath" type="xsd:string"/>
 66 | 	</xsd:complexType>
 67 | 	<xsd:complexType name="UsersType">
 68 | 		<xsd:sequence>
 69 | 			<xsd:element name="User" type="UserType" maxOccurs="unbounded"/>
 70 | 		</xsd:sequence>
 71 | 	</xsd:complexType>
 72 | 	<xsd:complexType name="UserType">
 73 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
 74 | 		<xsd:attribute name="name" type="xsd:string"/>
 75 | 		<xsd:attribute name="id" type="xsd:string"/>
 76 | 	</xsd:complexType>
 77 | 	<xsd:complexType name="CodeBookType">
 78 | 		<xsd:sequence>
 79 | 			<xsd:element name="Codes" type="CodesType"/>
 80 | 		</xsd:sequence>
 81 | 	</xsd:complexType>
 82 | 	<xsd:complexType name="CodesType">
 83 | 		<xsd:sequence>
 84 | 			<xsd:element name="Code" type="CodeType" maxOccurs="unbounded"/>
 85 | 		</xsd:sequence>
 86 | 	</xsd:complexType>
 87 | 	<xsd:complexType name="CodeType">
 88 | 		<xsd:sequence>
 89 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
 90 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
 91 | 			<xsd:element name="Code" type="CodeType" minOccurs="0" maxOccurs="unbounded"/>
 92 | 		</xsd:sequence>
 93 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
 94 | 		<xsd:attribute name="name" type="xsd:string" use="required"/>
 95 | 		<xsd:attribute name="isCodable" type="xsd:boolean" use="required"/>
 96 | 		<xsd:attribute name="color" type="RGBType"/>
 97 | 	</xsd:complexType>
 98 | 	<xsd:complexType name="CasesType">
 99 | 		<xsd:sequence>
100 | 			<xsd:element name="Case" type="CaseType" maxOccurs="unbounded"/>
101 | 		</xsd:sequence>
102 | 	</xsd:complexType>
103 | 	<xsd:complexType name="CaseType">
104 | 		<xsd:sequence>
105 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
106 | 			<xsd:element name="CodeRef" type="CodeRefType" minOccurs="0" maxOccurs="unbounded"/>
107 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
108 | 			<xsd:element name="SourceRef" type="SourceRefType" minOccurs="0" maxOccurs="unbounded"/>
109 | 			<xsd:element name="SelectionRef" type="SelectionRefType" minOccurs="0" maxOccurs="unbounded"/>
110 | 		</xsd:sequence>
111 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
112 | 		<xsd:attribute name="name" type="xsd:string"/>
113 | 	</xsd:complexType>
114 | 	<xsd:complexType name="VariablesType">
115 | 		<xsd:sequence>
116 | 			<xsd:element name="Variable" type="VariableType" maxOccurs="unbounded"/>
117 | 		</xsd:sequence>
118 | 	</xsd:complexType>
119 | 	<xsd:complexType name="VariableType">
120 | 		<xsd:sequence>
121 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
122 | 		</xsd:sequence>
123 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
124 | 		<xsd:attribute name="name" type="xsd:string" use="required"/>
125 | 		<xsd:attribute name="typeOfVariable" type="typeOfVariableType" use="required"/>
126 | 	</xsd:complexType>
127 | 	<xsd:complexType name="VariableValueType">
128 | 		<xsd:sequence>
129 | 			<xsd:element name="VariableRef" type="VariableRefType"/>
130 | 			<xsd:choice>
131 | 				<xsd:element name="TextValue" type="xsd:string" minOccurs="0"/>
132 | 				<xsd:element name="BooleanValue" type="xsd:boolean" minOccurs="0"/>
133 | 				<xsd:element name="IntegerValue" type="xsd:integer" minOccurs="0"/>
134 | 				<xsd:element name="FloatValue" type="xsd:decimal" minOccurs="0"/>
135 | 				<xsd:element name="DateValue" type="xsd:date" minOccurs="0"/>
136 | 				<xsd:element name="DateTimeValue" type="xsd:dateTime" minOccurs="0"/>
137 | 			</xsd:choice>
138 | 		</xsd:sequence>
139 | 	</xsd:complexType>
140 | 	<xsd:complexType name="SetsType">
141 | 		<xsd:sequence>
142 | 			<xsd:element name="Set" type="SetType" maxOccurs="unbounded"/>
143 | 		</xsd:sequence>
144 | 	</xsd:complexType>
145 | 	<xsd:complexType name="SetType">
146 | 		<xsd:sequence>
147 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
148 | 			<xsd:element name="MemberCode" type="CodeRefType" minOccurs="0" maxOccurs="unbounded"/>
149 | 			<xsd:element name="MemberSource" type="SourceRefType" minOccurs="0" maxOccurs="unbounded"/>
150 | 			<xsd:element name="MemberNote" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
151 | 		</xsd:sequence>
152 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
153 | 		<xsd:attribute name="name" type="xsd:string" use="required"/>
154 | 	</xsd:complexType>
155 | 	<xsd:complexType name="SourcesType">
156 | 		<xsd:choice maxOccurs="unbounded">
157 | 			<xsd:element name="TextSource" type="TextSourceType"/>
158 | 			<xsd:element name="PictureSource" type="PictureSourceType"/>
159 | 			<xsd:element name="PDFSource" type="PDFSourceType"/>
160 | 			<xsd:element name="AudioSource" type="AudioSourceType"/>
161 | 			<xsd:element name="VideoSource" type="VideoSourceType"/>
162 | 		</xsd:choice>
163 | 	</xsd:complexType>
164 | 	<xsd:complexType name="TextSourceType">
165 | 		<xsd:sequence>
166 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
167 | 			<xsd:element name="PlainTextContent" type="xsd:string" minOccurs="0"/>
168 | 			<xsd:element name="PlainTextSelection" type="PlainTextSelectionType" minOccurs="0" maxOccurs="unbounded"/>
169 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
170 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
171 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
172 | 		</xsd:sequence>
173 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
174 | 		<xsd:attribute name="name" type="xsd:string"/>
175 | 		<xsd:attribute name="richTextPath" type="xsd:string"/>
176 | 		<xsd:attribute name="plainTextPath" type="xsd:string"/>
177 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
178 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
179 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
180 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
181 | 		<!-- Either PlainTextContent or plainTextPath MUST be filled, not both -->
182 | 	</xsd:complexType>
183 | 	<xsd:complexType name="PlainTextSelectionType">
184 | 		<xsd:sequence>
185 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
186 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
187 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
188 | 		</xsd:sequence>
189 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
190 | 		<xsd:attribute name="name" type="xsd:string"/>
191 | 		<xsd:attribute name="startPosition" type="xsd:integer" use="required"/>
192 | 		<xsd:attribute name="endPosition" type="xsd:integer" use="required"/>
193 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
194 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
195 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
196 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
197 | 	</xsd:complexType>
198 | 	<xsd:complexType name="PictureSourceType">
199 | 		<xsd:sequence>
200 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
201 | 			<xsd:element name="TextDescription" type="TextSourceType" minOccurs="0"/>
202 | 			<xsd:element name="PictureSelection" type="PictureSelectionType" minOccurs="0" maxOccurs="unbounded"/>
203 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
204 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
205 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
206 | 		</xsd:sequence>
207 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
208 | 		<xsd:attribute name="name" type="xsd:string"/>
209 | 		<xsd:attribute name="path" type="xsd:string"/>
210 | 		<xsd:attribute name="currentPath" type="xsd:string"/>
211 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
212 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
213 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
214 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
215 | 	</xsd:complexType>
216 | 	<xsd:complexType name="PictureSelectionType">
217 | 		<xsd:sequence>
218 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
219 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
220 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
221 | 		</xsd:sequence>
222 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
223 | 		<xsd:attribute name="name" type="xsd:string"/>
224 | 		<xsd:attribute name="firstX" type="xsd:integer" use="required"/>
225 | 		<xsd:attribute name="firstY" type="xsd:integer" use="required"/>
226 | 		<xsd:attribute name="secondX" type="xsd:integer" use="required"/>
227 | 		<xsd:attribute name="secondY" type="xsd:integer" use="required"/>
228 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
229 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
230 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
231 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
232 | 	</xsd:complexType>
233 | 	<xsd:complexType name="PDFSourceType">
234 | 		<xsd:sequence>
235 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
236 | 			<xsd:element name="PDFSelection" type="PDFSelectionType" minOccurs="0" maxOccurs="unbounded"/>
237 | 			<xsd:element name="Representation" type="TextSourceType" minOccurs="0"/>
238 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
239 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
240 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
241 | 		</xsd:sequence>
242 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
243 | 		<xsd:attribute name="name" type="xsd:string"/>
244 | 		<xsd:attribute name="path" type="xsd:string"/>
245 | 		<xsd:attribute name="currentPath" type="xsd:string"/>
246 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
247 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
248 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
249 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
250 | 	</xsd:complexType>
251 | 	<xsd:complexType name="PDFSelectionType">
252 | 		<xsd:sequence>
253 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
254 | 			<xsd:element name="Representation" type="TextSourceType" minOccurs="0"/>
255 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
256 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
257 | 		</xsd:sequence>
258 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
259 | 		<xsd:attribute name="name" type="xsd:string"/>
260 | 		<xsd:attribute name="page" type="xsd:integer" use="required"/>
261 | 		<xsd:attribute name="firstX" type="xsd:integer" use="required"/>
262 | 		<xsd:attribute name="firstY" type="xsd:integer" use="required"/>
263 | 		<xsd:attribute name="secondX" type="xsd:integer" use="required"/>
264 | 		<xsd:attribute name="secondY" type="xsd:integer" use="required"/>
265 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
266 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
267 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
268 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
269 | 	</xsd:complexType>
270 | 	<xsd:complexType name="AudioSourceType">
271 | 		<xsd:sequence>
272 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
273 | 			<xsd:element name="Transcript" type="TranscriptType" minOccurs="0" maxOccurs="unbounded"/>
274 | 			<xsd:element name="AudioSelection" type="AudioSelectionType" minOccurs="0" maxOccurs="unbounded"/>
275 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
276 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
277 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
278 | 		</xsd:sequence>
279 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
280 | 		<xsd:attribute name="name" type="xsd:string"/>
281 | 		<xsd:attribute name="path" type="xsd:string"/>
282 | 		<xsd:attribute name="currentPath" type="xsd:string"/>
283 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
284 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
285 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
286 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
287 | 	</xsd:complexType>
288 | 	<xsd:complexType name="AudioSelectionType">
289 | 		<xsd:sequence>
290 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
291 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
292 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
293 | 		</xsd:sequence>
294 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
295 | 		<xsd:attribute name="name" type="xsd:string"/>
296 | 		<xsd:attribute name="begin" type="xsd:integer" use="required"/>
297 | 		<xsd:attribute name="end" type="xsd:integer" use="required"/>
298 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
299 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
300 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
301 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
302 | 	</xsd:complexType>
303 | 	<xsd:complexType name="VideoSourceType">
304 | 		<xsd:sequence>
305 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
306 | 			<xsd:element name="Transcript" type="TranscriptType" minOccurs="0" maxOccurs="unbounded"/>
307 | 			<xsd:element name="VideoSelection" type="VideoSelectionType" minOccurs="0" maxOccurs="unbounded"/>
308 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
309 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
310 | 			<xsd:element name="VariableValue" type="VariableValueType" minOccurs="0" maxOccurs="unbounded"/>
311 | 		</xsd:sequence>
312 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
313 | 		<xsd:attribute name="name" type="xsd:string"/>
314 | 		<xsd:attribute name="path" type="xsd:string"/>
315 | 		<xsd:attribute name="currentPath" type="xsd:string"/>
316 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
317 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
318 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
319 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
320 | 	</xsd:complexType>
321 | 	<xsd:complexType name="VideoSelectionType">
322 | 		<xsd:sequence>
323 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
324 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
325 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
326 | 		</xsd:sequence>
327 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
328 | 		<xsd:attribute name="name" type="xsd:string"/>
329 | 		<xsd:attribute name="begin" type="xsd:integer" use="required"/>
330 | 		<xsd:attribute name="end" type="xsd:integer" use="required"/>
331 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
332 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
333 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
334 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
335 | 	</xsd:complexType>
336 | 	<xsd:complexType name="TranscriptType">
337 | 		<xsd:sequence>
338 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
339 | 			<xsd:element name="PlainTextContent" type="xsd:string" minOccurs="0"/>
340 | 			<xsd:element name="SyncPoint" type="SyncPointType" minOccurs="0" maxOccurs="unbounded"/>
341 | 			<xsd:element name="TranscriptSelection" type="TranscriptSelectionType" minOccurs="0" maxOccurs="unbounded"/>
342 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
343 | 		</xsd:sequence>
344 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
345 | 		<xsd:attribute name="name" type="xsd:string"/>
346 | 		<xsd:attribute name="richTextPath" type="xsd:string"/>
347 | 		<xsd:attribute name="plainTextPath" type="xsd:string"/>
348 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
349 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
350 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
351 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
352 | 		<!-- Either PlainTextContent or plainTextPath MUST be filled, not both -->
353 | 	</xsd:complexType>
354 | 	<xsd:complexType name="TranscriptSelectionType">
355 | 		<xsd:sequence>
356 | 			<xsd:element name="Description" type="xsd:string" minOccurs="0"/>
357 | 			<xsd:element name="Coding" type="CodingType" minOccurs="0" maxOccurs="unbounded"/>
358 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
359 | 		</xsd:sequence>
360 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
361 | 		<xsd:attribute name="name" type="xsd:string"/>
362 | 		<xsd:attribute name="fromSyncPoint" type="GUIDType"/>
363 | 		<xsd:attribute name="toSyncPoint" type="GUIDType"/>
364 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
365 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
366 | 		<xsd:attribute name="modifyingUser" type="GUIDType"/>
367 | 		<xsd:attribute name="modifiedDateTime" type="xsd:dateTime"/>
368 | 	</xsd:complexType>
369 | 	<xsd:complexType name="SyncPointType">
370 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
371 | 		<xsd:attribute name="timeStamp" type="xsd:integer"/>
372 | 		<xsd:attribute name="position" type="xsd:integer"/>
373 | 	</xsd:complexType>
374 | 	<xsd:complexType name="CodingType">
375 | 		<xsd:sequence>
376 | 			<xsd:element name="CodeRef" type="CodeRefType"/>
377 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
378 | 		</xsd:sequence>
379 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
380 | 		<xsd:attribute name="creatingUser" type="GUIDType"/>
381 | 		<xsd:attribute name="creationDateTime" type="xsd:dateTime"/>
382 | 	</xsd:complexType>
383 | 	<xsd:complexType name="GraphsType">
384 | 		<xsd:sequence>
385 | 			<xsd:element name="Graph" type="GraphType" maxOccurs="unbounded"/>
386 | 		</xsd:sequence>
387 | 	</xsd:complexType>
388 | 	<xsd:complexType name="GraphType">
389 | 		<xsd:sequence>
390 | 			<xsd:element name="Vertex" type="VertexType" minOccurs="0" maxOccurs="unbounded"/>
391 | 			<xsd:element name="Edge" type="EdgeType" minOccurs="0" maxOccurs="unbounded"/>
392 | 		</xsd:sequence>
393 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
394 | 		<xsd:attribute name="name" type="xsd:string"/>
395 | 	</xsd:complexType>
396 | 	<xsd:complexType name="VertexType">
397 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
398 | 		<xsd:attribute name="representedGUID" type="GUIDType"/>
399 | 		<xsd:attribute name="name" type="xsd:string"/>
400 | 		<xsd:attribute name="firstX" type="xsd:integer" use="required"/>
401 | 		<xsd:attribute name="firstY" type="xsd:integer" use="required"/>
402 | 		<xsd:attribute name="secondX" type="xsd:integer"/>
403 | 		<xsd:attribute name="secondY" type="xsd:integer"/>
404 | 		<xsd:attribute name="shape" type="ShapeType"/>
405 | 		<xsd:attribute name="color" type="RGBType"/>
406 | 	</xsd:complexType>
407 | 	<xsd:complexType name="EdgeType">
408 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
409 | 		<xsd:attribute name="representedGUID" type="GUIDType"/>
410 | 		<xsd:attribute name="name" type="xsd:string"/>
411 | 		<xsd:attribute name="sourceVertex" type="GUIDType" use="required"/>
412 | 		<xsd:attribute name="targetVertex" type="GUIDType" use="required"/>
413 | 		<xsd:attribute name="color" type="RGBType"/>
414 | 		<xsd:attribute name="direction" type="directionType"/>
415 | 		<xsd:attribute name="lineStyle" type="LineStyleType"/>
416 | 	</xsd:complexType>
417 | 	<xsd:complexType name="NotesType">
418 | 		<xsd:sequence>
419 | 			<xsd:element name="Note" type="TextSourceType" maxOccurs="unbounded"/>
420 | 		</xsd:sequence>
421 | 	</xsd:complexType>
422 | 	<xsd:complexType name="LinksType">
423 | 		<xsd:sequence>
424 | 			<xsd:element name="Link" type="LinkType" maxOccurs="unbounded"/>
425 | 		</xsd:sequence>
426 | 	</xsd:complexType>
427 | 	<xsd:complexType name="LinkType">
428 | 		<xsd:sequence>
429 | 			<xsd:element name="NoteRef" type="NoteRefType" minOccurs="0" maxOccurs="unbounded"/>
430 | 		</xsd:sequence>
431 | 		<xsd:attribute name="guid" type="GUIDType" use="required"/>
432 | 		<xsd:attribute name="name" type="xsd:string"/>
433 | 		<xsd:attribute name="direction" type="directionType"/>
434 | 		<xsd:attribute name="color" type="RGBType"/>
435 | 		<xsd:attribute name="originGUID" type="GUIDType"/>
436 | 		<xsd:attribute name="targetGUID" type="GUIDType"/>
437 | 	</xsd:complexType>
438 | 	<xsd:complexType name="NoteRefType">
439 | 		<xsd:attribute name="targetGUID" type="GUIDType" use="required"/>
440 | 	</xsd:complexType>
441 | 	<xsd:complexType name="CodeRefType">
442 | 		<xsd:attribute name="targetGUID" type="GUIDType" use="required"/>
443 | 	</xsd:complexType>
444 | 	<xsd:complexType name="SourceRefType">
445 | 		<xsd:attribute name="targetGUID" type="GUIDType" use="required"/>
446 | 	</xsd:complexType>
447 | 	<xsd:complexType name="SelectionRefType">
448 | 		<xsd:attribute name="targetGUID" type="GUIDType" use="required"/>
449 | 	</xsd:complexType>
450 | 	<xsd:complexType name="VariableRefType">
451 | 		<xsd:attribute name="targetGUID" type="GUIDType" use="required"/>
452 | 	</xsd:complexType>
453 | 	<xsd:simpleType name="GUIDType">
454 | 		<xsd:restriction base="xsd:token">
455 | 			<xsd:pattern value="([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})|(\{[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\})"/>
456 | 		</xsd:restriction>
457 | 	</xsd:simpleType>
458 | 	<xsd:simpleType name="RGBType">
459 | 		<xsd:restriction base="xsd:token">
460 | 			<xsd:pattern value="#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})"/>
461 | 		</xsd:restriction>
462 | 	</xsd:simpleType>
463 | 	<xsd:simpleType name="directionType">
464 | 		<xsd:restriction base="xsd:token">
465 | 			<xsd:enumeration value="Associative"/>
466 | 			<xsd:enumeration value="OneWay"/>
467 | 			<xsd:enumeration value="Bidirectional"/>
468 | 		</xsd:restriction>
469 | 	</xsd:simpleType>
470 | 	<xsd:simpleType name="typeOfVariableType">
471 | 		<xsd:restriction base="xsd:token">
472 | 			<xsd:enumeration value="Text"/>
473 | 			<xsd:enumeration value="Boolean"/>
474 | 			<xsd:enumeration value="Integer"/>
475 | 			<xsd:enumeration value="Float"/>
476 | 			<xsd:enumeration value="Date"/>
477 | 			<xsd:enumeration value="DateTime"/>
478 | 		</xsd:restriction>
479 | 	</xsd:simpleType>
480 | 	<xsd:simpleType name="ShapeType">
481 | 		<xsd:restriction base="xsd:token">
482 | 			<xsd:enumeration value="Person"/>
483 | 			<xsd:enumeration value="Oval"/>
484 | 			<xsd:enumeration value="Rectangle"/>
485 | 			<xsd:enumeration value="RoundedRectangle"/>
486 | 			<xsd:enumeration value="Star"/>
487 | 			<xsd:enumeration value="LeftTriangle"/>
488 | 			<xsd:enumeration value="RightTriangle"/>
489 | 			<xsd:enumeration value="UpTriangle"/>
490 | 			<xsd:enumeration value="DownTriangle"/>
491 | 			<xsd:enumeration value="Note"/>
492 | 		</xsd:restriction>
493 | 	</xsd:simpleType>
494 | 	<xsd:simpleType name="LineStyleType">
495 | 		<xsd:restriction base="xsd:token">
496 | 			<xsd:enumeration value="dotted"/>
497 | 			<xsd:enumeration value="dashed"/>
498 | 			<xsd:enumeration value="solid"/>
499 | 		</xsd:restriction>
500 | 	</xsd:simpleType>
501 | </xsd:schema>
502 | 


--------------------------------------------------------------------------------
/qualitative_coding/refi_qda/writer.py:
--------------------------------------------------------------------------------
  1 | from qualitative_coding.corpus import QCCorpus
  2 | from qualitative_coding.exceptions import QCError, InvalidParameter
  3 | from tempfile import TemporaryDirectory
  4 | from shutil import copyfile
  5 | from pathlib import Path
  6 | from subprocess import run
  7 | from collections import defaultdict
  8 | from hashlib import md5
  9 | import os
 10 | from importlib.metadata import metadata
 11 | from zipfile import ZipFile, ZIP_DEFLATED
 12 | from uuid import UUID
 13 | from xml.etree.ElementTree import (
 14 |     Element,
 15 |     Comment,
 16 |     tostring,
 17 | )
 18 | import structlog
 19 | 
 20 | log = structlog.get_logger()
 21 | 
 22 | class REFIQDAWriter:
 23 |     """Exports a QC project as a REFI-QDA project.
 24 |     See specification at https://www.qdasoftware.org/
 25 |     """
 26 |     def __init__(self, settings, debug=False):
 27 |         self.settings = settings
 28 |         self.corpus = QCCorpus(settings)
 29 |         self.debug = debug
 30 | 
 31 |     def write(self, outpath):
 32 |         """Write a zip file at the given outpath
 33 |         """
 34 |         if Path(outpath).suffix != ".qdpx":
 35 |             raise InvalidParameter("REFI-QDA projects must have suffix .qdpx")
 36 |         with TemporaryDirectory() as tempdir:
 37 |             project_path = Path(tempdir)
 38 |             qde = self.write_xml(project_path / "project.qde")
 39 |             self.write_corpus(qde, project_path / "sources")
 40 |             if self.debug:
 41 |                 self.print_tree(project_path)
 42 |             with ZipFile(outpath, 'w', ZIP_DEFLATED) as zf:
 43 |                 for dirpath, dirnames, filenames in os.walk(tempdir):
 44 |                     for fn in filenames:
 45 |                         path = Path(dirpath) / fn
 46 |                         zf.write(path, arcname=path.relative_to(tempdir))
 47 | 
 48 |     def write_xml(self, outpath):
 49 |         root = self.xml_root()
 50 |         root.append(self.users_to_xml())
 51 |         root.append(self.codebook_to_xml())
 52 |         root.append(self.sources_to_xml())
 53 |         if self.debug:
 54 |             print(tostring(root, encoding="unicode"))
 55 |         outpath.write_text(tostring(root, encoding="unicode"))
 56 |         return root
 57 | 
 58 |     def write_corpus(self, qde, outpath):
 59 |         outpath.mkdir()
 60 |         for child in qde:
 61 |             if child.tag.endswith("Sources"):
 62 |                 for source in child:
 63 |                     project_path = self.corpus.corpus_dir / source.attrib['name']
 64 |                     export_path = outpath / source.attrib['plainTextPath'].replace("internal://", "")
 65 |                     log.info(f"Copying {project_path} -> {export_path}")
 66 |                     copyfile(project_path, export_path)
 67 | 
 68 |     def print_tree(self, project_path):
 69 |         result = run("tree", cwd=project_path, capture_output=True, text=True, shell=True)
 70 |         print(result.stdout)
 71 | 
 72 |     def xml_root(self):
 73 |         root = Element("Project")
 74 |         root.set("xmlns", "urn:QDA-XML:project:1.0")
 75 |         root.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
 76 |         version = metadata('qualitative-coding')['version']
 77 |         root.set("origin", f"qc {version}")
 78 |         root.set("name", "qc project")
 79 |         return root
 80 | 
 81 |     def codebook_to_xml(self):
 82 |         """Render the codebook as XML.
 83 |         Note that qc allows codes to appear at multiple places in the codebook. 
 84 |         However, each code in the xml tree requires its own GUID. Therefore, 
 85 |         Codings will artibrarily (but deterministically) specify the GUID of a code 
 86 |         when it appears multiple times in the codebook.
 87 |         Must be called before sources_to_xml.
 88 |         """
 89 |         def node_to_xml(node):
 90 |             xnode = Element("Code")
 91 |             xnode.set("name", node.name)
 92 |             guid = self.code_guid(node.expanded_name())
 93 |             if node.name not in self.code_guids:
 94 |                 self.code_guids[node.name] = guid
 95 |             xnode.set("guid", guid)
 96 |             xnode.set("isCodable", "true")
 97 |             for child in node.children:
 98 |                 xnode.append(node_to_xml(child))
 99 |             return xnode
100 | 
101 |         self.code_guids = {}
102 |         codebook = Element("CodeBook")
103 |         codes = Element("Codes")
104 |         codebook.append(codes)
105 |         with self.corpus.session():
106 |             root = self.corpus.get_codebook()
107 |         for node in root.children:
108 |             codes.append(node_to_xml(node))
109 |         return codebook
110 | 
111 |     def users_to_xml(self):
112 |         users = Element("Users")
113 |         with self.corpus.session():
114 |             for coder in self.corpus.get_all_coders():
115 |                 user = Element("User")
116 |                 user.set("name", coder.name)
117 |                 user.set("guid", self.guid(coder.name))
118 |                 users.append(user)
119 |         return users
120 | 
121 |     def sources_to_xml(self):
122 |         sources = Element("Sources")
123 |         with self.corpus.session():
124 |             sources = Element("Sources")
125 |             for doc in self.corpus.get_documents():
126 |                 source = Element("TextSource")
127 |                 source_guid = self.guid(doc.file_path)
128 |                 suffix = Path(doc.file_path).suffix
129 |                 internal_path = str(Path(source_guid).with_suffix(suffix))
130 |                 source.set("plainTextPath", "internal://" + internal_path)
131 |                 source.set("guid", source_guid)
132 |                 source.set("name", doc.file_path)
133 |                 doc_line_positions = self.line_positions(doc.file_path)
134 |                 coded_lines = self.corpus.get_coded_lines(file_list=[doc.file_path])
135 |                 lines_with_codes = defaultdict(list)
136 |                 for cl in coded_lines:
137 |                     lines_with_codes[cl.line].append(cl)
138 |                 for line, cls in lines_with_codes.items():
139 |                     selection = Element("PlainTextSelection")
140 |                     selection.set("guid", self.selection_guid(doc.file_path, line))
141 |                     selection.set("name", f"line:{line}")
142 |                     selection.set("startPosition", str(doc_line_positions[line][0]))
143 |                     selection.set("endPosition", str(doc_line_positions[line][1]))
144 |                     for code, coder, line, file_path in cls:
145 |                         coding = Element("Coding")
146 |                         coding.set("guid", self.coding_guid(code, coder, line, file_path))
147 |                         coding.set("creatingUser", self.coder_guid(coder))
148 |                         code_ref = Element("CodeRef")
149 |                         code_ref.set("targetGUID", self.code_guids[code])
150 |                         coding.append(code_ref)
151 |                         selection.append(coding)
152 |                     source.append(selection)
153 |                 sources.append(source)
154 |         return sources
155 | 
156 |     def line_positions(self, corpus_file_path):
157 |         """returns a list of (start, end) character positions for lines in doc.
158 |         """
159 |         lines = []
160 |         index = 0
161 |         with (self.corpus.corpus_dir / corpus_file_path).open() as fh:
162 |             for line in fh:
163 |                 start = index
164 |                 end = index + len(line)
165 |                 lines.append((start, end))
166 |                 index += len(line)
167 |         return lines
168 | 
169 |     def coder_guid(self, coder):
170 |         return self.guid(coder)
171 | 
172 |     def coding_guid(self, code, coder, line, file_path):
173 |         return self.guid(':'.join([file_path, str(line), coder, code]))
174 | 
175 |     def selection_guid(self, file_path, line):
176 |         return self.guid(f"{file_path}:{line}")
177 | 
178 |     def code_guid(self, code):
179 |         return self.guid(code)
180 | 
181 |     def guid(self, source):
182 |         digest = md5(source.encode('utf8')).hexdigest()[:16]
183 |         return str(UUID(bytes=digest.encode('utf8')))
184 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/qualitative_coding/tests/__init__.py


--------------------------------------------------------------------------------
/qualitative_coding/tests/fixtures.py:
--------------------------------------------------------------------------------
  1 | from unittest import TestCase
  2 | from pathlib import Path
  3 | from subprocess import run
  4 | from tempfile import TemporaryDirectory
  5 | from qualitative_coding.corpus import QCCorpus
  6 | from qualitative_coding.logs import configure_logger
  7 | from io import StringIO
  8 | import yaml
  9 | import csv
 10 | import sys
 11 | import os
 12 | 
 13 | class QCTestCase(TestCase):
 14 |     """A subclass of TestCase with methods for instantiating a QC project.
 15 |     """
 16 |     verbose = bool(os.environ.get('VERBOSE'))
 17 | 
 18 |     def setUp(self):
 19 |         self.set_up_qc_project()
 20 |         configure_logger(self.testpath / "settings.yaml")
 21 |         self.corpus = QCCorpus(self.testpath / "settings.yaml")
 22 | 
 23 |     def tearDown(self):
 24 |         self.tear_down_qc_project()
 25 | 
 26 |     def set_up_qc_project(self):
 27 |         self.tempdir = TemporaryDirectory()
 28 |         self.testpath = Path(self.tempdir.name)
 29 |         self.run_in_testpath("qc init")
 30 |         if self.verbose:
 31 |             self.update_settings('verbose', True)
 32 |         (self.testpath / "macbeth.txt").write_text(MACBETH)
 33 |         (self.testpath / "moby_dick.md").write_text(MOBY_DICK)
 34 | 
 35 |     def set_up_qc_project_0_2_3(self):
 36 |         self.tempdir = TemporaryDirectory()
 37 |         self.testpath = Path(self.tempdir.name)
 38 |         settings_0_2_3 = {
 39 |             'qc_version': "0.2.3",
 40 |             'corpus_dir': 'corpus',
 41 |             'codes_dir': 'codes',
 42 |             'log_file': 'qc.log',
 43 |             'memos_dir': 'memos',
 44 |             'codebook': 'codebook.yaml',
 45 |         }
 46 |         (self.testpath / "settings.yaml").write_text(yaml.dump(settings_0_2_3))
 47 |         for k, v in settings_0_2_3.items():
 48 |             if k.endswith("_dir"):
 49 |                 (self.testpath / v).mkdir()
 50 |         (self.testpath / "codebook.yaml").touch()
 51 |         (self.testpath / "corpus" / "macbeth.txt").write_text(MACBETH)
 52 |         (self.testpath / "codes" / "macbeth.txt.cp.codes").write_text(MACBETH_CODES_0_2_3)
 53 | 
 54 |     def tear_down_qc_project(self):
 55 |         self.tempdir.cleanup()
 56 | 
 57 |     def run_in_testpath(self, command):
 58 |         """Runs `command` with testpath as cwd.
 59 |         When debug is False, 
 60 |         """
 61 |         if self.verbose:
 62 |             result = run(command, shell=True, cwd=self.testpath, stdout=sys.stdout,
 63 |                     stderr=sys.stderr)
 64 |         else:
 65 |             result = run(command, shell=True, cwd=self.testpath, capture_output=True, text=True)
 66 |         return result
 67 | 
 68 |     def show_tree(self):
 69 |         self.run_in_testpath("tree", debug=True)
 70 | 
 71 |     def update_settings(self, key, value):
 72 |         settings_path = self.testpath / "settings.yaml"
 73 |         settings = yaml.safe_load(settings_path.read_text())
 74 |         if value is None:
 75 |             del settings[key]
 76 |         else:
 77 |             settings[key] = value
 78 |         settings_path.write_text(yaml.dump(settings))
 79 | 
 80 |     def assertFileExists(self, path, is_dir=False, message=None):
 81 |         if not Path(self.testpath / path).exists():
 82 |             message = message or f"Expected {path} to exist"
 83 |             raise AssertionError(message)
 84 |         if is_dir and not Path(self.testpath / path).is_dir():
 85 |             message = message or f"Expected {path} to be a directory"
 86 |             raise AssertionError(message)
 87 |         if not is_dir and Path(self.testpath / path).is_dir():
 88 |             message = message or f"Expected {path} to be a file, not a directory"
 89 |             raise AssertionError(message)
 90 | 
 91 |     def assertFileDoesNotExist(self, path, message=None):
 92 |         if Path(self.testpath / path).exists():
 93 |             message = message or f"Expected {path} not to exist"
 94 |             raise AssertionError(message)
 95 | 
 96 |     def set_mock_editor(self, verbose=False, crash=False):
 97 |         """Updates settings['editor'] to the mock editor.
 98 |         Also reinitializes corpus.
 99 |         """
100 |         command = str(Path("tests/mock_editor.py").resolve())
101 |         if verbose: 
102 |             command += " --verbose"
103 |         if crash: 
104 |             command += " --crash"
105 |         code_command = command + ' "{corpus_file_path}" "{codes_file_path}"'
106 |         memo_command = command + ' --memo "{memo_file_path}"'
107 |         self.update_settings("editor", "mock_editor")
108 |         self.update_settings("editors", {
109 |             'mock_editor': {
110 |                 'name': "Mock Editor",
111 |                 'code_command': code_command, 
112 |                 'memo_command': memo_command,
113 |             }
114 |         })
115 |         self.corpus = QCCorpus(self.testpath / "settings.yaml")
116 | 
117 |     def read_stats_tsv(self, stdout):
118 |         reader = csv.reader(StringIO(stdout), delimiter="\t")
119 |         table = [[item.strip() for item in row] for row in reader]
120 |         ix_name, *cols = table[0]
121 |         parse = lambda val: None if val == '' else float(val)
122 |         return {ix: dict(zip(cols, map(parse, vals))) for ix, *vals in table[1:]}
123 | 
class MockCorpus:
    """Minimal stand-in for a corpus, exposing only `log` and `settings`."""
    # No settings are configured on the mock.
    settings = dict()
    # No logger is attached to the mock.
    log = None
127 | 
# Ten-line sample text; the project set-up helpers write it to macbeth.txt.
MACBETH = """Tomorrow, and tomorrow, and tomorrow,
Creeps in this petty pace from day to day,
To the last syllable of recorded time;
And all our yesterdays have lighted fools
The way to dusty death. Out, out, brief candle!
Life's but a walking shadow, a poor player,
That struts and frets his hour upon the stage,
And then is heard no more. It is a tale
Told by an idiot, full of sound and fury,
Signifying nothing.
"""

# Comma-separated codes, one row per line of MACBETH; written to
# codes/macbeth.txt.cp.codes by set_up_qc_project_0_2_3.
MACBETH_CODES_0_2_3 = """pace, prolepsis
pace
speech, prolepsis
light
light, prolepsis
shadow, acting
acting
acting, speech
speech
speech
"""

# Single-line Markdown sample; set_up_qc_project writes it to moby_dick.md.
MOBY_DICK = "Call me *Ishmael*. Some years ago- never mind how long precisely- having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation."
153 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/mock_editor.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # This is a mock editor for testing purposes. 
 4 | # Whereas a real editor would present the corpus and code files
 5 | # to the user for coding, the mock editor goes ahead and codes
 6 | # line with 'code_one' and line two (if it exists) with 'code_two'.
 7 | # When --crash is passed, exits with an exception, allowing testing
 8 | # of the error condition.
 9 | 
10 | from argparse import ArgumentParser
11 | from pathlib import Path
12 | import sys
13 | 
14 | parser = ArgumentParser()
15 | parser.add_argument("corpus_file_path")
16 | parser.add_argument("codes_file_path", nargs='?')
17 | parser.add_argument("--verbose", action="store_true")
18 | parser.add_argument("--crash", action="store_true")
19 | parser.add_argument("--memo", action="store_true")
20 | args = parser.parse_args()
21 | 
22 | if args.crash:
23 |     if args.verbose:
24 |         print("Crashing the mock editor, as requested...", file=sys.stderr)
25 |     raise SystemExit(1)
26 | 
27 | if args.memo:
28 |     if args.verbose:
29 |         print("Mock Editor is in memo mode.")
30 |     memo_file_path = Path(args.corpus_file_path)
31 |     memo = "I'm having all these ideas. I need to write them down."
32 |     memo_file_path.write_text(memo_file_path.read_text() + memo)
33 | else:
34 |     nlines = len(Path(args.corpus_file_path).read_text().split('\n'))
35 |     if nlines == 1:
36 |         Path(args.codes_file_path).write_text("code_one")
37 |     else:
38 |         lines = ["line, one", "line, two"] + ([""] * (nlines - 2))
39 |         Path(args.codes_file_path).write_text('\n'.join(lines))
40 |     if args.verbose:
41 |         print('-' * 80)
42 |         print("MOCK EDITOR")
43 |         print('-' * 80)
44 |         text = open(args.corpus_file_path)
45 |         codes = open(args.codes_file_path)
46 |         for tl, cl in zip(text, codes):
47 |             print(f"{tl.strip().ljust(80, ' ')}| {cl.strip()}")
48 |         text.close()
49 |         codes.close()
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_check.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | from qualitative_coding.corpus import DEFAULT_SETTINGS
 4 | 
 5 | class TestCheck(QCTestCase):
 6 |     def test_check_passes_when_no_errors(self):
 7 |         result = self.run_in_testpath("qc check")
 8 |         self.assertEqual(result.stdout, "")
 9 | 
10 |     def test_check_identifies_missing_settings(self):
11 |         for setting in DEFAULT_SETTINGS:
12 |             if setting == 'qc_version': 
13 |                 continue
14 |             self.update_settings(setting, None)
15 |             result = self.run_in_testpath("qc check")
16 |             message = result.stderr
17 |             self.assertTrue(f"Expected '{setting}' in settings" in message)
18 | 
19 |     def test_check_validates_corpus_paths(self):
20 |         self.run_in_testpath("qc corpus import macbeth.txt")
21 |         (self.testpath / "corpus" / "macbeth.txt").unlink()
22 |         (self.testpath / "corpus" / "hamlet.txt").touch()
23 |         message = self.run_in_testpath("qc check").stderr
24 |         self.assertTrue("macbeth.txt" in message)
25 |         self.assertTrue("hamlet.txt" in message)
26 | 
27 |     def test_check_validates_corpus_paths(self):
28 |         self.run_in_testpath("qc corpus import macbeth.txt")
29 |         (self.testpath / "corpus" / "macbeth.txt").write_text("It was the best of times...")
30 |         message = self.run_in_testpath("qc check").stderr
31 |         self.assertTrue("macbeth.txt" in message)
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_code.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | from qualitative_coding.corpus import QCCorpus
 4 | 
 5 | class TestCode(QCTestCase):
 6 | 
 7 |     def setUp(self):
 8 |         super().setUp()
 9 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
10 |         self.set_mock_editor(verbose=True)
11 | 
12 |     def test_code_applies_codes(self):
13 |         self.run_in_testpath("qc code chris")
14 |         with self.corpus.session():
15 |             code_counts = self.corpus.count_codes()
16 |         self.assertEqual(code_counts.get('line'), 2)
17 |         self.assertEqual(code_counts.get('one'), 1)
18 |         self.assertFileDoesNotExist("codes.txt")
19 | 
20 |     def test_code_saves_state_on_crash(self):
21 |         self.set_mock_editor(verbose=True, crash=True)
22 |         self.run_in_testpath("qc code chris")
23 |         self.assertFileExists("codes.txt")
24 |         self.assertFileExists(".coding_session")
25 | 
26 |     def test_code_recovers_incomplete_session(self):
27 |         self.set_mock_editor(verbose=True, crash=True)
28 |         self.run_in_testpath("qc code chris")
29 |         self.set_mock_editor(verbose=True)
30 |         self.run_in_testpath("qc code chris --recover")
31 |         self.assertFileDoesNotExist("codes.txt")
32 |         self.assertFileDoesNotExist(".coding_session")
33 |         result = self.run_in_testpath("qc codes list")
34 |         self.assertTrue("line" in result.stdout)
35 |         self.assertTrue("one" in result.stdout)
36 |         self.assertTrue("two" in result.stdout)
37 | 
38 |     def test_code_will_not_code_when_metadata_file_exists(self):
39 |         (self.testpath / ".coding_session").write_text('a')
40 |         self.set_mock_editor(verbose=True)
41 |         result = self.run_in_testpath("qc code chris")
42 |         self.assertNotEqual(result.returncode, 0)
43 | 
44 |     def test_code_will_not_code_when_codes_exists(self):
45 |         (self.testpath / "codes.txt").write_text('a')
46 |         self.set_mock_editor(verbose=True)
47 |         result = self.run_in_testpath("qc code chris")
48 |         self.assertNotEqual(result.returncode, 0)
49 | 
50 |     def test_code_abandon_deletes_session(self):
51 |         (self.testpath / ".coding_session").write_text('a')
52 |         (self.testpath / "codes.txt").write_text('a')
53 |         result = self.run_in_testpath("qc code chris --abandon")
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_code_parsing.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | from qualitative_coding.tests.fixtures import MockCorpus
 3 | from qualitative_coding.views.viewer import QCCorpusViewer
 4 | from qualitative_coding.exceptions import CodeFileParseError
 5 | 
 6 | class TestCodeParsing(TestCase):
 7 |     def setUp(self):
 8 |         self.viewer = QCCorpusViewer(MockCorpus())
 9 | 
10 |     def test_codes_are_validated(self):
11 |         cases = [
12 |             ['funny', True],
13 |             ['funny-sort-of', True],
14 |             ['FUNNY!', False],
15 |             ['funny?', False],
16 |             ['0', True],
17 |             ['', False],
18 |             [':colon', False],
19 |             ['#hashtag', False],
20 |         ]
21 |         for code, ok in cases:
22 |             if ok: 
23 |                 self.viewer.parse_code('nobody', code)
24 |             else:
25 |                 with self.assertRaises(CodeFileParseError):
26 |                     self.viewer.parse_code('nobody', code)
27 | 
28 |     def test_parses_valid_codes_file(self):
29 |         self.viewer = QCCorpusViewer(MockCorpus())
30 |         codes = self.viewer.parse_codes('nobody', CODES_FILE, 6)
31 |         self.assertEqual(len(codes), 4)
32 |         self.assertEqual(codes[0]['line'], 2)
33 | 
34 |     def test_checks_codes_file_length(self):
35 |         self.viewer = QCCorpusViewer(MockCorpus())
36 |         with self.assertRaises(CodeFileParseError):
37 |             self.viewer.parse_codes('nobody', CODES_FILE, 7)
38 | 
39 |     def test_checks_for_misplaced_commas(self):
40 |         for case in [TRAILING_COMMA, LEADING_COMMA]:
41 |             with self.assertRaises(CodeFileParseError):
42 |                 self.viewer.parse_codes('nobody', case, 6)
43 |             
44 | 
# Codes file used by the parsing tests: four codes over three coded rows,
# surrounded by blank rows. The tests parse it with an expected length of 6
# and expect a parse error with 7.
CODES_FILE = """

funny, inappropriate
dull
trite

"""

# A row ending in a comma; the tests expect a parse error.
TRAILING_COMMA = """
code,
"""
# A row starting with a comma; the tests expect a parse error.
LEADING_COMMA = """
,code
"""
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codebook.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | import yaml
 3 | 
 4 | class TestCodebook(QCTestCase):
 5 |     def test_codebook_is_empty_on_init(self):
 6 |         cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text())
 7 |         self.assertEqual(cb, None)
 8 | 
 9 |     def test_codebook_updates_codebook_file(self):
10 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
11 |         self.set_mock_editor()
12 |         self.run_in_testpath("qc code chris")
13 |         self.run_in_testpath("qc codebook")
14 |         cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text())
15 |         self.assertEqual(len(cb), 3)
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_coders.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestCoders(QCTestCase):
 5 |     def test_coders_shows_coders(self):
 6 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 7 |         self.set_mock_editor()
 8 |         self.run_in_testpath("qc code chris")
 9 |         self.run_in_testpath("qc code varun")
10 |         result = self.run_in_testpath("qc coders")
11 |         self.assertTrue("chris" in result.stdout)
12 |         self.assertTrue("varun" in result.stdout)
13 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codes_crosstab.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestCrosstab(QCTestCase):
 5 |     def test_crosstab_shows_counts(self):
 6 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 7 |         self.set_mock_editor()
 8 |         self.run_in_testpath("qc code chris")
 9 |         result = self.run_in_testpath("qc codes crosstab one two line --format tsv")
10 |         table = self.read_stats_tsv(result.stdout)
11 |         self.assertEqual(table['two']['line'], 1)
12 | 
13 |     def test_crosstab_with_probs_shows_probs(self):
14 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
15 |         self.set_mock_editor()
16 |         self.run_in_testpath("qc code chris")
17 |         result = self.run_in_testpath("qc codes crosstab one two line --probs --format tsv")
18 |         table = self.read_stats_tsv(result.stdout)
19 |         self.assertEqual(table['line']['two'], 0.5)
20 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codes_find.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | 
 3 | class TestFind(QCTestCase):
 4 |     def setUp(self):
 5 |         super().setUp()
 6 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 7 |         self.set_mock_editor()
 8 |         self.run_in_testpath("qc code chris")
 9 | 
10 |     def test_find_shows_codes(self):
11 |         result = self.run_in_testpath("qc codes find one")
12 |         self.assertEqual(len(result.stdout.splitlines()), 8)
13 | 
14 |     def test_find_respects_context_window(self):
15 |         result = self.run_in_testpath("qc codes find one -C 5")
16 |         self.assertEqual(len(result.stdout.splitlines()), 11)
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codes_list.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | import yaml
 3 | 
 4 | class TestList(QCTestCase):
 5 |     def setUp(self):
 6 |         super().setUp()
 7 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 8 |         self.set_mock_editor()
 9 |         self.run_in_testpath("qc code chris")
10 |         code_tree = [{'line': ['one', 'two']}]
11 |         (self.testpath / "codebook.yaml").write_text(yaml.dump(code_tree))
12 | 
13 |     def test_list_shows_codes(self):
14 |         result = self.run_in_testpath("qc codes list")
15 |         self.assertTrue("line" in result.stdout)
16 |         self.assertTrue("one" in result.stdout)
17 |         self.assertTrue("two" in result.stdout)
18 | 
19 |     def test_list_respects_depth(self):
20 |         result = self.run_in_testpath("qc codes list --depth 1")
21 |         self.assertTrue("line" in result.stdout)
22 |         self.assertTrue("one" not in result.stdout)
23 | 
24 |     def test_list_respects_expanded(self):
25 |         result = self.run_in_testpath("qc codes list --expanded")
26 |         self.assertTrue("line:one" in result.stdout)
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codes_rename.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from qualitative_coding.corpus import QCCorpus
 3 | import yaml
 4 | 
 5 | class TestRename(QCTestCase):
 6 |     def setUp(self):
 7 |         super().setUp()
 8 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 9 |         self.set_mock_editor()
10 |         self.run_in_testpath("qc code chris")
11 | 
12 |     def test_rename_renames_codes(self):
13 |         self.run_in_testpath("qc codes rename line pace")
14 |         cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text())
15 |         self.assertTrue('pace' in cb)
16 | 
17 |     def test_rename_does_not_duplicate_codes(self):
18 |         corpus = QCCorpus(self.testpath/"settings.yaml")
19 |         self.run_in_testpath("qc codes rename line one")
20 |         with corpus.session():
21 |             self.assertEqual(len(corpus.get_coded_lines()), 3)
22 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_codes_stats.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | 
 3 | class TestStats(QCTestCase):
 4 |     def test_stats_shows_stats(self):
 5 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 6 |         self.set_mock_editor()
 7 |         self.run_in_testpath("qc code chris")
 8 |         result = self.run_in_testpath("qc codes stats --format tsv")
 9 |         table = self.read_stats_tsv(result.stdout)
10 |         self.assertEqual(table['line']['Count'], 2)
11 | 
12 |     def test_stats_distinct_shows_totals_by_coder(self):
13 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
14 |         self.set_mock_editor()
15 |         self.run_in_testpath("qc code chris")
16 |         self.run_in_testpath("qc code haley")
17 |         result = self.run_in_testpath("qc codes stats --by-coder --format tsv")
18 |         table = self.read_stats_tsv(result.stdout)
19 | 
20 |     def test_stats_distinct_shows_totals_by_document(self):
21 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
22 |         self.set_mock_editor()
23 |         self.run_in_testpath("qc code chris")
24 |         self.run_in_testpath("qc code haley")
25 |         result = self.run_in_testpath("qc codes stats --by-document --format tsv")
26 |         table = self.read_stats_tsv(result.stdout)
27 | 
28 |     def test_stats_distinct_shows_coder_document_pivot_table(self):
29 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
30 |         self.set_mock_editor()
31 |         self.run_in_testpath("qc code chris")
32 |         self.run_in_testpath("qc code haley")
33 |         result = self.run_in_testpath("qc codes stats --by-document --by-coder --format tsv")
34 |         table = self.read_stats_tsv(result.stdout)
35 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_corpus_anonymize.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from qualitative_coding.corpus import QCCorpus
 3 | import yaml
 4 | 
# Sample news text containing personal names; the anonymization tests write
# it to news.txt and check how "Victor Adeboyejo" is handled.
NEWS = """A "hefty sneeze" has caused a professional footballer to sustain a 
"nasty back injury". Victor Adeboyejo, a striker for Bolton Wanderers, 
had been due to take part in a Bristol Street Motors Trophy group game at 
Barrow on Tuesday. He was forced to pull out of the squad, however, 
because of discomfort in his back and ribcage. Manager Ian Evatt, who was 
already missing first team players because of injury and the international 
break, said the pain appeared to have been caused by a "pretty hefty sneeze."
"""
13 | 
class TestCorpusAnonymize(QCTestCase):
    """Tests for `qc corpus anonymize`."""

    def setUp(self):
        super().setUp()
        news_path = self.testpath / "news.txt"
        news_path.write_text(NEWS)
        for command in ("qc corpus import news.txt", "qc corpus anonymize"):
            self.run_in_testpath(command)

    def _read_output(self, directory):
        # Text of news.txt inside the given output directory of the project.
        return (self.testpath / directory / "news.txt").read_text()

    def test_creates_key_file_with_yaml(self):
        key_path = self.testpath / "key.yaml"
        self.assertTrue(key_path.exists())
        mapping = yaml.safe_load(key_path.read_text())
        self.assertIn("Victor Adeboyejo", mapping)

    def test_creates_anonymized_corpus(self):
        self.run_in_testpath("qc corpus anonymize")
        self.assertNotIn("Victor Adeboyejo", self._read_output("anonymized"))

    def test_reverses_anonymization(self):
        self.run_in_testpath("qc corpus anonymize")
        self.run_in_testpath("qc corpus anonymize -r -o recovered")
        self.assertIn("Victor Adeboyejo", self._read_output("recovered"))

    def test_replaces_longer_strings_first(self):
        replacements = {
            "Victor": "X",
            "Victor Adeboyejo": "VA"
        }
        (self.testpath / "key.yaml").write_text(yaml.dump(replacements))
        self.run_in_testpath("qc corpus anonymize")
        self.assertNotIn("Adeboyejo", self._read_output("anonymized"))
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_corpus_import.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestImport(QCTestCase):
 5 |     def test_import_verbatim(self):
 6 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 7 |         self.assertFileImported("macbeth.txt")
 8 | 
 9 |     def test_import_pandoc(self):
10 |         self.run_in_testpath("qc corpus import moby_dick.md --importer pandoc")
11 |         self.assertFileImported("moby_dick.txt")
12 |         nlines = len((self.testpath / "corpus/moby_dick.txt").read_text().split('\n'))
13 |         self.assertEqual(nlines, 5)
14 | 
15 |     def test_import_recursive(self):
16 |         (self.testpath / "chapters").mkdir()
17 |         (self.testpath / "chapters/one.txt").write_text("one")
18 |         (self.testpath / "chapters" / "preface").mkdir()
19 |         (self.testpath / "chapters/preface/note.txt").write_text("two")
20 |         self.run_in_testpath("qc corpus import chapters --recursive")
21 |         self.assertFileImported("one.txt")
22 |         self.assertFileImported("preface/note.txt")
23 | 
24 |     def test_import_from_absolute_dir(self):
25 |         import_path = (self.testpath / "macbeth.txt").resolve()
26 |         self.run_in_testpath(f"qc corpus import {import_path}")
27 |         self.assertFileImported("macbeth.txt")
28 | 
29 |     def test_import_recursive_from_absolute_dir(self):
30 |         (self.testpath / "chapters").mkdir()
31 |         (self.testpath / "chapters/one.txt").write_text("one")
32 |         (self.testpath / "chapters" / "preface").mkdir()
33 |         (self.testpath / "chapters/preface/note.txt").write_text("two")
34 |         import_dir = (self.testpath / "chapters").resolve()
35 |         self.run_in_testpath(f"qc corpus import {import_dir} --recursive")
36 |         self.assertFileImported("one.txt")
37 |         self.assertFileImported("preface/note.txt")
38 | 
39 |     def test_import_from_rel_dir_with_dot_dot(self):
40 |         (self.testpath / "chapters").mkdir()
41 |         self.run_in_testpath("qc corpus import chapters/../macbeth.txt --importer verbatim")
42 |         self.assertFileImported("macbeth.txt")
43 | 
44 |     def test_import_from_dir_with_spaces(self):
45 |         (self.testpath / "chap ters").mkdir()
46 |         (self.testpath / "chap ters/one.txt").write_text("one")
47 |         (self.testpath / "chap ters" / "preface").mkdir()
48 |         (self.testpath / "chap ters/preface/note.txt").write_text("two")
49 |         self.run_in_testpath('qc corpus import "chap ters" --recursive')
50 |         self.assertFileImported("one.txt")
51 |         self.assertFileImported("preface/note.txt")
52 | 
53 |     def assertFileImported(self, path):
54 |         self.assertFileExists(Path("corpus") / path)
55 |         with self.corpus.session():
56 |             file_path = self.corpus.get_document(self.testpath / 'corpus' / path).file_path
57 |         self.assertEqual(file_path, path)
58 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_corpus_move.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestCorpusMove(QCTestCase):
 5 | 
 6 |     def test_move_works_with_files(self):
 7 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 8 |         self.run_in_testpath("qc corpus move corpus/macbeth.txt corpus/m.txt")
 9 |         self.assertTrue((self.testpath / "corpus" / "m.txt").exists())
10 |         with self.corpus.session():
11 |             result = self.corpus.get_documents(file_list=["m.txt"])
12 |         self.assertEqual(result[0].file_path, "m.txt")
13 | 
14 |     def test_move_works_with_files_in_subdirs(self):
15 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
16 |         self.run_in_testpath("qc corpus move corpus/macbeth.txt corpus/will/macbeth.txt")
17 |         self.assertTrue((self.testpath / "corpus" / "will" / "macbeth.txt").exists())
18 |         with self.corpus.session():
19 |             result = self.corpus.get_documents(file_list=["will/macbeth.txt"])
20 |         self.assertEqual(result[0].file_path, "will/macbeth.txt")
21 | 
22 |     def test_move_works_with_recursive_subdirs(self):
23 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim --corpus-root shakespeare")
24 |         self.run_in_testpath("qc corpus move corpus/shakespeare corpus/will --recursive")
25 |         self.assertTrue((self.testpath / "corpus" / "will" / "macbeth.txt").exists())
26 |         with self.corpus.session():
27 |             result = self.corpus.get_documents(file_list=["will/macbeth.txt"])
28 |         self.assertEqual(result[0].file_path, "will/macbeth.txt")
29 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_corpus_remove.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestCorpusRemove(QCTestCase):
 5 |     def test_removes_individual_file(self):
 6 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
 7 |         self.run_in_testpath("qc corpus remove corpus/macbeth.txt")
 8 |         self.assertFileDoesNotExist(self.testpath / "corpus" / "macbeth.txt")
 9 | 
10 |     def test_removes_directories(self):
11 |         self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim --corpus-root shx")
12 |         self.run_in_testpath("qc corpus remove corpus/shx --recursive")
13 |         self.assertFileDoesNotExist(self.testpath / "corpus" / "shx" / "macbeth.txt")
14 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_corpus_update.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from qualitative_coding.corpus import QCCorpus
 3 | 
# Replacement text for the imported macbeth.txt. TestCorpusUpdate.setUp writes
# it to macbeth_improved.txt, which the tests pass to `qc corpus update --new`.
MACBETH_IMPROVED = """Tomorrow, and tomorrow, and tomorrow,
Tomorrow, and tomorrow, and tomorrow,
Tomorrow, and tomorrow, and tomorrow,
Creeps in this petty pace from day to day,
To the last syllable of recorded time;
The way to dusty death. Out, out, brief candle!
Life's but a walking shadow, a poor player,
Something something something,
Told by an idiot, full of sound and fury,
Signifying nothing.
"""
15 | 
class TestCorpusUpdate(QCTestCase):
    """Tests for `qc corpus update`, which replaces a corpus file with new text."""

    def setUp(self):
        """Import macbeth.txt, code lines 1-9 as coder `chris`, and write the new text."""
        super().setUp()
        self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim")
        # One code per line, in line order starting at line 1.
        codes_by_line = [
            'tomorrow', 'creeps', 'to', 'and', 'the', 'lifes', 'that', 'and', 'told',
        ]
        coded_lines = [
            {'line': number, 'code_id': code}
            for number, code in enumerate(codes_by_line, start=1)
        ]
        with self.corpus.session():
            self.corpus.update_coded_lines("macbeth.txt", "chris", coded_lines)
        (self.testpath / "macbeth_improved.txt").write_text(MACBETH_IMPROVED)

    def test_corpus_update_updates_line_numbers(self):
        # NOTE(review): 'speech' is not one of the codes applied in setUp, so this
        # before/after comparison may be vacuous -- confirm the intended code.
        find_command = "qc codes find speech"
        output_before = self.run_in_testpath(find_command).stdout
        self.run_in_testpath("qc corpus update corpus/macbeth.txt --new macbeth_improved.txt")
        output_after = self.run_in_testpath(find_command).stdout
        self.assertEqual(output_before, output_after)

    def test_corpus_update_updates_text(self):
        self.run_in_testpath("qc corpus update corpus/macbeth.txt --new macbeth_improved.txt")
        corpus_file = self.testpath / "corpus/macbeth.txt"
        self.assertEqual(corpus_file.read_text(), MACBETH_IMPROVED)

    def test_corpus_update_updates_file_hash(self):
        document_path = self.testpath / "corpus/macbeth.txt"
        with self.corpus.session():
            hash_before = self.corpus.get_document(document_path).file_hash
        self.run_in_testpath("qc corpus update corpus/macbeth.txt --new macbeth_improved.txt")
        with self.corpus.session():
            hash_after = self.corpus.get_document(document_path).file_hash
        self.assertNotEqual(hash_before, hash_after)
52 | 
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_export.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.tests.fixtures import QCTestCase
 2 | from pathlib import Path
 3 | 
 4 | class TestExport(QCTestCase):
 5 |     def test_creates_qdpx_file(self):
 6 |         self.run_in_testpath("qc corpus import macbeth.txt")
 7 |         self.set_mock_editor(verbose=True)
 8 |         self.run_in_testpath("qc code chris")
 9 |         self.run_in_testpath("qc code haley")
10 |         self.run_in_testpath("qc export out.qdpx")
11 |         self.assertFileExists("out.qdpx")
12 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_init.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from tempfile import TemporaryDirectory
 3 | from pathlib import Path
 4 | import yaml
 5 | 
 6 | # TODO: 
 7 | # - Ensure that init catches validation errors when editors is malformed.
 8 | # - Ensure that init catches validation errors when editor not in editors.
 9 | # - Ensure that init catches files in corpus_dir which have not been imported
10 | # - Ensure that init functions properly when the settings file is a relative
11 | #   and an absolute path.
12 | 
class TestInit(QCTestCase):
    """Tests for `qc init`, run once in setUp and again inside individual tests."""

    def setUp(self):
        # Does not call super().setUp(): the project is created here by running
        # `qc init` in a fresh temporary directory.
        self.tempdir = TemporaryDirectory()
        self.testpath = Path(self.tempdir.name)
        self.run_in_testpath("qc init")

    def test_init_creates_setup_file(self):
        settings_file = self.testpath / "settings.yaml"
        self.assertFileExists(settings_file)

    def test_init2_creates_expected_dirs(self):
        self.run_in_testpath("qc init")
        for dirname in ("corpus", "memos"):
            self.assertFileExists(dirname, is_dir=True)

    def test_init2_creates_db(self):
        self.run_in_testpath("qc init")
        self.assertFileExists('qualitative_coding.sqlite3')

    def test_init_check_catches_errors(self):
        # With valid settings, a second init writes nothing to stderr.
        clean_run = self.run_in_testpath("qc init")
        self.assertEqual("", clean_run.stderr)
        # After clearing corpus_dir, init must write something to stderr.
        self.update_settings("corpus_dir", None)
        broken_run = self.run_in_testpath("qc init")
        self.assertNotEqual("", broken_run.stderr)
37 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_init_import.py:
--------------------------------------------------------------------------------
 1 | from qualitative_coding.tests.fixtures import QCTestCase
 2 | from qualitative_coding.corpus import QCCorpus
 3 | from tempfile import TemporaryDirectory
 4 | from pathlib import Path
 5 | from subprocess import run
 6 | 
 7 | class TestInitImport(QCTestCase):
 8 |     def test_imports_from_qdpx_file(self):
 9 |         """Sort of an elaborate test: exports and then re-imports a project.
10 |         """
11 |         self.run_in_testpath("qc corpus import macbeth.txt")
12 |         self.set_mock_editor(verbose=True)
13 |         self.run_in_testpath("qc code chris")
14 |         self.run_in_testpath("qc code haley")
15 |         self.run_in_testpath("qc export out.qdpx")
16 |         self.assertFileExists("out.qdpx")
17 |         with TemporaryDirectory() as outdir:
18 |             qdxp_file = self.testpath / "out.qdpx"
19 |             result = run(f'qc init --import "{qdxp_file}"', cwd=outdir, shell=True, 
20 |                     check=True, capture_output=True, text=True)
21 |             corpus = QCCorpus(Path(outdir) / "settings.yaml")
22 |             self.assertFileExists(Path(outdir) / "corpus" / "macbeth.txt")
23 |             with corpus.session():
24 |                 self.assertEqual(len(corpus.get_codes()), 3)
25 |                 self.assertEqual(len(list(corpus.get_all_coders())), 2)
26 |                 self.assertEqual(len(corpus.get_coded_lines()), 8)
27 | 
28 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_logs.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from logs import configure_logger
 3 | import structlog
 4 | from pathlib import Path
 5 | 
 6 | class TestLogs(QCTestCase):
 7 |     def test_log_info_saves_to_file(self):
 8 |         configure_logger(self.testpath / "settings.yaml")
 9 |         log = structlog.get_logger()
10 |         log.info("test")
11 |         self.assertFileExists("qualitative_coding.log")
12 |         with open(self.testpath / "qualitative_coding.log") as fh:
13 |             lines = list(fh)
14 |         self.assertTrue(len(lines) > 0)
15 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_memo.py:
--------------------------------------------------------------------------------
1 | from tests.fixtures import QCTestCase
2 | 
class TestMemo(QCTestCase):
    """Tests for `qc memo`."""

    def test_memo_saves_memo(self):
        """After one `qc memo` run with the mock editor, `memos/` holds exactly one entry."""
        self.set_mock_editor(verbose=True)
        self.run_in_testpath("qc memo chris")
        memos_dir = self.testpath / "memos"
        saved = [entry for entry in memos_dir.iterdir()]
        self.assertEqual(len(saved), 1)
9 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_read_diff_offsets.py:
--------------------------------------------------------------------------------
 1 | from difflib import unified_diff
 2 | from unittest import TestCase
 3 | from qualitative_coding.diff import read_diff_offsets
 4 | 
 5 | doc0 = [t + '\n' for t in 'abcdefghijklmnop']
 6 | doc1 = [t + '\n' for t in '1bcdef12lmnopqr']
 7 | diff = ''.join(unified_diff(doc0, doc1, n=1))
 8 | 
 9 | class TestReadDiffOffsets(TestCase):
10 |     def test_read_diff_offsets_reads_correct_offsets(self):
11 |         expected = [(9, -3), (17, 2)]
12 |         observed = read_diff_offsets(diff)
13 |         self.assertEqual(expected, observed)
14 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_refi_qda_writer.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from tests.fixtures import QCTestCase
 3 | from qualitative_coding.refi_qda.writer import REFIQDAWriter
 4 | from tempfile import TemporaryDirectory
 5 | from xmlschema import validate
 6 | import importlib.resources
 7 | 
# YAML codebook fixture: three top-level codes plus a parent code `tens` with
# three children. test_writes_nested_codes writes it to codebook.yaml.
CODEBOOK = """
- one
- two
- three
- tens:
  - twenty
  - thirty
  - forty
"""
17 | 
class TestREFIQDAWriter(QCTestCase):
    """Tests for REFIQDAWriter's XML output."""

    def setUp(self):
        super().setUp()
        settings_path = self.testpath / "settings.yaml"
        self.writer = REFIQDAWriter(settings_path)

    def test_writes_nested_codes(self):
        """The `tens` code must be written with its three child codes nested inside it."""
        (self.testpath / "codebook.yaml").write_text(CODEBOOK)
        codes_element = self.writer.codebook_to_xml().find('Codes')
        tens_element = codes_element.find("Code[@name='tens']")
        nested_codes = tens_element.findall('Code')
        self.assertEqual(len(nested_codes), 3)

    def test_xml_validates(self):
        """The project XML written for a coded corpus must validate against schema.xsd."""
        package_files = importlib.resources.files("qualitative_coding")
        schema_path = package_files / "refi_qda" / "schema.xsd"
        self.run_in_testpath("qc corpus import macbeth.txt")
        self.set_mock_editor(verbose=True)
        self.run_in_testpath("qc code chris")
        with TemporaryDirectory() as tempdir:
            xml_path = Path(tempdir) / "project.qde"
            self.writer.write_xml(xml_path)
            validate(xml_path, schema_path)
41 |             
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_tree_node.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | from qualitative_coding.tree_node import TreeNode
 3 | from tempfile import TemporaryDirectory
 4 | import yaml
 5 | from pathlib import Path
 6 | 
 7 | class TestTreeNode(TestCase):
 8 |     def test_read_write_are_isomorphic(self):
 9 |         with TemporaryDirectory() as tempdir:
10 |             for case in [EMPTY_CODEBOOK, FLAT_CODEBOOK, NESTED_CODEBOOK]:
11 |                 infile = Path(tempdir) / "in.yaml"
12 |                 outfile = Path(tempdir) / "out.yaml"
13 |                 infile.write_text(case)
14 |                 tn = TreeNode.read_yaml(infile)
15 |                 TreeNode.write_yaml(outfile, tn)
16 |                 self.assertEqual(outfile.read_text(), case)
17 | 
# YAML codebook fixtures for the read/write round-trip test: an empty list,
# a flat list of codes, and a list whose entries have child codes.
EMPTY_CODEBOOK = "[]\n"
FLAT_CODEBOOK = """- a one
- b two
- c three
"""
NESTED_CODEBOOK = """- one:
  - a
  - b
  - c
- two:
  - d
"""
# All three fixtures collected in one list.
CASES = [EMPTY_CODEBOOK, FLAT_CODEBOOK, NESTED_CODEBOOK]
31 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_upgrade.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from qualitative_coding.corpus import QCCorpus
 3 | from qualitative_coding.logs import configure_logger
 4 | 
 5 | class TestUpgrade(QCTestCase):
 6 |     def setUp(self):
 7 |         pass
 8 | 
 9 |     def test_upgrade_noop(self):
10 |         self.set_up_qc_project()
11 |         result = self.run_in_testpath("qc upgrade")
12 |         self.assertEqual(result.stdout, "")
13 | 
14 |     def test_upgrade_0_2_3_to_1_0_0(self):
15 |         self.set_up_qc_project_0_2_3()
16 |         configure_logger(self.testpath / "settings.yaml")
17 |         result = self.run_in_testpath("qc upgrade -v 1.0.0")
18 |         corpus = QCCorpus(self.testpath / "settings.yaml")
19 |         self.assertFileDoesNotExist("codes")
20 |         with corpus.session():
21 |             code_counts = corpus.count_codes()
22 |         self.assertEqual(code_counts['prolepsis'], 3)
23 | 


--------------------------------------------------------------------------------
/qualitative_coding/tests/test_version.py:
--------------------------------------------------------------------------------
 1 | from tests.fixtures import QCTestCase
 2 | from importlib.metadata import metadata
 3 | 
 4 | class TestVersion(QCTestCase):
 5 |     def test_version_is_correct(self):
 6 |         version = metadata('qualitative-coding')['version']
 7 |         result = self.run_in_testpath("qc version")
 8 |         self.assertTrue(version in result.stdout)
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/qualitative_coding/tree_node.py:
--------------------------------------------------------------------------------
  1 | # An idiosyncratic implementation of nodes in a tree structure.
  2 | # Could use refactoring
  3 | 
  4 | import yaml
  5 | from functools import total_ordering
  6 | from qualitative_coding.exceptions import CodebookParseError
  7 | 
  8 | @total_ordering
  9 | class TreeNode:
 10 |     """
 11 |     A node in a tree, represented as either a string (terminal)
 12 |     or a dict (with children).
 13 |     """
 14 |     root = "$ROOT$"
 15 |     indent = "    "
 16 |     list_marker = "- "
 17 | 
 18 |     @classmethod
 19 |     def read_yaml(cls, filename):
 20 |         with open(filename) as f:
 21 |             try:
 22 |                 data = yaml.safe_load(f)
 23 |                 return TreeNode({cls.root: data})
 24 |             except yaml.scanner.ScannerError as err:
 25 |                 m = err.problem_mark
 26 |                 message = f"Error reading {filename} on line {m.line}: {err.problem}"
 27 |                 raise CodebookParseError(message)
 28 |             except yaml.parser.ParserError as err:
 29 |                 m = err.problem_mark
 30 |                 message = f"Error reading {filename} on line {m.line}: {err.problem}"
 31 |                 raise CodebookParseError(message)
 32 | 
 33 |     @classmethod
 34 |     def write_yaml(cls, filename, tree_node):
 35 |         with open(filename, 'w') as f:
 36 |             f.write(yaml.dump(tree_node.to_json(), default_flow_style=False))
 37 | 
 38 |     def __init__(self, representation, parent=None):
 39 |         self.parent = parent
 40 |         if isinstance(representation, str):
 41 |             self.name = representation
 42 |             self.children = []
 43 |         elif isinstance(representation, dict) and len(representation) == 1:
 44 |             ((self.name, children),) = representation.items()
 45 |             self.children = [TreeNode(child, parent=self) for child in children or []]
 46 |         else:
 47 |             raise ValueError("Illegal node representation: {}".format(representation))
 48 | 
 49 |     def add_child(self, representation):
 50 |         self.children.append(TreeNode(representation, parent=self))
 51 | 
 52 |     def remove_children_by_name(self, name):
 53 |         for child in self.children: 
 54 |             child.remove_children_by_name(name)
 55 |             if child.name == name:
 56 |                 for c in child.children:
 57 |                     self.children.append(c)
 58 |                     c.parent = self
 59 |         self.children = [c for c in self.children if c.name != name]
 60 | 
 61 |     def rename(self, old_name, new_name):
 62 |         "Renames all children"
 63 |         if self.name == old_name:
 64 |             self.name = new_name
 65 |         for child in self.children:
 66 |             child.rename(old_name, new_name)
 67 | 
 68 |     def ancestors(self):
 69 |         "Returns a list of ancestors, ending with self"
 70 |         if self.is_root() or self.is_root():
 71 |             return []
 72 |         else:
 73 |             return self.parent.ancestors() + [self]
 74 | 
 75 |     def depth(self):
 76 |         return len(self.ancestors())
 77 | 
 78 |     def backtrack_to(self, target_nodes):
 79 |         "Returns a list of ancestors traversed to reach one of target_nodes"
 80 |         traversed = []
 81 |         for a in reversed(self.ancestors()):
 82 |             if a in target_nodes:
 83 |                 return list(reversed(traversed))
 84 |             else:
 85 |                 traversed.append(a)
 86 |         return None
 87 | 
 88 |     def flatten(self, names=False, expanded=False, sep=":", depth=None):
 89 |         """
 90 |         Returns the node and its children as a depth-first list.
 91 |         If names, return strings of node names.
 92 |         If expanded, return expanded name, like 'fruits:apples:pippin'
 93 |         If depth is not None, limits the depth of recursion
 94 |         """
 95 |         result = [] if self.is_root() else [self]
 96 |         if depth is None or depth > 0:
 97 |             for child in self.children: 
 98 |                 result += child.flatten(depth=depth if depth is None else depth - 1)
 99 |         if names:
100 |             if expanded:
101 |                 result = [n.expanded_name(sep=sep) for n in result]
102 |             else:
103 |                 result = [n.name for n in result]
104 |         return sorted(result)
105 | 
106 |     def expanded_name(self, sep=":"):
107 |         "Returns expanded name, like 'fruits:apples:pippin'"
108 |         if self.parent and not self.parent.is_root():
109 |             return self.parent.expanded_name(sep=sep) + sep + self.name
110 |         else:
111 |             return self.name
112 | 
113 |     def indented_name(self, nodes, sep=":", indent_length=2, indent_start='.'):
114 |         "Returns indented name, like '.    pippin'"
115 |         ancestor_traversal = self.parent.backtrack_to(nodes)
116 |         if ancestor_traversal is None: # This node goes all the way back to root
117 |             return ":".join(n.name for n in self.ancestors())
118 |         else: 
119 |             ancestor_depth = self.depth() - len(ancestor_traversal) - 1
120 |             return (
121 |                 indent_start + 
122 |                 ' ' * indent_length * ancestor_depth + 
123 |                 ":".join(a.name for a in ancestor_traversal+[self])
124 |             )
125 | 
126 |     def find(self, name):
127 |         "Returns all child nodes (including self) with matching name"
128 |         result = [self] if self.name == name else []
129 |         for child in self.children:
130 |             result += child.find(name)
131 |         return result
132 | 
133 |     def sum(self, prop):
134 |         "Returns the sum of self plus all children's values for prop"
135 |         val = getattr(self, prop) if hasattr(self, prop) else 0
136 |         return val + sum(c.sum(prop) for c in self.children)
137 | 
138 |     def to_json(self):
139 |         "Returns a str/list/dict representation. The root node is stored as a list."
140 |         if self.children:
141 |             if self.is_root():
142 |                 return [child.to_json() for child in sorted(self.children)]
143 |             else:
144 |                 return {self.name: [child.to_json() for child in sorted(self.children)]}
145 |         else:
146 |             if self.is_root():
147 |                 return []
148 |             else:
149 |                 return self.name
150 | 
151 |     def __str__(self, max_depth=None, current_depth=0):
152 |         "String representation of tree, limited to `max_depth` if provided. `current_depth` is used internally for recursion."
153 |         if self.is_root():
154 |             if max_depth is None or max_depth > 0:
155 |                 md = None if max_depth is None else max_depth - 1
156 |                 return "".join([c.__str__(max_depth=md, current_depth=current_depth) for c in sorted(self.children)])
157 |             else:
158 |                 return ""
159 |         else:
160 |             string_rep = self.indent * current_depth + self.list_marker + self.name + "\n"
161 |             if max_depth is None or current_depth < max_depth:
162 |                 string_rep += "".join([c.__str__(max_depth=max_depth, current_depth=current_depth+1) for c in sorted(self.children)])
163 |             return string_rep
164 |             
165 |     def is_root(self):
166 |         return self.name == self.root
167 | 
168 |     def __eq__(self, other):
169 |         return self.expanded_name() == other.expanded_name()
170 | 
171 |     def __lt__(self, other):
172 |         return self.expanded_name() < other.expanded_name()
173 | 
174 |     def __hash__(self):
175 |         return hash(self.expanded_name())
176 | 
177 |     def __repr__(self):
178 |         return "<{}>".format(self.name)
179 | 
180 | 


--------------------------------------------------------------------------------
/qualitative_coding/user_input.py:
--------------------------------------------------------------------------------
 1 | import curses
 2 | import curses.ascii
 3 | 
 4 | CONTROL_CHARS = [ord(ch) for ch in ':/?']
 5 | 
 6 | def is_enter(ch):
 7 |     return ch == curses.KEY_ENTER or ch == 10 or ch == 13
 8 | 
 9 | def is_control_char(ch):
10 |     return ch in CONTROL_CHARS
11 | 
def is_arrow_key(ch):
    "Returns True when ch is one of the four curses arrow-key codes"
    arrow_keys = (curses.KEY_RIGHT, curses.KEY_UP, curses.KEY_LEFT, curses.KEY_DOWN)
    return any(ch == key for key in arrow_keys)
14 | 
def is_escape(ch):
    "Returns True when ch is the ASCII escape code"
    escape_code = curses.ascii.ESC
    return ch == escape_code
17 | 
def allowed_in_command(ch):
    "Returns True when ch is a space or an ASCII letter or digit"
    if ch == ord(' '):
        return True
    return curses.ascii.isalnum(ch)
20 | 
def allowed_in_coding(ch):
    "Returns True when ch is a space, a comma, or an ASCII letter or digit"
    if ch == ord(' '):
        return True
    if ch == ord(','):
        return True
    return curses.ascii.isalnum(ch)
23 | 


--------------------------------------------------------------------------------
/qualitative_coding/views/coding_ui.py:
--------------------------------------------------------------------------------
  1 | import curses
  2 | from textwrap import wrap
  3 | from signal import signal, SIGWINCH
  4 | from enum import Flag, auto
  5 | import os
  6 | import qualitative_coding.user_input as UI
  7 | 
  8 | """
  9 | What's next? 
 10 | 
 11 | - Add a debug mode
 12 | - Implement arrow key traversal of lines
 13 |   - Implement seek_line as O(1) operation
 14 |     - Implement map of line_number -> pad_row_number
 15 |     - Implement text wrapping for the codes pad
 16 |       - Rebuild line number map on wrap or unwrap
 17 |       - Write a function mapping logical cursor position to (row, col) within a line
 18 |     - Update codes pad to have dynamic width
 19 |     - Rebuild line number map on window resize
 20 |     - Update line numbers to not use a pad. It's too much trouble. 
 21 |       Instead, just draw line numbers from top to bottom of the screen.
 22 |   - Draw line numbers for coding pad too.
 23 |   - Store target_cursor_position (for when scrolling through lines which are too short)
 24 | 
 25 | Previously, I was keeping track of a global logical line mapping. I was considering this 
 26 | necessary because sometimes a display line takes up more space than one line. Actually, I 
 27 | still need to do this, if I want to take advantage of the pad-scrolling functionality 
 28 | (which I do). Therefore, I need to keep track of the difference between logical lines
 29 | and display lines. Will I allow text lines to overflow the 80-character buffer? Yes, I think 
 30 | so. I can handle them the same way I'll handle representations of codes (comma-separated); 
 31 | when there are too many for one line, then let them overflow onto the next display line.
 32 | 
 33 | The only performance implications here are when a code is edited, such that a logical code
 34 | line changes the number of display lines needed. In this case, I'll need to re-index the
 35 | display lines for code sets.
 36 | """
 37 | 
class Pads(Flag):
    """Bit flags naming the screen areas that CodingUI.render can refresh.

    Members can be combined with `|`; `ALL` is the union of the four areas.
    """
    INDEX = auto()
    TEXT = auto()
    CODES = auto()
    STATUS = auto()
    # Default for CodingUI.render: refresh everything.
    ALL = INDEX | TEXT | CODES | STATUS
 44 | 
 45 | class CodingUI:
 46 |     """Implements a curses-based user interface for coding texts in the corpus.
 47 |     Initialized with:
 48 | 
 49 |     text: an iterable of lines of the text
 50 |     codes: an iterable of (code, line) tuples
 51 |     codebook: an iterable of all codes.
 52 |     """
 53 | 
    # Width, in columns, of the pad that shows the text being coded.
    TEXT_WIDTH = 80
    # Presumably the width of the divider between the text and codes areas -- TODO confirm.
    DIVIDER_WIDTH = 1
    # Presumably the width of the codes pad -- TODO confirm.
    CODES_WIDTH = 200
    # When true, render() refreshes the status pad on every call, whatever `pads` is.
    DEBUG = True
 58 | 
 59 |     def __init__(self, text, codes, codebook):
 60 |         self.text = text
 61 |         self.codes = codes
 62 |         self.codebook = codebook
 63 | 
 64 |     def run(self):
 65 |         "Starts a UI session"
 66 |         os.environ.setdefault('ESCDELAY', '25')
 67 |         curses.wrapper(self._run)
 68 | 
 69 |     def _run(self, stdscr):
 70 |         "Starts a UI session, receiving a prepared screen"
 71 |         curses.start_color()
 72 |         curses.use_default_colors()
 73 |         curses.init_pair(1, curses.COLOR_YELLOW, curses.COLOR_BLACK)
 74 |         self.screen = stdscr
 75 |         self.running = True
 76 |         self.edit_mode = True
 77 |         self.status_message = ""
 78 |         self.control_buffer = ""
 79 |         self.pad_height = len(self.text)
 80 |         self.index_width = len(str(len(self.text))) + 1
 81 |         self.focus_window_line = 0
 82 |         self.focus_line = 0
 83 |         self.cursor_position = 0
 84 |         self.measure_screen()
 85 |         self.index_pad = self.create_index_pad()
 86 |         self.text_pad = self.create_text_pad()
 87 |         self.codes_pad = self.create_codes_pad()
 88 |         self.codes_pad = self.create_codes_pad()
 89 |         self.status_pad = self.create_status_pad()
 90 |         self.set_status_message("Welcome! " + self.help_message(), render=False)
 91 |         signal(SIGWINCH, self.handle_screen_resize)
 92 |         self.screen.clear()
 93 |         self.render()
 94 | 
 95 |         while self.running:
 96 |             self.handle_keypress(self.screen.getch())
 97 | 
 98 |     def render(self, pads=Pads.ALL):
 99 |         """Renders the latest state. 
100 |         Optional `pads` is a Pads (enum.Flag) specifying which 
101 |         pads on the screen should be refreshed. This is an optimization 
102 |         for when only part of the screen needs to be rendered.
103 |         """
104 |         PAD_YMIN = self.line_row_index[self.focus_window_line]
105 |         PAD_XMIN = 0
106 |         SCREEN_YMIN = 0
107 |         SCREEN_YMAX = self.rows - 2
108 | 
109 |         self.screen.noutrefresh()
110 |         self.draw_divider()
111 |         self.draw_line_numbers(self.text_nums_x0)
112 |         self.draw_line_numbers(self.codes_nums_x0)
113 |         if Pads.TEXT & pads:
114 |             self.text_pad.noutrefresh(
115 |                 PAD_YMIN, PAD_XMIN, 
116 |                 SCREEN_YMIN, self.text_x0, 
117 |                 SCREEN_YMAX, self.text_x1
118 |             )
119 |         if Pads.CODES & pads:
120 |             self.codes_pad.noutrefresh(
121 |                 PAD_YMIN, PAD_XMIN, 
122 |                 SCREEN_YMIN, self.codes_x0,
123 |                 SCREEN_YMAX, self.codes_x1
124 |             )
125 |         if self.DEBUG or Pads.STATUS & pads:
126 |             self.status_pad.noutrefresh(
127 |                 0, 0, 
128 |                 self.rows - 1, 0, 
129 |                 self.rows, self.cols
130 |             )
131 |         curses.doupdate()
132 | 
133 |     def create_index_pad(self):
134 |         "Creates a pad for displaying line numbers, starting with 1"
135 |         pad = curses.newpad(self.pad_height, self.index_width)
136 |         y = 0
137 |         for y, line in enumerate(self.text):
138 |             pad.addstr(y, 0, str(y + 1).rjust(self.index_width - 1), curses.color_pair(1))
139 |         return pad
140 | 
141 |     def create_text_pad(self):
142 |         "Creates a pad for showing the text being coded."
143 |         pad = curses.newpad(self.pad_height, self.TEXT_WIDTH)
144 |         for y, line in enumerate(self.text):
145 |             pad.addstr(y, 0, line[:self.TEXT_WIDTH])
146 |         return pad
147 | 
148 |     def create_codes_pad(self):
149 |         "Creates a pad for showing the codes."
150 |         pad = curses.newpad(self.pad_height, self.CODES_WIDTH)
151 |         y = 0
152 |         for codes, logical_line in zip(self.codes, self.text):
153 |             pad.addstr(y, 0, codes)
154 |             for line in logical_line:
155 |                 y += 1
156 |         return pad
157 | 
158 |     def create_status_pad(self):
159 |         "Creates a pad for the status bar"
160 |         pad = curses.newpad(1, self.cols - 1)
161 |         pad.addstr(0, 0, self.status_message.ljust(self.cols - 2), curses.A_REVERSE)
162 |         return pad
163 | 
164 |     def measure_screen(self):
165 |         """Gets the dimensions of the screen and computes layout values.
166 |         """
167 |         rows, cols = self.screen.getmaxyx()
168 |         self.rows = rows
169 |         self.cols = cols
170 |         self.text_nums_x0 = 0
171 |         self.text_x0 = self.index_width + 1
172 |         self.text_x1 = self.divider_x = self.text_x0 + self.TEXT_WIDTH
173 |         self.codes_nums_x0 = self.divider_x + self.DIVIDER_WIDTH
174 |         self.codes_x0 = self.codes_nums_x0 + self.index_width + 1
175 |         self.codes_width = self.cols - self.codes_x0
176 |         self.codes_x1 = self.codes_x0 + self.codes_width - 1
177 | 
178 |     def handle_screen_resize(self):
179 |         self.measure_screen()
180 |         self.render()
181 | 
182 |     def handle_keypress(self, ch):
183 |         if self.edit_mode:
184 |             if UI.is_control_char(ch):
185 |                 self.edit_mode = False
186 |                 self.set_status_message(chr(ch))
187 |                 self.control_buffer = chr(ch)
188 |             elif ch == curses.KEY_DOWN:
189 |                 self.seek_line(self.focus_line + 1)
190 |             elif ch == curses.KEY_UP:
191 |                 self.seek_line(self.focus_line - 1)
192 |         else:
193 |             if UI.is_escape(ch):
194 |                 self.set_status_message('')
195 |                 self.edit_mode = True
196 |             elif UI.allowed_in_command(ch):
197 |                 self.set_status_message(self.status_message + chr(ch))
198 |                 self.control_buffer += chr(ch)
199 |             elif UI.is_enter(ch):
200 |                 self.handle_control_command()
201 | 
202 |     def handle_control_command(self):
203 |         sigil, command = self.control_buffer[0], self.control_buffer[1:]
204 |         if sigil == ':':
205 |             if command.isdigit():
206 |                 self.set_status_message(f"SEEKING TO LINE {command}")
207 |             elif command == 'q':
208 |                 self.running = False
209 |                 curses.endwin()
210 |             elif command == 'h':
211 |                 self.show_help()
212 |             elif command == 'g': 
213 |                 self.seek_line(0)
214 |             elif command == 'G':
215 |                 self.seek_line(len(self.text) - 1)
216 |             else:
217 |                 self.set_status_message("???")
218 |             self.edit_mode = True
219 |         elif sigil == '/':
220 |             self.set_status_message(f"SEARCHING FOR {command}")
221 |             self.edit_mode = True
222 |         elif sigil == '?':
223 |             self.set_status_message(f"REVERSE SEARCHING FOR {command}")
224 |             self.edit_mode = True
225 | 
226 |     def seek_line(self, index):
227 |         """Tries to move the focus to line `index`.
228 |         Checks that `index` is in bounds, then updates the focus_window_line.
229 |         """
230 |         self.focus_line = max(0, min(index, len(self.codes) - 1))
231 |         if self.focus_line < self.focus_window_line:
232 |             self.focus_window_line = self.focus_line
233 |         #elif not self.line_is_in_view(self.focus_line):
234 | 
235 |         
236 |         """
237 |         self.line_row_index maps the row positions of logical lines on the text
238 |         and codes pads. When the focus line is lower than the focus window line, 
239 |         we can just set the focus window line to the focus line. 
240 | 
241 |         But what about the other end? I need to check whether the focus line is in 
242 |         view. If not, I need to increase the focus window line. I could do this 
243 |         by walking the focus window line forward, but I want an O(1) update. 
244 |         So I'll set focus_window_line to focus_line and then walk it backward
245 |         as long as the whole focus line is in view. (In the perverse case of 
246 |         an extremely long line which can't be displayed, the screen will show as much 
247 |         of the line as possible.
248 | 
249 |         """
250 |         # TODO this is clumsy. Save the target cursor_position
251 |         self.cursor_position = min(len(self.codes[index]), self.cursor_position)
252 |         self.render()
253 | 
254 |     def line_is_in_view(self, line):
255 |         y0, y1 = self.lines_in_view()
256 |         return y0 <= line and line < y1
257 | 
258 |     def lines_in_view(self):
259 |         "Returns the top (inclusive) and bottom (exclusive) logical lines in view"
260 |         j = 0
261 |         y0 = y1 = self.focus_window_line
262 |         while True:
263 |             if y1 + 1 < len(self.codes) and j + self.line_row_index[y1] < self.cols:
264 |                 y1 += 1
265 |             else:
266 |                 return y0, y1
267 | 
268 |     def set_status_message(self, msg, render=True):
269 |         "Renders `msg` on the status bar"
270 |         self.status_message = msg[:self.rows - 1]
271 |         if self.DEBUG:
272 |             debug_msg = self.debug_message()[:self.rows - 1]
273 |             smx = max(0, self.rows - 1 - len(debug_msg))
274 |             self.status_message = self.status_message[:smx].ljust(smx) + debug_msg
275 |         self.status_pad.addstr(0, 0, self.status_message.ljust(self.cols - 2), curses.A_REVERSE)
276 |         self.render(Pads.STATUS)
277 | 
278 |     def help_message(self):
279 |         return (
280 |             ':h -> help | :q -> save and quit | :12 -> go to line 12 | '
281 |             '/cat -> search forward for "cat" | ?dog -> search backward for "dog"'
282 |             )
283 | 
284 |     def debug_message(self):
285 |         "Defines what is displayed in the debug message"
286 |         return f" | focus: {self.focus_line}, window: {self.focus_window_line}"
287 | 
288 |     def show_help(self):
289 |         self.set_status_message(self.help_message())
290 | 
291 |     def split_text(self, text):
292 |         lines = [wrap(line or ' ', width=self.TEXT_WIDTH) or [''] for line in text]
293 |         return lines
294 |     
295 |     def draw_divider(self):
296 |         for y in range(self.rows - 1):
297 |             self.screen.addstr(y, self.divider_x, '|', curses.A_REVERSE)
298 | 
299 |     def draw_line_numbers(self, screen_x):
300 |         """Draws line numbers at the specified column. 
301 |         """
302 |         SPACE_FOR_STATUS_ROW = 1
303 |         screen_y = 0
304 |         ix = self.focus_window_line
305 |         while screen_y < self.rows - SPACE_FOR_STATUS_ROW and ix < len(self.text):
306 |             display_num = str(ix + 1).rjust(self.index_width - 1)
307 |             self.screen.addstr(screen_y, screen_x, display_num, curses.color_pair(1))
308 |             if ix + 1 < len(self.text):
309 |                 pad_y_delta = self.line_row_index[ix + 1] - self.line_row_index[ix]
310 |                 screen_y += pad_y_delta
311 |             ix += 1
312 | 
313 | 
314 | 


--------------------------------------------------------------------------------
/qualitative_coding/views/styles.py:
--------------------------------------------------------------------------------
 1 | from textwrap import fill
 2 | import click
 3 | 
 4 | FW = 80
 5 | 
 6 | def formatter(**style_args):
 7 |     """A factory function which returns a formatting function.
 8 |     """
 9 |     def format_message(message, preformatted=False, list_format=False):
10 |         message = str(message)
11 |         if preformatted:
12 |             if list_format: 
13 |                 raise ValueError("preformatted and list_format are incompatible options")
14 |             fmsg = message
15 |         elif list_format:
16 |             fmsg = fill(message, width=FW, initial_indent='- ', subsequent_indent='  ')
17 |         else:
18 |             fmsg = fill(message, width=FW)
19 |         return click.style(fmsg, **style_args)
20 |     return format_message
21 | 
# Ready-made formatters, one per kind of message, each with its own styling.
address = formatter(fg="cyan")
question = formatter(fg="cyan")
debug = formatter(dim=True)
info = formatter(fg="blue")
warn = formatter(fg="yellow")
confirm = formatter(fg="yellow")
error = formatter(fg="red")
success = formatter(fg="green")
30 | 


--------------------------------------------------------------------------------