├── .gitignore ├── .readthedocs.yaml ├── AGPL3.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── _templates │ └── breadcrumbs.html ├── coding.png ├── coding_google_docs.png ├── coding_vscode.png ├── conf.py ├── index.rst ├── make.bat ├── manuscript.rst ├── memo.png └── requirements.txt ├── paper.bib ├── paper.md ├── poetry.lock ├── pyproject.toml ├── qc_lockup.v0.png └── qualitative_coding ├── cli ├── __init__.py ├── check.py ├── click_aliases.py ├── code.py ├── codebook.py ├── coders.py ├── codes │ ├── __init__.py │ ├── crosstab.py │ ├── find.py │ ├── list.py │ ├── rename.py │ └── stats.py ├── corpus │ ├── __init__.py │ ├── anonymize.py │ ├── import_media.py │ ├── list.py │ ├── move.py │ ├── remove.py │ └── update.py ├── decorators.py ├── export.py ├── init.py ├── memo.py ├── options.py ├── upgrade.py └── version.py ├── codebook.py ├── corpus.py ├── database ├── errors.py └── models.py ├── demo.qdpx ├── diff.py ├── editors.py ├── exceptions.py ├── helpers.py ├── logs.py ├── media_importers ├── __init__.py ├── base.py ├── pandoc.py ├── verbatim.py └── vtt.py ├── migrations ├── __init__.py ├── migration.py ├── migration_0_2_3.py ├── migration_1_0_0.py └── migration_1_4_0.py ├── refi_qda ├── nvivo_project.qdpx ├── reader.py ├── schema.xsd └── writer.py ├── tests ├── __init__.py ├── fixtures.py ├── mock_editor.py ├── test_check.py ├── test_code.py ├── test_code_parsing.py ├── test_codebook.py ├── test_coders.py ├── test_codes_crosstab.py ├── test_codes_find.py ├── test_codes_list.py ├── test_codes_rename.py ├── test_codes_stats.py ├── test_corpus_anonymize.py ├── test_corpus_import.py ├── test_corpus_move.py ├── test_corpus_remove.py ├── test_corpus_update.py ├── test_export.py ├── test_init.py ├── test_init_import.py ├── test_logs.py ├── test_memo.py ├── test_read_diff_offsets.py ├── test_refi_qda_writer.py ├── test_tree_node.py ├── test_upgrade.py └── test_version.py ├── tree_node.py ├── user_input.py └── views ├── coding_ui.py ├── 
styles.py └── viewer.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | .DS_Store 4 | *.swo 5 | qualitative_coding.egg-info/* 6 | dist/* 7 | NOTES.md 8 | TODO.md 9 | docs/_build/* 10 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | build: 8 | os: ubuntu-22.04 9 | tools: 10 | python: "3.12" 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements.txt 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | [Chris Proctor](mailto:chrisp@buffalo.edu), the project lead, would be delighted to hear about your experience 4 | using `qc`. Bug reports, feature requests, and discussion of the future directions of `qc` take place 5 | on the [project repository’s issues page](https://github.com/cproctor/qualitative-coding/issues). 6 | Code contributions to this project should be via pull requests on this repository. 7 | 8 | ## How you can help 9 | 10 | `qc` is a young project; it's not yet clear how (or if) the project should grow to best support 11 | its users. Probably the best way to contribute is to use `qc` in your own research, and to share 12 | limitations you discover or new features you wish existed. This 13 | [repository’s issues page](https://github.com/cproctor/qualitative-coding/issues) would be a great 14 | place to iterate on ideas for new features. 
15 | 16 | ## Code of conduct 17 | 18 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](code_of_conduct.md) 19 | 20 | This project adopts the [Contributor Covenant](https://www.contributor-covenant.org/). 21 | Please contact [Chris Proctor](mailto:chrisp@buffalo.edu) if you experience treatment which makes you 22 | feel unsafe or unwelcome. 23 | 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | QC: Qualitative coding for computer scientists 2 | Copyright (C) 2019 Chris Proctor 3 | chrisproctor.net 4 | pypi.org@accounts.chrisproctor.net 5 | 6 | This program is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU Affero General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | This program is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU Affero General Public License for more details. 15 | 16 | You should have received a copy of the GNU Affero General Public License 17 | along with this program (AGPL3.txt). If not, see 18 | <https://www.gnu.org/licenses/>. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![QC logo](qc_lockup.v0.png) 2 | 3 | [![status](https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729/status.svg)](https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729) 4 | 5 | `qc` is a free, open-source command-line-based tool for qualitative data 6 | analysis designed to support computational thinking. 
In addition to making the 7 | qualitative data analysis process more efficient, computational thinking can 8 | contribute to the richness of subjective interpretation. The typical workflow 9 | in qualitative research is an iterative cycle of "notice things," "think about 10 | things," and "collect things" (Seidel, 1998). `qc` provides 11 | computational affordances for each of these practices, including the ability to 12 | integrate manual coding with automated coding, a tree-based hierarchy of codes 13 | stored in a YAML file, allowing versioning of thematic analysis, and a powerful 14 | query interface for viewing code statistics and snippets of coded documents. 15 | 16 | Qualitative data analysis, in its various forms, is a core methodology for 17 | qualitative, mixed methods, and some quantitative research in the social 18 | sciences. Although there are a variety of well-known commercial QDA software 19 | packages such as NVivo, Dedoose, ATLAS.ti, and MAXQDA, they are generally 20 | designed to protect users from complexity rather than providing 21 | affordances for engaging with complexity via algorithms and data structures. 22 | The central design hypothesis of `qc` is that a closer partnership between 23 | the researcher and the computational tool can enhance the quality of QDA. 24 | `qc` adopts the "unix philosophy" (McIlroy, 1978) of building tools which do 25 | one thing well while being composable into flexible workflows, and the 26 | values of "plain-text social science" (Healy, 2020), emphasizing 27 | reproducibility, transparency, and collaborative open science. 
28 | 29 | `qc` was used in [a prior paper](https://chrisproctor.net/research/proctor_2019_defining/) 30 | and the author's doctoral dissertation; `qc` is currently a core tool supporting a large 31 | NSF-funded Delphi study involving multiple interviews 32 | with forty participant experts, open coding with over a thousand distinct 33 | codes, four separate coders, and several custom machine learning tools 34 | supporting the research team with clustering and synthesizing emergent themes. 35 | `qc` is a free, open-source command-line-based tool for qualitative data analysis 36 | designed to support computational thinking. In addition to making the qualitative data 37 | analysis process more efficient, computational thinking can contribute to the richness 38 | of subjective interpretation. Although numerous powerful software packages exist 39 | for qualitative data analysis, they are generally designed to protect users from complexity 40 | rather than providing affordances for engaging with complexity via algorithms and 41 | data structures. 42 | 43 | ## Installation 44 | 45 | `qc` is distributed via the Python Package Index (PyPI), and can be 46 | installed on any POSIX system (Linux, Unix, Mac OS, or Windows Subsystem 47 | for Linux) which has Python 3.9 or higher installed. If you want to install 48 | `qc` globally on your system, the cleanest approach is to use 49 | [pipx](https://pipx.pypa.io/stable/). 50 | 51 | pipx install qualitative-coding 52 | 53 | If your research project 54 | is already contained within a Python package and you want to install `qc` 55 | as a local dependency, simply add `qualitative-coding` to `pyproject.toml` 56 | or `requirements.txt`. 57 | 58 | `qc` relies on [Pandoc](https://pandoc.org/) for converting between file formats, 59 | so make sure that is installed as well. `qc` uses a text editor for coding; 60 | you should install Visual Studio Code, the default editor, unless you prefer 61 | a different editor such as emacs or vim. 
62 | 63 | ## Usage 64 | 65 | Please see the [package documentation](https://qualitative-coding.readthedocs.io) 66 | for details on the design of `qc`, a vignette illustrating its usage, and full 67 | documentation of `qc`'s commands. 68 | 69 | ## Acknowledgements 70 | 71 | Partial support for development of `qc` was provided by UB's Digital Studio Scholarship 72 | Network. Logo design by Blessed Mhungu. 73 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_templates/breadcrumbs.html: -------------------------------------------------------------------------------- 1 | {%- extends "sphinx_rtd_theme/breadcrumbs.html" %} 2 | 3 | {% block breadcrumbs %} 4 | {% endblock %} 5 | 6 | {% block breadcrumbs_aside %} 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /docs/coding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding.png -------------------------------------------------------------------------------- /docs/coding_google_docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding_google_docs.png -------------------------------------------------------------------------------- /docs/coding_vscode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/coding_vscode.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'Qualitative Coding' 10 | copyright = '2024, Chris Proctor' 11 | author = 'Chris Proctor' 12 | release = '1.7.3' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = [] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 21 | 22 | 23 | 24 | # -- Options for HTML output ------------------------------------------------- 25 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 26 | 27 | html_theme = 'sphinx_rtd_theme' 28 | html_static_path = ['_static'] 29 | html_logo = '../qc_lockup.v0.png' 30 | html_theme_options = { 31 | 'logo_only': True, 32 | 'display_version': False, 33 | } 34 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Qualitative Coding documentation master file, created by 2 | sphinx-quickstart on Tue May 28 09:51:22 2024. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | `qc`: A tool for qualitative data analysis designed to support computational thinking 7 | ===================================================================================== 8 | 9 | .. 
image:: https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729/status.svg 10 | :alt: Journal of Open Source Software 11 | :target: https://joss.theoj.org/papers/7d0c7ed7cbe8f614d986fc699ae42729 12 | 13 | ``qc`` is a free, open-source command-line-based tool for qualitative 14 | data analysis designed to support computational thinking. In addition to 15 | making qualitative data analysis process more efficient, computational 16 | thinking can contribute to the richness of subjective interpretation. 17 | Although numerous powerful software packages exist for qualitative data 18 | analysis, they are generally designed to protect users from complexity 19 | rather than providing affordances for engaging with complexity via 20 | algorithms and data structures. 21 | 22 | .. _installation: 23 | 24 | Installation 25 | ------------ 26 | 27 | Prerequisites 28 | ~~~~~~~~~~~~~ 29 | 30 | ``qc`` runs on the command line (also called a shell or a terminal), so you will 31 | need to be familiar with using a command line in order to use ``qc``. 32 | 33 | * `Python 3.9 `__ or higher. 34 | * `Pandoc `__. ``qc`` relies on Pandoc for converting between 35 | file formats. 36 | * A code editor. You should install 37 | `Visual Studio Code `__, the default 38 | editor, unless you prefer a different editor such as emacs or vim. 39 | * Terminal bindings for the code editor, allowing the code editor to be 40 | opened from Terminal. `Here are instructions for Visual Studio Code `_. 41 | * The `Sync Scroll `__ 42 | extension for Visual Studio Code. 43 | 44 | Install with pip or pipx 45 | ~~~~~~~~~~~~~~~~~~~~~~~~ 46 | 47 | ``qc`` is distributed via the Python Package Index (PYPI). If you want to 48 | install ``qc`` globally on your system, the cleanest approach is to use 49 | `pipx `__. 50 | 51 | .. note:: 52 | 53 | The command below (and others throughout this documentation) 54 | is intended to be entered into a terminal. 
55 | The ``%`` character is the command prompt indicating that the 56 | terminal is ready for input; don't type it into your terminal. 57 | Don't worry if your terminal uses a different command prompt 58 | such as ``$``. 59 | 60 | .. code-block:: console 61 | 62 | % pipx install qualitative-coding 63 | 64 | Install as a dependency 65 | ~~~~~~~~~~~~~~~~~~~~~~~ 66 | 67 | If your research project is already contained within a Python package 68 | and you want to install ``qc`` as a local dependency, simply add 69 | ``qualitative-coding`` to ``pyproject.toml`` or ``requirements.txt``. 70 | 71 | Stuck? 72 | ~~~~~~ 73 | 74 | If you get stuck installing ``qc``, feel free to email 75 | Chris Proctor (chrisp@buffalo.edu), the project lead. 76 | 77 | .. toctree:: 78 | :maxdepth: 1 79 | :caption: Contents: 80 | 81 | manuscript 82 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/memo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cproctor/qualitative-coding/cbbbbcc9b5a6aa7ed4ee4bf8af82dc9b9ab154f2/docs/memo.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.3.7 2 | sphinx_rtd_theme==2.0.0 3 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @article{dhakal2022nvivo, 2 | title={NVivo}, 3 | author={Dhakal, Kerry}, 4 | journal={Journal of the Medical Library Association: JMLA}, 5 | volume={110}, 6 | number={2}, 7 | pages={270}, 8 | year={2022}, 9 | publisher={Medical Library Association} 10 | } 11 | @article{engelbart1962, 12 | title = {Augmenting Human Intellect: A Conceptual Framework}, 13 | author = {Engelbart, Douglas C}, 14 | year = {1962}, 15 | doi = {10.21236/ad0289565}, 16 | pages = {64--90} 17 | } 18 | @techreport{healy2020, 19 | title = {The {{Plain Person}}'s {{Guide}} to {{Plain Text Social Science}}}, 20 | author = {Healy, Kieran}, 21 | year = {2020} 22 | } 23 | @article{kidder1987, 24 | title = {Qualitative and Quantitative Methods: {{When}} Stories Converge}, 25 | shorttitle = {Qualitative and Quantitative Methods}, 26 | author = {Kidder, Louise H. 
and Fine, Michelle}, 27 | year = {1987}, 28 | month = sep, 29 | journal = {New Directions for Program Evaluation}, 30 | volume = {1987}, 31 | number = {35}, 32 | pages = {57--75}, 33 | issn = {0164-7989, 1551-2371}, 34 | doi = {10.1002/ev.1459}, 35 | urldate = {2024-09-15}, 36 | abstract = {Abstract The use of qualitative measures in a quantitative framework results in a reasonable likelihood of triangulation; in contrast, the independent conduct of qualitative and quantitative evaluations is a greater challenge for triangulation, but it also holds promise for greater discovery.}, 37 | copyright = {http://onlinelibrary.wiley.com/termsAndConditions\#vor}, 38 | langid = {english} 39 | } 40 | @article{kuckartz2010realizing, 41 | title={Realizing mixed-methods approaches with MAXQDA}, 42 | author={Kuckartz, Udo}, 43 | journal={Philipps-Universit{\"a}t, Marburg}, 44 | year={2010} 45 | } 46 | @article{mcilroy1978, 47 | title = {{{UNIX}} Time-Sharing System}, 48 | author = {McIlroy, Doug and PInson, E and Tague, B}, 49 | year = {1978}, 50 | journal = {The Bell System Technical Journal}, 51 | pages = {1902--1903} 52 | } 53 | @article{ogbeifun2016delphi, 54 | title={The {Delphi} technique: A credible research methodology}, 55 | author={Ogbeifun, E and Agwa-Ejon, J and Mbohwa, Charles and Pretorius, JH}, 56 | year={2016} 57 | } 58 | @incollection{pea1997, 59 | title = {Practices of Distributed Intelligence and Designs for Education}, 60 | booktitle = {Distributed Cognitions: Psychological and Educational Considerations}, 61 | author = {Pea, Roy D}, 62 | editor = {Salomon, Gavriel}, 63 | year = {1997}, 64 | series = {Learning in Doing}, 65 | pages = {47--87}, 66 | publisher = {Cambridge University Press}, 67 | address = {New York}, 68 | isbn = {978-0-521-57423-5 978-0-521-41406-7}, 69 | langid = {english}, 70 | lccn = {BF311 .D538 1997}, 71 | keywords = {{Learning, Psychology of},Distributed cognition,Social aspects} 72 | } 73 | @inproceedings{proctor2019, 74 | title = 
{Defining and Designing Computer Science Education in a {{K12}} Public School District}, 75 | booktitle = {Proceedings of the 50th {{ACM}} Technical Symposium on Computer Science Education}, 76 | author = {Proctor, Chris and Bigman, Maxwell and Blikstein, Paulo}, 77 | year = {2019}, 78 | series = {{{SIGCSE}} '19}, 79 | pages = {314--320}, 80 | publisher = {Association for Computing Machinery}, 81 | address = {New York, NY, USA}, 82 | doi = {10.1145/3287324.3287440}, 83 | isbn = {978-1-4503-5890-3}, 84 | keywords = {computational thinking,equity,k12 cs} 85 | } 86 | @book{salmona2019qualitative, 87 | title={Qualitative and mixed methods data analysis using {Dedoose}: A practical approach for research across the social sciences}, 88 | author={Salmona, Michelle and Lieber, Eli and Kaczynski, Dan}, 89 | year={2019}, 90 | publisher={Sage Publications} 91 | } 92 | 93 | @article{seidel1998qualitative, 94 | title = {Qualitative Data Analysis}, 95 | author = {Seidel, John V}, 96 | year = {1998} 97 | } 98 | @article{smit2002atlas, 99 | title={{ATLAS.ti} for qualitative data analysis}, 100 | author={Smit, Brigitte}, 101 | journal={Perspectives in education}, 102 | volume={20}, 103 | number={3}, 104 | pages={65--75}, 105 | year={2002}, 106 | publisher={University of the Free State} 107 | } 108 | @article{wing2011research, 109 | title = {Research Notebook: {{Computational}} Thinking---{{What}} and Why}, 110 | author = {Wing, Jeanette}, 111 | year = {2011}, 112 | journal = {The link magazine}, 113 | volume = {6}, 114 | pages = {20--23} 115 | } 116 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A tool for qualitative data analysis designed to support computational thinking" 3 | tags: 4 | - qualitative data analysis 5 | - qualitative coding 6 | - computational thinking 7 | - computational social science 8 | - python 9 | authors: 10 | - 
name: Chris Proctor 11 | orcid: 0000-0003-3492-9590 12 | affiliation: 1 13 | affiliations: 14 | - name: Graduate School of Education, University at Buffalo (SUNY), United States 15 | index: 1 16 | date: 28 May 2024 17 | bibliography: paper.bib 18 | --- 19 | 20 | # Summary 21 | 22 | `qc` is a free, open-source command-line-based tool for qualitative data 23 | analysis designed to support computational thinking. In addition to making the 24 | qualitative data analysis process more robust and efficient, computational thinking can 25 | contribute to the richness of subjective interpretation. The typical workflow 26 | in qualitative research is an iterative cycle of "notice things," "think about 27 | things," and "collect things" [@seidel1998qualitative, p. 2]. `qc` provides 28 | computational affordances for each of these practices, including the ability to 29 | integrate manual coding with automated coding, a tree-based hierarchy of codes 30 | stored in a YAML file, allowing versioning of thematic analysis, and a powerful 31 | query interface for viewing code statistics and snippets of coded documents. 32 | 33 | # Background 34 | 35 | `qc` is designed to support the application of computational thinking 36 | (CT) to qualitative data analysis (QDA). In the social 37 | sciences, QDA is a method of applying codes to text, images, video, and 38 | other artifacts, then analyzing the resulting patterns of codes and 39 | using the codes to more deeply understand the text. 40 | When QDA is used in quantitative or mixed-methods research, it is 41 | typically used to transform loosely-structured data such 42 | as an interview transcript into categories or codes which can then be 43 | used in downstream quantitative analysis answering predefined research 44 | questions. In contrast, when QDA is used in qualitative research, 45 | it is typically part of an interpretive sensemaking process. 
These two uses 46 | of QDA have been referred to as *little-q* ("looking for answers") and 47 | *big-Q* ("looking for questions") qualitative research [@kidder1987]. 48 | 49 | The central design hypothesis of `qc` is that a closer partnership 50 | between the researcher and the computational tool can enhance the 51 | quality of QDA. This partnership, which could be characterized as 52 | augmented [@engelbart1962] or distributed cognition [@pea1997], depends on 53 | the researcher's ability to conceptualize the data and the process in 54 | computational terms, becoming immersed in the matrices, trees, and other 55 | computational structures inherent to QDA rather than remaining "outside" 56 | at the level of user interface. Such practices can be identified as *computational 57 | thinking* (CT), "the thought processes involved in 58 | formulating problems and their solutions so that the solutions are 59 | represented in a form that can effectively be carried out by an 60 | information-processing agent" [@wing2011research]. The application of CT to 61 | QDA would mean conceptualizing the goal and the process of QDA in 62 | computational terms, keeping a mental model of the work the computer is 63 | doing for you. 64 | 65 | # Statement of need 66 | 67 | Although there are numerous well-known commercial QDA software 68 | packages such as NVivo [@dhakal2022nvivo], Dedoose [@salmona2019qualitative], 69 | ATLAS.ti [@smit2002atlas], and MAXQDA [@kuckartz2010realizing], they do not 70 | provide affordances for users desiring more active engagement with the data and 71 | processes underlying QDA. `qc` better supports such users, providing a scriptable 72 | command-line interface with powerful and flexible queries, with data stored in simple 73 | and standardized formats. 
`qc` adopts the "unix philosophy" [@mcilroy1978] 74 | of building tools which do one thing well while being composable into 75 | flexible workflows, and the values of "plain-text social science" [@healy2020], emphasizing 76 | reproducibility, transparency, and collaborative open science. 77 | 78 | `qc` was used in [@proctor2019] (described but not cited) and the author's 79 | doctoral dissertation; `qc` is currently a core tool supporting a large 80 | NSF-funded Delphi study [@ogbeifun2016delphi] involving multiple interviews 81 | with forty participant experts, open coding with over a thousand distinct 82 | codes, four separate coders, and several custom machine learning tools 83 | supporting the research team with clustering and synthesizing emergent themes. 84 | 85 | # Acknowledgements 86 | 87 | Development of `qc` was funded in part by a grant from the University at Buffalo's 88 | Digital Scholarship Studio Network. 89 | 90 | # References 91 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "qualitative-coding" 3 | version = "1.7.3" 4 | description = "Qualitative coding tools to support computational thinking" 5 | authors = [ 6 | {name = "Chris Proctor",email = "chris@chrisproctor.net"} 7 | ] 8 | license = {text = "AGPL3"} 9 | readme = "README.md" 10 | requires-python = ">=3.11,<3.13" 11 | dependencies = [ 12 | "tqdm (>=4.67.1,<5.0.0)", 13 | "tabulate (>=0.9.0,<0.10.0)", 14 | "numpy (>=2.2.1,<3.0.0)", 15 | "pyyaml (>=6.0.2,<7.0.0)", 16 | "click (>=8.1.8,<9.0.0)", 17 | "sqlalchemy (>=2.0.37,<3.0.0)", 18 | "semver (>=3.0.2,<4.0.0)", 19 | "xmlschema (>=3.4.3,<4.0.0)", 20 | "structlog (>=25.1.0,<26.0.0)", 21 | "rich (>=13.9.4,<14.0.0)", 22 | "more-itertools (>=10.6.0,<11.0.0)", 23 | "spacy (>=3.8.4,<4.0.0)", 24 | "webvtt-py (>=0.5.1,<0.6.0)" 25 | ] 26 | 27 | [project.urls] 28 | homepage = 
# Root command group of the package's command-line interface. It has no
# behaviour of its own: the subcommands (init, export, corpus, codes, version,
# check, codebook, code, coders, memo, upgrade) are attached to it at module
# level with cli.add_command(...).
#
# ClickAliasedGroup is used instead of a plain click.Group so that
# add_command() accepts an `aliases=` keyword (used for codebook -> "cb").
#
# NOTE: the one-line docstring below is read by click at runtime as the
# group's help text, so it is program output rather than free-form commentary.
@click.group(cls=ClickAliasedGroup)
def cli() -> None:
    "Qualitative coding for computer scientists"
@click.command()
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@handle_qc_errors
def check(settings):
    "Check project for errors"
    # Settings file resolution order: the explicit --settings option, then the
    # QC_SETTINGS environment variable, then "settings.yaml".
    if settings:
        config_file = settings
    else:
        config_file = os.environ.get("QC_SETTINGS", "settings.yaml")

    # Record the invocation in the project log before doing any work.
    logger = configure_logger(config_file)
    logger.info("check")

    # Validate the corpus paths inside a corpus session.
    project = QCCorpus(config_file)
    with project.session():
        project.validate_corpus_paths()
class ClickAliasedGroup(click.Group):
    """A ``click.Group`` whose subcommands can also be invoked through aliases.

    Aliases are supplied with an extra ``aliases`` keyword accepted by
    ``add_command``, ``command`` and ``group``. They are resolved in
    ``get_command`` and shown next to the canonical name in the help listing
    produced by ``format_commands``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Canonical command name -> list of aliases (read by format_commands).
        self._commands = {}
        # Alias -> canonical command name (read by resolve_alias).
        self._aliases = {}

    def _register_aliases(self, name, aliases):
        """Record ``aliases`` as alternative names for the command ``name``."""
        self._commands[name] = aliases
        for alias in aliases:
            self._aliases[alias] = name

    def _aliased(self, decorator, aliases):
        """Wrap a click ``decorator`` so the command it builds gets ``aliases``."""
        if not aliases:
            return decorator

        def _decorator(f):
            cmd = decorator(f)
            self._register_aliases(cmd.name, aliases)
            return cmd

        return _decorator

    def add_command(self, *args, **kwargs):
        """Register a command, optionally with ``aliases=[...]``.

        All other arguments are forwarded unchanged to
        ``click.Group.add_command``.

        Raises:
            TypeError: if aliases are given but no command name can be found.
        """
        aliases = kwargs.pop('aliases', [])
        super().add_command(*args, **kwargs)
        if not aliases:
            return
        # The command and its name may arrive positionally or as keywords.
        cmd = args[0] if args else kwargs.get('cmd')
        name = args[1] if len(args) > 1 else kwargs.get('name')
        name = name or getattr(cmd, 'name', None)
        if name is None:
            raise TypeError('Command has no name.')
        # Aliases must point at the name the command was registered under,
        # which differs from cmd.name when an explicit name was supplied.
        self._register_aliases(name, aliases)

    def command(self, *args, **kwargs):
        """Like ``click.Group.command`` with an optional ``aliases`` keyword."""
        aliases = kwargs.pop('aliases', [])
        return self._aliased(super().command(*args, **kwargs), aliases)

    def group(self, *args, **kwargs):
        """Like ``click.Group.group`` with an optional ``aliases`` keyword."""
        aliases = kwargs.pop('aliases', [])
        return self._aliased(super().group(*args, **kwargs), aliases)

    def resolve_alias(self, cmd_name):
        """Return the canonical name for ``cmd_name`` (itself if not an alias)."""
        return self._aliases.get(cmd_name, cmd_name)

    def get_command(self, ctx, cmd_name):
        """Look up a command by canonical name or alias; ``None`` if unknown."""
        command = super().get_command(ctx, self.resolve_alias(cmd_name))
        if command:
            return command
        return None

    def format_commands(self, ctx, formatter):
        """Write the "Commands" help section, appending aliases to each name."""
        sub_commands = self.list_commands(ctx)
        max_len = max(map(len, sub_commands), default=0)
        limit = formatter.width - 6 - max_len

        rows = []
        for sub_command in sub_commands:
            cmd = self.get_command(ctx, sub_command)
            if cmd is None or getattr(cmd, 'hidden', False):
                continue
            if sub_command in self._commands:
                aliases = ','.join(sorted(self._commands[sub_command]))
                sub_command = f'{sub_command} ({aliases})'
            # Feature-detect instead of comparing the first character of
            # click.__version__, which misfires for two-digit major versions.
            if hasattr(cmd, 'get_short_help_str'):
                cmd_help = cmd.get_short_help_str(limit)
            else:
                cmd_help = cmd.short_help or ''
            rows.append((sub_command, cmd_help))

        if rows:
            with formatter.section('Commands'):
                formatter.write_dl(rows)
help="Select first uncoded file") 21 | @click.option("-r", "--random", is_flag=True, help="Select random uncoded file") 22 | @click.option("--recover", is_flag=True, help="Recover incomplete coding session") 23 | @click.option("--abandon", is_flag=True, help="Abandon incomplete coding session") 24 | @handle_qc_errors 25 | def code(coder, settings, pattern, filenames, uncoded, first, random, recover, abandon): 26 | "Open a file for coding" 27 | if first and random: 28 | msg = "--first and --random cannot both be used." 29 | raise IncompatibleOptions(msg) 30 | settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml") 31 | log = configure_logger(settings_path) 32 | log.info("code", coder=coder, pattern=pattern, filenames=filenames, uncoded=uncoded, 33 | first=first, random=random, recover=recover, abandon=abandon) 34 | corpus = QCCorpus(settings_path) 35 | viewer = QCCorpusViewer(corpus) 36 | if recover: 37 | viewer.recover_incomplete_coding_session(coder) 38 | elif abandon: 39 | viewer.abandon_incomplete_coding_session() 40 | else: 41 | if viewer.incomplete_coding_session_exists(): 42 | raise QCError( 43 | "An incomplete coding session exists. " + 44 | "Run qc code coder --recover to recover this coding session or " + 45 | "qc code coder --abandon to abandon it." 
@click.command()
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@handle_qc_errors
def codebook(settings):
    """Update the codebook"""
    # The docstring is the command's help text and is kept verbatim.
    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    if settings:
        settings_file = settings
    else:
        settings_file = os.environ.get("QC_SETTINGS", "settings.yaml")
    configure_logger(settings_file).info("codebook")
    project = QCCorpus(settings_file)
    # The codebook update runs inside a corpus session.
    with project.session():
        project.update_codebook()
@click.command()
@click.argument("codes", nargs=-1)
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern",
        help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames",
        help="File path containing a list of filenames to use")
@click.option("-c", "--coders", help="Coders", multiple=True)
@click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
@click.option("-n", "--unit", default="line", help="Unit of analysis",
        type=click.Choice(['line', 'paragraph', 'document']))
@click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True,
        help="Include child codes")
@click.option("-a", "--recursive-counts", is_flag=True,
        help="Counts for codes include child codes")
@click.option("-e", "--expanded", is_flag=True,
        help="Show names of codes in expanded form")
@click.option("-m", "--format", "_format", type=click.Choice(tabulate_formats),
        metavar="[tabulate.tabulate_formats]", help="Output format.")
@click.option("-o", "--outfile", help="Filename for CSV export")
@click.option("-0", "--probs", is_flag=True,
        help="Probabilities instead of counts")
@click.option("-z", "--compact", help="Compact display", is_flag=True)
@click.option("-y", "--tidy", help="Return tidy format", is_flag=True)
@click.option("-u", "--max", "_max", help="Maximum count value to show", type=int)
@click.option("-l", "--min", "_min", help="Minimum count value to show", type=int)
@handle_qc_errors
def crosstab(codes, settings, pattern, filenames, coders, depth, unit, recursive_codes,
        recursive_counts, expanded, _format, outfile, probs, compact, tidy, _max, _min):
    "Cross-tabulate code occurrences"
    # The docstring above is the command's help text; notes go in comments.
    #
    # Integer options are compared against None rather than tested for
    # truthiness: an omitted option arrives as None, whereas an explicit 0
    # (e.g. ``--min 0``) is a real value that must still be validated.
    if depth is not None and not recursive_codes:
        msg = "--depth requires --recursive-codes"
        raise IncompatibleOptions(msg)
    if tidy and compact:
        msg = "--tidy and --compact are incompatible"
        raise IncompatibleOptions(msg)
    if tidy and probs:
        msg = "--tidy and --probs are incompatible"
        raise IncompatibleOptions(msg)
    if _min is not None and not tidy:
        msg = "--min requires --tidy"
        raise IncompatibleOptions(msg)
    if _max is not None and not tidy:
        msg = "--max requires --tidy"
        raise IncompatibleOptions(msg)

    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    log.info("codes crosstab", codes=codes, pattern=pattern, filenames=filenames,
        coders=coders, depth=depth, unit=unit, recursive_codes=recursive_codes,
        recursive_counts=recursive_counts, expanded=expanded, _format=_format,
        outfile=outfile, probs=probs, compact=compact, tidy=tidy, _max=_max,
        _min=_min)
    corpus = QCCorpus(settings_path)
    viewer = QCCorpusViewer(corpus)

    # Keyword arguments common to the tidy and the tabular report.
    shared = dict(
        depth=depth,
        recursive_codes=recursive_codes,
        recursive_counts=recursive_counts,
        expanded=expanded,
        format=_format,
        pattern=pattern,
        file_list=read_file_list(filenames),
        coders=coders,
        unit=unit,
        outfile=outfile,
    )
    if tidy:
        viewer.tidy_codes(codes, minimum=_min, maximum=_max, **shared)
    else:
        viewer.crosstab(codes, probs=probs, compact=compact, **shared)
@click.command()
@click.argument("codes", nargs=-1)
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern",
        help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames",
        help="File path containing a list of filenames to use")
@click.option("-c", "--coders", help="Coders", multiple=True)
@click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
@click.option("-n", "--unit", default="line", help="Unit of analysis",
        type=click.Choice(['line', 'paragraph', 'document']))
@click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True,
        help="Include child codes")
@click.option("-B", "--before", default=2, type=int,
        help="Number of lines before the code to show")
@click.option("-C", "--after", default=2, type=int,
        help="Number of lines after the code to show")
@click.option("-o", "--no-codes", "no_codes", is_flag=True,
        help="Do not show matching codes")
@click.option("-l", "--no-line-numbers", "no_line_numbers", is_flag=True,
        help="Do not show line numbers")
@click.option("-j", "--json", is_flag=True, help="Export as JSON")
@handle_qc_errors
def find(codes, settings, pattern, filenames, coders, depth, unit, recursive_codes,
        before, after, no_codes, no_line_numbers, json):
    "Find all coded text"
    # The docstring above is the command's help text; notes go in comments.
    #
    # The display toggles are rejected when JSON output is requested.
    if no_codes and json:
        raise IncompatibleOptions("--no-codes and --json are incompatible")
    if no_line_numbers and json:
        # Name the option exactly as it is declared (--no-line-numbers).
        raise IncompatibleOptions("--no-line-numbers and --json are incompatible")

    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    log.info("codes find", codes=codes, pattern=pattern, filenames=filenames, coders=coders,
        depth=depth, unit=unit, recursive_codes=recursive_codes, before=before,
        after=after, no_codes=no_codes, no_line_numbers=no_line_numbers, json=json)
    corpus = QCCorpus(settings_path)
    viewer = QCCorpusViewer(corpus)

    # Keyword arguments common to the JSON and the text renderer.
    shared = dict(
        before=before,
        after=after,
        recursive_codes=recursive_codes,
        depth=depth,
        unit=unit,
        pattern=pattern,
        file_list=read_file_list(filenames),
        coders=coders,
    )
    if json:
        viewer.show_coded_text_json(codes, **shared)
    else:
        viewer.show_coded_text(
            codes,
            show_codes=not no_codes,
            show_line_numbers=not no_line_numbers,
            **shared,
        )
@click.command()
@click.argument("codes", nargs=-1)
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-p", "--pattern",
        help="Pattern to filter corpus filenames (glob-style)")
@click.option("-f", "--filenames",
        help="File path containing a list of filenames to use")
@click.option("-c", "--coders", help="Coders", multiple=True)
@click.option("-C", "--by-coder", is_flag=True, help="Report stats separately for each coder")
@click.option("-D", "--by-document", is_flag=True, help="Report stats separately for each document")
@click.option("-d", "--depth", help="Maximum depth in code tree", type=int)
@click.option("-n", "--unit", default="line", help="Unit of analysis",
        type=click.Choice(['line', 'paragraph', 'document']))
@click.option("-r", "--recursive-codes", "recursive_codes", is_flag=True,
        help="Include child codes")
@click.option("-a", "--recursive-counts", is_flag=True,
        help="Counts for codes include child codes")
@click.option("-e", "--expanded", is_flag=True,
        help="Show names of codes in expanded form")
@click.option("-m", "--format", "_format", type=click.Choice(tabulate_formats),
        metavar="[tabulate.tabulate_formats]", help="Output format.")
@click.option("-o", "--outfile", help="Filename for CSV export")
@click.option("-u", "--max", "_max", help="Maximum count value to show", type=int)
@click.option("-l", "--min", "_min", help="Minimum count value to show", type=int)
@click.option("-z", "--zeros", is_flag=True, help="Include codes with zero occurrences")
@click.option("-t", "--total-only", is_flag=True,
        help="Show total but not count")
@handle_qc_errors
def stats(codes, settings, pattern, filenames, coders, by_coder, by_document, depth, unit, recursive_codes,
        recursive_counts, expanded, _format, outfile, _max, _min, zeros, total_only):
    "Show statistics about codes"
    # The docstring above is the command's help text; notes go in comments.
    #
    # Compare against None rather than testing truthiness: an omitted --depth
    # arrives as None, whereas an explicit ``--depth 0`` must still be
    # rejected when --recursive-codes is absent.
    if depth is not None and not recursive_codes:
        msg = "--depth requires --recursive-codes"
        raise IncompatibleOptions(msg)

    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    # Log every option so the record is sufficient to reproduce the call.
    log.info("codes stats", codes=codes, pattern=pattern, filenames=filenames, coders=coders,
        by_coder=by_coder, by_document=by_document, depth=depth, unit=unit,
        recursive_codes=recursive_codes, recursive_counts=recursive_counts,
        expanded=expanded, _format=_format, outfile=outfile, _max=_max, _min=_min,
        zeros=zeros, total_only=total_only)
    corpus = QCCorpus(settings_path)
    viewer = QCCorpusViewer(corpus)

    # Keyword arguments common to both report types.
    shared = dict(
        format=_format,
        pattern=pattern,
        file_list=read_file_list(filenames),
        coders=coders,
        unit=unit,
        outfile=outfile,
    )
    if by_coder and by_document:
        # Both breakdowns requested: show the document-by-coder pivot table.
        viewer.show_document_coders_pivot_table(
            codes=codes,
            recursive=recursive_codes or recursive_counts,
            **shared,
        )
    else:
        viewer.show_stats(
            codes,
            max_count=_max,
            min_count=_min,
            depth=depth,
            recursive_codes=recursive_codes,
            recursive_counts=recursive_counts,
            expanded=expanded,
            by_coder=by_coder,
            by_document=by_document,
            total_only=total_only,
            zeros=zeros,
            **shared,
        )
"Corpus commands" 13 | 14 | corpus_group.add_command(list_corpus_paths, aliases=["ls"]) 15 | corpus_group.add_command(move, aliases=["mv"]) 16 | corpus_group.add_command(remove, aliases=["rm"]) 17 | corpus_group.add_command(import_media) 18 | corpus_group.add_command(update) 19 | corpus_group.add_command(anonymize) 20 | -------------------------------------------------------------------------------- /qualitative_coding/cli/corpus/anonymize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | import spacy 4 | import yaml 5 | from tqdm import tqdm 6 | from pathlib import Path 7 | from collections import defaultdict 8 | from qualitative_coding.corpus import QCCorpus 9 | from qualitative_coding.exceptions import QCError, IncompatibleOptions 10 | from qualitative_coding.helpers import read_file_list 11 | from qualitative_coding.cli.decorators import handle_qc_errors 12 | from qualitative_coding.logs import configure_logger 13 | 14 | LABELS = { 15 | "PERSON": "Person", 16 | "FAC": "Location", 17 | "ORG": "Organization", 18 | "GPE": "Location", 19 | "LOC": "Location", 20 | } 21 | 22 | @click.command() 23 | @click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file") 24 | @click.option("-p", "--pattern", help="Pattern to filter corpus filenames (glob-style)") 25 | @click.option("-f", "--filenames", help="File path containing a list of filenames to use") 26 | @click.option("-k", "--key", default="key.yaml", help="Path to key file") 27 | @click.option("-r", "--reverse", is_flag=True, help="Un-anonymize documents") 28 | @click.option("-o", "--out-dir", default="anonymized", help="location for anonymized documemts") 29 | @click.option("-u", "--update", is_flag=True, help="Update documents in place") 30 | @click.option("-d", "--dryrun", is_flag=True, help="Show diff instead of performing update") 31 | @handle_qc_errors 32 | def anonymize(settings, pattern, filenames, key, reverse, 
def replace_keys(keys, source, dest):
    """Copy ``source`` to ``dest``, replacing each key with its mapped value.

    Args:
        keys: mapping of search string -> replacement string.
        source: path object to read from (via ``read_text()``).
        dest: path object to write to; missing parent directories are created.

    Keys are applied one after another, longest first, so that a key which
    contains a shorter key (e.g. "New York City" and "New York") is replaced
    before the shorter one can consume part of it.
    """
    text = source.read_text()
    # Longest first; equal-length keys are ordered in reverse lexical order.
    for key in sorted(keys, key=lambda k: (len(k), k), reverse=True):
        text = text.replace(key, keys[key])
    # dest may sit in a sub-directory that does not exist yet, so create the
    # parent before writing instead of failing with FileNotFoundError.
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(text)
@click.command(name="import")
@click.argument("file_path")
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-r", "--recursive", is_flag=True,
        help="Recursively import from directory")
@click.option("-c", "--corpus-root",
        help="Relative path to import dir within corpus_dir")
@click.option("-i", "--importer", type=click.Choice(media_importers.keys()),
        default="pandoc",
        help="Importer class to use")
@handle_qc_errors
def import_media(file_path, settings, recursive, corpus_root, importer):
    """Import corpus files"""
    # The docstring is the command's help text and is kept verbatim.
    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    if settings:
        settings_file = settings
    else:
        settings_file = os.environ.get("QC_SETTINGS", "settings.yaml")
    logger = configure_logger(settings_file)
    logger.info(
        "corpus import",
        file_path=file_path,
        recursive=recursive,
        corpus_root=corpus_root,
        importer=importer,
    )
    project = QCCorpus(settings_file)
    import_options = {
        "recursive": recursive,
        "corpus_root": corpus_root,
        "importer": importer,
    }
    # The import itself runs inside a corpus session.
    with project.session():
        project.import_media(file_path, **import_options)
@click.command()
@click.argument("target")
@click.argument("destination")
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-r", "--recursive", is_flag=True,
        help="Recursively move from directory")
@handle_qc_errors
def move(target, destination, settings, recursive):
    "Move a file in the corpus"
    # The docstring above is the command's help text; notes go in comments.
    # The --recursive help used to read "Recursively import from directory",
    # carried over from the import command; it now describes this command.
    #
    # Settings precedence: --settings, then $QC_SETTINGS, then ./settings.yaml.
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")
    log = configure_logger(settings_path)
    log.info("corpus move", target=target, destination=destination, recursive=recursive)
    corpus = QCCorpus(settings_path)
    # The move runs inside a corpus session.
    with corpus.session():
        corpus.move_document(target, destination, recursive=recursive)
@click.command()
@click.argument("file_path", type=click.Path(exists=True))
@click.option("-s", "--settings", type=click.Path(exists=True), help="Settings file")
@click.option("-n", "--new", type=click.Path(exists=True), help="Path to new version")
@click.option("-d", "--dryrun", is_flag=True,
        help="Show simulated results")
@handle_qc_errors
def update(file_path, settings, new, dryrun):
    "Update the content of corpus files"
    # Settings resolution order: explicit --settings, then the QC_SETTINGS
    # environment variable, then "settings.yaml".
    if settings:
        settings_path = settings
    else:
        settings_path = os.environ.get("QC_SETTINGS", "settings.yaml")

    logger = configure_logger(settings_path)
    logger.info("corpus update", new=new, dryrun=dryrun)

    # The actual work is delegated to QCCorpus.update_document, run inside a
    # corpus session.
    qc_corpus = QCCorpus(settings_path)
    with qc_corpus.session():
        qc_corpus.update_document(file_path, new, dryrun)
def handle_qc_errors(f):
    """Decorator that turns QCError failures into a clean CLI exit.

    The wrapped callable is invoked with whatever arguments it receives.
    If it raises QCError, the error text is styled with `error(...)`,
    echoed to stderr, and the process exits with status 1. Any other
    exception propagates unchanged. The wrapper carries the metadata
    (name, docstring, ...) of `f`.
    """
    def command(*args, **kwargs):
        try:
            result = f(*args, **kwargs)
        except QCError as exc:
            message = error(str(exc), preformatted=True)
            click.echo(message, err=True)
            sys.exit(1)
        return result

    update_wrapper(command, f)
    return command
@click.command()
@click.option("-s", "--settings", type=click.Path(), help="Settings file")
@click.option("-w", "--write-settings-file", is_flag=True, help="Create a settings file but do not create directories")
@click.option("-i", "--import", "_import", help="Import an existing qdpx project")
@handle_qc_errors
def init(settings, write_settings_file, _import):
    "Initialize a qc project"
    # Settings resolution order: explicit --settings, then the QC_SETTINGS
    # environment variable, then "settings.yaml".
    settings_path = settings or os.environ.get("QC_SETTINGS", "settings.yaml")

    if not _import:
        # Fresh project: set up logging, then let QCCorpus create the project.
        log = configure_logger(settings_path)
        log.info("init", write_settings_file=write_settings_file)
        # Imported lazily, as in the import branch below.
        from qualitative_coding.corpus import QCCorpus
        QCCorpus.initialize(settings_path, write_settings_file)
        return

    # Import branch: unpack an existing qdpx project into the current
    # working directory. No logger is configured on this path.
    from qualitative_coding.refi_qda.reader import REFIQDAReader
    REFIQDAReader(_import).unpack_project(Path.cwd())
def check_incompatible(args, **conditions):
    """Raise IncompatibleOptions when `args` matches every given condition.

    Each keyword names an option (an attribute of `args`) and the value it
    is compared against; the comparison is written `val == getattr(...)`
    so that helper values such as Truthy/Falsy control the equality test.
    A missing attribute is treated as None.

    The error message depends on the shape of the conditions:
      * all values truthy  -> the options may not be combined;
      * all values falsy   -> one of the options is required;
      * mixed              -> the "absent" options must accompany the
                              "present" ones.

    Raises:
        IncompatibleOptions: when all conditions hold for `args`.
    """
    problem = all(val == getattr(args, opt, None) for opt, val in conditions.items())
    if problem:
        opts = ["--{}".format(k) for k in conditions.keys()]
        if all(conditions.values()):
            quantifier = "both" if len(conditions) == 2 else "all"
            message = f"{_fmt(opts)} may not {quantifier} be used."
        elif not any(conditions.values()):
            # This literal previously lacked the f prefix, so users saw the
            # raw "{_fmt(opts, _and=False)}" placeholder in the message.
            message = f"One of {_fmt(opts, _and=False)} is required."
        else:
            present = ["--{}".format(o) for o, req in conditions.items() if req]
            absent = ["--{}".format(o) for o, req in conditions.items() if not req]
            message = "{}{} must be used when {} {} used.".format(
                "One of " if len(absent) > 1 else "",
                _fmt(absent),
                _fmt(present),
                "is" if len(present) == 1 else "are"
            )
        raise IncompatibleOptions(message)
class QCCodebook:
    """Lightweight handle on a codebook, identified by its filename."""

    def __init__(self, filename):
        # Only the name is recorded; nothing is opened or read here.
        self.filename = filename
class Location(Base):
    """A line span (start_line..end_line) belonging to one DocumentIndex.

    Locations are linked many-to-many with CodedLine rows through
    coded_line_location_association_table.
    """
    __tablename__ = "location"
    __table_args__ = (
        # The database rejects spans whose start comes after their end.
        CheckConstraint("start_line <= end_line"),
    )
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # NOTE(review): whether end_line is inclusive is not visible here -- confirm.
    start_line: Mapped[int]
    end_line: Mapped[int]
    # NOTE(review): annotated Mapped[str] although it references
    # DocumentIndex.id, which is declared Mapped[int] -- confirm intent.
    document_index_id: Mapped[str] = mapped_column(ForeignKey(DocumentIndex.id))
    document_index: Mapped["DocumentIndex"] = relationship(back_populates="locations")
    # Reverse side of CodedLine.locations.
    coded_lines: Mapped[List["CodedLine"]] = relationship(
        secondary=coded_line_location_association_table,
        back_populates="locations",
    )
def reindex_coded_lines(coded_lines, diff):
    """Returns a new version of coded_lines, with line numbers updated to account for diff.
    Assumes coded_lines are sorted by line number.

    Each item of coded_lines is unpacked as (code, coder, line, path); the
    result is a list of tuples of the same shape with `line` shifted by the
    cumulative offset of every diff operation located at or before it.
    The offsets come from read_diff_offsets(diff) as (line, offset) pairs.

    NOTE(review): coded lines that fall inside a deleted region are shifted
    like any other line rather than dropped -- confirm that is the desired
    policy.
    """
    offsets = peekable(read_diff_offsets(diff))
    cum_offset = 0
    reindexed_coded_lines = []
    for code, coder, line, path in coded_lines:
        try:
            # Several diff operations may sit between two consecutive coded
            # lines, so consume all of them, not just the first.
            while offsets.peek()[0] <= line:
                _offset_line, offset = next(offsets)
                cum_offset += offset
        except StopIteration:
            # No offsets left; the cumulative shift stays as it is.
            pass
        # Previously nothing was ever appended, so the function always
        # returned an empty list.
        reindexed_coded_lines.append((code, coder, line + cum_offset, path))
    return reindexed_coded_lines
def read_line_number(hunk_preamble):
    """Return the starting line number of the old file from a hunk header.

    For a unified-diff header such as "@@ -10,4 +10,3 @@" this returns 10,
    the number that follows "@@ -". Leading whitespace is tolerated.

    Raises:
        ValueError: if `hunk_preamble` is not a unified-diff hunk header.
    """
    # Raw string: the pattern is unchanged, but "\s" and "\d" in a normal
    # string literal are invalid escape sequences and trigger warnings.
    match = re.match(r'\s*@@ \-(\d+)', hunk_preamble)
    if match is None:
        raise ValueError(f"Not a unified diff hunk header: {hunk_preamble!r}")
    return int(match.group(1))
class QCError(Exception):
    # Root of the package's exception hierarchy. The CLI decorator
    # handle_qc_errors catches this type, echoes the message to stderr and
    # exits with status 1, so subclasses get the same treatment.
    pass

class InvalidParameter(QCError):
    # presumably raised for an unacceptable argument value -- confirm at raise sites
    pass

class IncompatibleOptions(QCError):
    # Raised by check_incompatible in cli/options.py when a combination of
    # command-line options is not allowed.
    pass

class SettingsError(QCError):
    # presumably raised for problems with the settings file -- confirm at raise sites
    pass

class CodeFileParseError(QCError):
    # presumably raised when a codes file cannot be parsed -- confirm at raise sites
    pass

class CodebookParseError(QCError):
    # presumably raised when the codebook cannot be parsed -- confirm at raise sites
    pass
def prepare_corpus_text(text, width=80, preformatted=False):
    """Wrap corpus text to at most `width` columns.

    When `preformatted` is False, the text is split into paragraphs at
    blank lines ("\\n\\n") and each paragraph is re-flowed with
    textwrap.fill. When `preformatted` is True, existing line breaks are
    kept and any line longer than `width` is hard-split into consecutive
    chunks of `width` characters.

    Args:
        text: the corpus text.
        width: maximum line length (default 80).
        preformatted: keep existing line breaks and hard-wrap instead of
            re-flowing paragraphs.

    Returns:
        The wrapped text as a single string.

    Raises:
        ValueError: if `preformatted` is True and `width` is less than 1.
    """
    if preformatted:
        if width < 1:
            raise ValueError("width must be at least 1")
        outlines = []
        for line in text.split("\n"):
            if not line:
                # Preserve blank lines exactly.
                outlines.append("")
                continue
            # Chunk by `width`. The earlier loop compared against a
            # hard-coded 80, which truncated text when width < 80 and
            # emitted a spurious empty line when a line was exactly
            # `width` characters long or width > 80.
            outlines.extend(
                line[start:start + width] for start in range(0, len(line), width)
            )
        return "\n".join(outlines)
    else:
        paragraphs = text.split("\n\n")
        return "\n\n".join(fill(p, width=width) for p in paragraphs)
def configure_logger(settings_path):
    """Set up stdlib logging plus structlog and return a structlog logger.

    If a `log_config` module can be imported, importing it is taken to be
    the whole configuration and a logger is returned immediately.

    Otherwise the root logger gets a file handler that renders JSON. The
    log file comes from the `log_path` setting (resolved relative to the
    settings file when not absolute) and falls back to DEFAULT_LOG_FILE.
    When the `verbose` setting is true, the level is DEBUG instead of INFO
    and a console handler with structlog's ConsoleRenderer is added too.
    If the settings file does not exist, the defaults are used.
    """
    try:
        import log_config
        return structlog.get_logger()
    except ModuleNotFoundError:
        pass

    # Defaults, overridden below when a settings file is available.
    settings_file = Path(settings_path)
    verbose = False
    log_file_path = DEFAULT_LOG_FILE
    if settings_file.exists():
        settings = read_settings(settings_path)
        verbose = settings.get('verbose', False)
        log_file_path = Path(settings.get('log_path', DEFAULT_LOG_FILE))
        if not log_file_path.is_absolute():
            log_file_path = settings_file.parent / log_file_path

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG if verbose else logging.INFO)

    # Each output is a (handler, final renderer) pair: JSON to the file,
    # and optionally a human-friendly rendering to the console.
    outputs = [(logging.FileHandler(log_file_path), structlog.processors.JSONRenderer())]
    if verbose:
        outputs.append((logging.StreamHandler(), structlog.dev.ConsoleRenderer()))
    for handler, renderer in outputs:
        formatter = structlog.stdlib.ProcessorFormatter(
            processors=[
                structlog.stdlib.ProcessorFormatter.remove_processors_meta,
                renderer,
            ],
        )
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)

    structlog.configure(
        processors=[
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt='iso'),
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )
    return structlog.get_logger()
class PandocImporter(BaseMediaImporter):
    """Imports media by converting it to 80-column plain text with pandoc."""

    def import_media(self, input_filename, output_filename):
        """Convert `input_filename` to plain text at `output_filename`.

        Raises:
            QCError: if the pandoc executable cannot be found.
            CalledProcessError: if pandoc exits with a non-zero status.
        """
        self.check_for_pandoc()
        # Pass an argument list rather than a shell string so file names
        # containing quotes, spaces or shell metacharacters are handed to
        # pandoc verbatim and are never interpreted by a shell.
        # NOTE(review): the flags are kept exactly as in the original command;
        # pandoc documents -i as --incremental rather than an input flag.
        cmd = [
            "pandoc",
            "-i", str(input_filename),
            "-o", str(output_filename),
            "--to", "plain",
            "--columns", "80",
        ]
        run(cmd, check=True)

    def check_for_pandoc(self):
        """Raise QCError unless a `pandoc` executable is on the PATH."""
        # Local import: this module does not import shutil at file level.
        import shutil
        # shutil.which works on every platform, unlike shelling out to the
        # Unix-only `which` command.
        if shutil.which("pandoc") is None:
            raise QCError("pandoc is required but was not found. Please install pandoc.")
8 | """ 9 | def import_media(self, input_filename, output_filename): 10 | turns = [] 11 | current_speaker = None 12 | current_speech = "" 13 | for caption in webvtt.read(input_filename): 14 | speaker, speech = caption.text.split(':', 1) 15 | if speaker == current_speaker: 16 | current_speech += speech 17 | else: 18 | if current_speech: 19 | turns.append({'speaker': current_speaker, 'speech': current_speech}) 20 | current_speaker = speaker 21 | current_speech = speech 22 | turns.append({'speaker': current_speaker, 'speech': current_speech}) 23 | with open(output_filename, 'w') as fh: 24 | for i, turn in enumerate(turns): 25 | if i > 0: 26 | fh.write('\n\n') 27 | fh.write(fill(turn['speaker'] + ': ' + turn['speech'], width=80)) 28 | -------------------------------------------------------------------------------- /qualitative_coding/migrations/__init__.py: -------------------------------------------------------------------------------- 1 | import click 2 | from pathlib import Path 3 | import yaml 4 | from semver import Version 5 | from qualitative_coding.views.styles import info 6 | from qualitative_coding.exceptions import QCError 7 | from qualitative_coding.migrations.migration_0_2_3 import Migrate_0_2_3 8 | from qualitative_coding.migrations.migration_1_0_0 import Migrate_1_0_0 9 | from qualitative_coding.migrations.migration_1_4_0 import Migrate_1_4_0 10 | from qualitative_coding.helpers import read_settings 11 | 12 | migrations = [ 13 | Migrate_0_2_3(), 14 | Migrate_1_0_0(), 15 | Migrate_1_4_0(), 16 | ] 17 | 18 | def migrate(settings_path, target=None): 19 | settings = read_settings(settings_path) 20 | if 'qc_version' not in settings: 21 | raise QCError("qc_version not specified in settings.") 22 | current_version = Version.parse(settings['qc_version']) 23 | target_version = Version.parse(target) if target else latest_migration() 24 | if target_version not in [m.version for m in migrations]: 25 | raise QCError(f"{target} is not a recognized migration") 26 | if 
class QCMigration:
    """A migration specifies how to move between versions of qc.
    When migrating from version X up to version Y, all migrations
    whose semantic versions are greater than X and at most Y are applied
    in order. When migrating down, the same migrations are reverted in
    reverse order.

    Subclasses set `_version` and override `apply` and `revert`; the base
    implementations do nothing.
    """

    _version = "0.0.0"

    @property
    def version(self):
        "The migration's version, parsed into a semver Version."
        return Version.parse(self._version)

    def apply(self, settings_path):
        "Forward migration"

    def revert(self, settings_path):
        "Revert migration"
        # Intentionally a no-op, mirroring apply(). This used to return the
        # undefined name `settings`, which raised NameError whenever a
        # subclass did not override revert().

    def set_setting(self, settings_path, key, default_value):
        """Writes a value to settings.
        The value is always written, replacing any existing value for key.
        Returns the updated settings dict.
        """
        settings = read_settings(settings_path)
        settings[key] = default_value
        Path(settings_path).write_text(yaml.dump(settings))
        return settings

    def delete_setting(self, settings_path, key):
        """Deletes a value in settings.
        A key that is already absent is ignored.
        Returns the updated settings dict.
        """
        settings = read_settings(settings_path)
        # pop() with a default so reverting against settings that never
        # had the key does not fail with KeyError.
        settings.pop(key, None)
        Path(settings_path).write_text(yaml.dump(settings))
        return settings
def get_codes(self, corpus_text_path, coder=None, merge=False, unit='line'):
    """
    Returns codes pertaining to a corpus text.
    By default, returns a dict like {coder_id: [(line_num, code)...]}.
    If coder is given, returns only that coder's list (an empty list when
    the coder has no code file for this text).
    If merge is given, returns every coder's entries combined: a list when
    unit is 'line', or their set union when unit is 'document'.
    Raises NotImplementedError for any other unit when merging.
    """
    # itertools is not known to be imported at file level, so import locally.
    from itertools import chain

    codes = {}
    for f in self.get_code_files_for_corpus_file(corpus_text_path, coder=coder):
        codes[self.get_coder_from_code_path(f)] = self.read_codes(f)
    if coder:
        # Previously fell back to {}, contradicting the documented list type.
        return codes.get(coder, [])
    if merge:
        if unit == 'line':
            # Linear-time flatten; sum(lists, []) recopies the accumulator.
            return list(chain.from_iterable(codes.values()))
        if unit == 'document':
            return set().union(*codes.values())
        raise NotImplementedError("Unit must be 'line' or 'document'.")
    return codes
class Migrate_1_4_0(QCMigration):
    """Migration to 1.4.0: replaces the `logs_dir` setting with `log_file`
    and adds the `verbose` setting.
    """
    _version = "1.4.0"

    def apply(self, settings_path):
        "Forward migration: write the 1.4.0 settings and drop `logs_dir`."
        self.set_setting(settings_path, "qc_version", "1.4.0")
        self.set_setting(settings_path, "verbose", False)
        self.set_setting(settings_path, "log_file", 'qc.log')
        self.delete_setting(settings_path, "logs_dir")

    def revert(self, settings_path):
        "Revert migration: restore `logs_dir` and make sure the directory exists."
        logs_dir_name = "logs"
        self.set_setting(settings_path, "qc_version", "1.0.0")
        self.set_setting(settings_path, "logs_dir", logs_dir_name)
        self.delete_setting(settings_path, "log_file")
        self.delete_setting(settings_path, "verbose")
        # Create the directory the restored setting names. The old code
        # created a directory literally called "logs_dir" instead.
        # NOTE(review): assumes logs_dir is resolved relative to the
        # settings file's directory -- confirm against QCCorpus.
        logs_dir = Path(settings_path).parent / logs_dir_name
        logs_dir.mkdir(parents=True, exist_ok=True)
class REFIQDAReader:
    """Imports an existing REFI-QDA project.
    NOTE: Currently does not support importing memos.
    """
    # Coder name used for codings whose creating user is missing or unknown.
    default_coder = "default"

    def __init__(self, qdpxfile):
        """Remember the project file and validate it immediately.
        Raises QCError (via validate) when the file is not a valid project.
        """
        self.qdpxfile = qdpxfile
        self.validate(qdpxfile)

    def unpack_project(self, destination):
        """Create a qc project in `destination` from the .qdpx file.
        Raises QCError when destination does not exist or is not empty.
        """
        self.dest_path = Path(destination)
        if not self.dest_path.exists():
            # Fixed: the message referenced an undefined local `dest_path`.
            raise QCError(f"Cannot import project to {self.dest_path}; no such directory.")
        if any(self.dest_path.iterdir()):
            raise QCError("You can only import a project into an empty directory.")
        # NOTE(review): initialize() is called without a path while the
        # corpus is opened under dest_path -- confirm these agree.
        QCCorpus.initialize()
        self.corpus = QCCorpus(self.dest_path / "settings.yaml")
        (self.dest_path / "source").mkdir()
        (self.dest_path / "source" / "import").mkdir()
        with zipfile.ZipFile(self.qdpxfile, 'r', zipfile.ZIP_DEFLATED) as zf:
            zf.extractall((self.dest_path / "source"))
        tree = ET.parse(self.dest_path / "source" / "project.qde")
        with self.corpus.session():
            self.unpack_xml(tree.getroot())

    def unpack_xml(self, root):
        """Import users, codebook and sources from the project root element.
        Logs a warning for each unsupported section that is present.
        """
        self.coder_guids = {}
        # Defined up front so a project without a CodeBook does not fail
        # with AttributeError when sources are unpacked.
        self.code_guids = {}
        for child in root:
            if child.tag.endswith("Users"):
                self.unpack_coders(child)
        for child in root:
            if child.tag.endswith("CodeBook"):
                self.unpack_codebook(child)
        # Fixed: an extra loop here logged the literal text
        # "{self.qdpxfile} ..." (missing f prefix), duplicating this warning.
        self.unpack_unsupported(root, "Variables")
        self.unpack_unsupported(root, "Cases")
        for child in root:
            if child.tag.endswith("Sources"):
                self.unpack_sources(child)
        self.unpack_unsupported(root, "Notes")
        self.unpack_unsupported(root, "Links")
        self.unpack_unsupported(root, "Graphs")
        self.unpack_unsupported(root, "Description")
        self.unpack_unsupported(root, "NoteRef")

    def unpack_unsupported(self, root, tagname):
        "Warn once for each child of root whose tag ends with tagname."
        for child in root:
            if child.tag.endswith(tagname):
                log.warning(f"{self.qdpxfile} contains {tagname}, which are not supported by qc.")

    def unpack_coders(self, users):
        "Create a coder for each user and remember its GUID."
        for user in users:
            name = user.attrib['name']
            guid = user.attrib['guid']
            self.corpus.get_or_create_coder(name)
            self.coder_guids[guid] = name

    def create_default_coder_if_none_defined(self):
        "Create the default coder when no Users section has been unpacked."
        if not hasattr(self, "coder_guids"):
            self.corpus.get_or_create_coder(self.default_coder)

    def unpack_codebook(self, codebook):
        "Unpack the Codes section of a CodeBook element."
        for child in codebook:
            # Only the Codes child holds codes. Unpacking any other child
            # would reset code_guids and overwrite the codebook file.
            if child.tag.endswith("Codes"):
                self.unpack_codes(child)

    def unpack_codes(self, codes):
        """Create every code under `codes`, record code GUIDs, and write the
        code hierarchy to the corpus codebook.
        """
        self.code_guids = {}
        self.code_tree = TreeNode(TreeNode.root)

        def unpack_code(code, parent):
            name = code.attrib['name']
            guid = code.attrib['guid']
            self.corpus.get_or_create_code(name)
            self.code_guids[guid] = name
            node = TreeNode(name, parent=parent)
            parent.children.append(node)
            for child in code:
                if child.tag.endswith("Code"):
                    unpack_code(child, node)

        for code in codes:
            unpack_code(code, self.code_tree)

        TreeNode.write_yaml(self.corpus.codebook_path, self.code_tree)

    def unpack_sources(self, sources):
        """Import each plain-text source and its codings.
        Sources without a plainTextPath are skipped with a warning.
        """
        self.document_guids = {}
        for source in sources:
            if not source.attrib.get('plainTextPath'):
                # Fixed: Elements are not subscriptable by attribute name;
                # source['name'] raised TypeError instead of warning.
                log.warning(
                    f"Skipping import of source {source.attrib.get('name')}; " +
                    "only text sources are supported."
                )
                continue
            guid = source.attrib['guid']
            plain_text_path = source.attrib['plainTextPath'].replace("internal://", "")
            qdpx_path = self.dest_path / "source" / "sources" / plain_text_path
            importable_path = (self.dest_path / "source" / "import" / source.attrib['name']).with_suffix(
                qdpx_path.suffix
            )
            log.info(f"Copying {qdpx_path} -> {importable_path}")
            shutil.copyfile(qdpx_path, importable_path)
            self.document_guids[guid] = importable_path.name
            self.corpus.import_media(importable_path, importer="verbatim")
            line_positions = self.line_positions(importable_path)
            coded_lines = defaultdict(list)
            for selection in source:
                if selection.tag.endswith("PlainTextSelection"):
                    # Selections written by qc carry the line number in
                    # their name; otherwise derive it from the offset.
                    match = re.match(r"line:(\d+)", selection.attrib.get("name", ""))
                    if match:
                        line = int(match.group(1))
                    else:
                        position = int(selection.attrib['startPosition'])
                        line = self.get_line_for_position(position, line_positions)
                    for coding in selection:
                        if coding.tag.endswith("Coding"):
                            # A missing or unknown creatingUser falls back
                            # to the default coder.
                            coder_guid = coding.attrib.get('creatingUser')
                            coder = self.coder_guids.get(coder_guid, self.default_coder)
                            for coderef in coding:
                                if coderef.tag.endswith("CodeRef"):
                                    code = self.code_guids[coderef.attrib['targetGUID']]
                                    coded_lines[coder].append({'line': line, 'code_id': code})
            for coder, cls in coded_lines.items():
                # Make sure the coder exists (the default coder may never
                # have been created); get_or_create is idempotent.
                self.corpus.get_or_create_coder(coder)
                self.corpus.update_coded_lines(importable_path.name, coder, cls)

    def get_line_for_position(self, position, line_positions):
        """Return the index of the line whose (start, end) span contains
        position. A position at or past the end of the last line maps to
        the last line, with a warning. Returns None when nothing matches.
        """
        for line, (start, end) in enumerate(line_positions):
            # Fixed: `position >= start` alone matched the first line for
            # every non-negative position.
            if start <= position < end:
                return line
        if line_positions and position >= line_positions[-1][1]:
            log.warning(
                f"Position {position} is past the end of the document; "
                "using the last line."
            )
            return len(line_positions) - 1
        return None

    def validate(self, qdpxfile):
        """Check that qdpxfile is a .qdpx zip archive containing a
        project.qde that validates against the bundled REFI-QDA schema.
        Raises QCError otherwise.
        """
        if not Path(qdpxfile).suffix == ".qdpx":
            raise QCError(f"{qdpxfile} must end in .qdpx")
        if not zipfile.is_zipfile(qdpxfile):
            raise QCError(f"{qdpxfile} is not a zipfile")
        with zipfile.ZipFile(qdpxfile, 'r', zipfile.ZIP_DEFLATED) as zf:
            zroot = zipfile.Path(zf)
            qde = zroot / "project.qde"
            if not qde.exists():
                # Fixed: missing f prefix printed the placeholder verbatim.
                raise QCError(f"{qdpxfile} does not contain project.qde")
            qcf = importlib.resources.files("qualitative_coding")
            schema_path = qcf / "refi_qda" / "schema.xsd"
            try:
                validate(qde.read_text(), schema_path)
            except XMLSchemaValidationError as err:
                raise QCError(
                    f"When reading {qdpxfile}, project.qde did not validate " +
                    f"against the REFI-QDA schema:\n" +
                    repr(err)
                ) from err

    def line_positions(self, corpus_file_path):
        """returns a list of (start, end) character positions for lines in doc.
        Each span includes the line's trailing newline.
        """
        lines = []
        index = 0
        # Fixed: iterating the string returned by read_text() walked the
        # text one character at a time; iterate the file to get lines.
        with (self.corpus.corpus_dir / corpus_file_path).open() as fh:
            for line in fh:
                start = index
                end = index + len(line)
                lines.append((start, end))
                index = end
        return lines

    def print_tree(self, project_path):
        "Print the output of the `tree` command run in project_path."
        result = run("tree", cwd=project_path, capture_output=True, text=True, shell=True)
        print(result.stdout)
142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 
475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | -------------------------------------------------------------------------------- /qualitative_coding/refi_qda/writer.py: -------------------------------------------------------------------------------- 1 | from qualitative_coding.corpus import QCCorpus 2 | from qualitative_coding.exceptions import QCError, InvalidParameter 3 | from tempfile import TemporaryDirectory 4 | from shutil import copyfile 5 | from pathlib import Path 6 | from subprocess import run 7 | from collections import defaultdict 8 | from hashlib import md5 9 | import os 10 | from importlib.metadata import metadata 11 | from zipfile import ZipFile, ZIP_DEFLATED 12 | from uuid import UUID 13 | from xml.etree.ElementTree import ( 14 | Element, 15 | Comment, 16 | tostring, 17 | ) 18 | import structlog 19 | 20 | log = structlog.get_logger() 21 | 22 | class REFIQDAWriter: 23 | """Exports a QC project as a REFI-QDA project. 
def write_xml(self, outpath):
    """Build the project XML, write it to outpath, and return the root.

    The Users, CodeBook and Sources sections are appended to the root in
    that order. When self.debug is set, the serialized XML is also printed.
    """
    project = self.xml_root()
    section_builders = (
        self.users_to_xml,
        self.codebook_to_xml,
        self.sources_to_xml,
    )
    for build_section in section_builders:
        project.append(build_section())
    serialized = tostring(project, encoding="unicode")
    if self.debug:
        print(serialized)
    outpath.write_text(serialized)
    return project
def sources_to_xml(self):
    """Render every corpus document as a TextSource under a Sources element.

    Each coded line becomes a PlainTextSelection named "line:N" spanning
    that line's character positions, holding one Coding (with a CodeRef)
    per coded-line record. Reads self.code_guids, so codebook_to_xml must
    have been called first.
    """
    sources = Element("Sources")
    with self.corpus.session():
        for doc in self.corpus.get_documents():
            source_guid = self.guid(doc.file_path)
            internal_path = str(
                Path(source_guid).with_suffix(Path(doc.file_path).suffix)
            )
            source = Element("TextSource", {
                "plainTextPath": "internal://" + internal_path,
                "guid": source_guid,
                "name": doc.file_path,
            })
            positions = self.line_positions(doc.file_path)
            # Group the coded-line records by line number, keeping the
            # order in which lines are first seen.
            records_by_line = defaultdict(list)
            for record in self.corpus.get_coded_lines(file_list=[doc.file_path]):
                records_by_line[record.line].append(record)
            for line_num, records in records_by_line.items():
                start, end = positions[line_num][0], positions[line_num][1]
                selection = Element("PlainTextSelection", {
                    "guid": self.selection_guid(doc.file_path, line_num),
                    "name": f"line:{line_num}",
                    "startPosition": str(start),
                    "endPosition": str(end),
                })
                for code, coder, record_line, file_path in records:
                    coding = Element("Coding", {
                        "guid": self.coding_guid(code, coder, record_line, file_path),
                        "creatingUser": self.coder_guid(coder),
                    })
                    coding.append(Element("CodeRef", {"targetGUID": self.code_guids[code]}))
                    selection.append(coding)
                source.append(selection)
            sources.append(source)
    return sources
def guid(self, source):
    """Return a deterministic GUID string for the text `source`.

    The first 16 hex characters of the MD5 digest of the UTF-8 encoded
    text are themselves used as the UUID's 16 raw bytes.
    """
    hex_prefix = md5(source.encode('utf8')).hexdigest()[:16]
    raw_bytes = hex_prefix.encode('utf8')
    identifier = UUID(bytes=raw_bytes)
    return str(identifier)
def set_up_qc_project_0_2_3(self):
    """Build a version 0.2.3 project layout in a fresh temporary directory.

    Writes settings.yaml, creates every directory named by a `*_dir`
    setting, an empty codebook, one corpus text and its codes file.
    """
    self.tempdir = TemporaryDirectory()
    self.testpath = Path(self.tempdir.name)
    root = self.testpath
    settings_0_2_3 = dict(
        qc_version="0.2.3",
        corpus_dir='corpus',
        codes_dir='codes',
        log_file='qc.log',
        memos_dir='memos',
        codebook='codebook.yaml',
    )
    (root / "settings.yaml").write_text(yaml.dump(settings_0_2_3))
    dir_settings = [
        value for key, value in settings_0_2_3.items() if key.endswith("_dir")
    ]
    for dirname in dir_settings:
        (root / dirname).mkdir()
    (root / "codebook.yaml").touch()
    (root / "corpus" / "macbeth.txt").write_text(MACBETH)
    (root / "codes" / "macbeth.txt.cp.codes").write_text(MACBETH_CODES_0_2_3)
def show_tree(self):
    """Print the directory tree of the test project, for debugging.

    Runs `tree` through run_in_testpath. When output was captured it is
    printed here; otherwise the command has already written to stdout.
    """
    # Fixed: this used to pass debug=True, a keyword run_in_testpath does
    # not accept, so every call raised TypeError.
    result = self.run_in_testpath("tree")
    if result.stdout:
        print(result.stdout)
def read_stats_tsv(self, stdout):
    """Parse tab-separated stats output into a nested dict.

    The first row is the header; its first cell names the index column and
    is discarded. Returns {row_label: {column: value}}, where each value is
    a float, or None for an empty cell. Cells are stripped of whitespace.
    """
    def to_number(cell):
        if cell == '':
            return None
        return float(cell)

    rows = []
    for raw_row in csv.reader(StringIO(stdout), delimiter="\t"):
        rows.append([cell.strip() for cell in raw_row])
    ix_name, *cols = rows[0]
    stats = {}
    for row in rows[1:]:
        ix, *vals = row
        stats[ix] = dict(zip(cols, map(to_number, vals)))
    return stats
It is a way I have of driving off the spleen and regulating the circulation." 153 | -------------------------------------------------------------------------------- /qualitative_coding/tests/mock_editor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This is a mock editor for testing purposes. 4 | # Whereas a real editor would present the corpus and code files 5 | # to the user for coding, the mock editor goes ahead and codes 6 | # line with 'code_one' and line two (if it exists) with 'code_two'. 7 | # When --crash is passed, exits with an exception, allowing testing 8 | # of the error condition. 9 | 10 | from argparse import ArgumentParser 11 | from pathlib import Path 12 | import sys 13 | 14 | parser = ArgumentParser() 15 | parser.add_argument("corpus_file_path") 16 | parser.add_argument("codes_file_path", nargs='?') 17 | parser.add_argument("--verbose", action="store_true") 18 | parser.add_argument("--crash", action="store_true") 19 | parser.add_argument("--memo", action="store_true") 20 | args = parser.parse_args() 21 | 22 | if args.crash: 23 | if args.verbose: 24 | print("Crashing the mock editor, as requested...", file=sys.stderr) 25 | raise SystemExit(1) 26 | 27 | if args.memo: 28 | if args.verbose: 29 | print("Mock Editor is in memo mode.") 30 | memo_file_path = Path(args.corpus_file_path) 31 | memo = "I'm having all these ideas. I need to write them down." 
class TestCheck(QCTestCase):
    "Tests for the `qc check` command."

    def test_check_passes_when_no_errors(self):
        result = self.run_in_testpath("qc check")
        self.assertEqual(result.stdout, "")

    def test_check_identifies_missing_settings(self):
        # Settings are removed cumulatively; each removed one must be reported.
        for setting in DEFAULT_SETTINGS:
            if setting == 'qc_version':
                continue
            self.update_settings(setting, None)
            result = self.run_in_testpath("qc check")
            message = result.stderr
            self.assertIn(f"Expected '{setting}' in settings", message)

    def test_check_validates_corpus_paths(self):
        # One imported file goes missing and one unknown file appears.
        self.run_in_testpath("qc corpus import macbeth.txt")
        (self.testpath / "corpus" / "macbeth.txt").unlink()
        (self.testpath / "corpus" / "hamlet.txt").touch()
        message = self.run_in_testpath("qc check").stderr
        self.assertIn("macbeth.txt", message)
        self.assertIn("hamlet.txt", message)

    def test_check_validates_corpus_file_contents(self):
        # Renamed: this method shared its name with the test above, so it
        # replaced that test in the class and the path check never ran.
        self.run_in_testpath("qc corpus import macbeth.txt")
        (self.testpath / "corpus" / "macbeth.txt").write_text("It was the best of times...")
        message = self.run_in_testpath("qc check").stderr
        self.assertIn("macbeth.txt", message)
result = self.run_in_testpath("qc code chris") 42 | self.assertNotEqual(result.returncode, 0) 43 | 44 | def test_code_will_not_code_when_codes_exists(self): 45 | (self.testpath / "codes.txt").write_text('a') 46 | self.set_mock_editor(verbose=True) 47 | result = self.run_in_testpath("qc code chris") 48 | self.assertNotEqual(result.returncode, 0) 49 | 50 | def test_code_abandon_deletes_session(self): 51 | (self.testpath / ".coding_session").write_text('a') 52 | (self.testpath / "codes.txt").write_text('a') 53 | result = self.run_in_testpath("qc code chris --abandon") 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_code_parsing.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from qualitative_coding.tests.fixtures import MockCorpus 3 | from qualitative_coding.views.viewer import QCCorpusViewer 4 | from qualitative_coding.exceptions import CodeFileParseError 5 | 6 | class TestCodeParsing(TestCase): 7 | def setUp(self): 8 | self.viewer = QCCorpusViewer(MockCorpus()) 9 | 10 | def test_codes_are_validated(self): 11 | cases = [ 12 | ['funny', True], 13 | ['funny-sort-of', True], 14 | ['FUNNY!', False], 15 | ['funny?', False], 16 | ['0', True], 17 | ['', False], 18 | [':colon', False], 19 | ['#hashtag', False], 20 | ] 21 | for code, ok in cases: 22 | if ok: 23 | self.viewer.parse_code('nobody', code) 24 | else: 25 | with self.assertRaises(CodeFileParseError): 26 | self.viewer.parse_code('nobody', code) 27 | 28 | def test_parses_valid_codes_file(self): 29 | self.viewer = QCCorpusViewer(MockCorpus()) 30 | codes = self.viewer.parse_codes('nobody', CODES_FILE, 6) 31 | self.assertEqual(len(codes), 4) 32 | self.assertEqual(codes[0]['line'], 2) 33 | 34 | def test_checks_codes_file_length(self): 35 | self.viewer = QCCorpusViewer(MockCorpus()) 36 | with self.assertRaises(CodeFileParseError): 37 | 
self.viewer.parse_codes('nobody', CODES_FILE, 7) 38 | 39 | def test_checks_for_misplaced_commas(self): 40 | for case in [TRAILING_COMMA, LEADING_COMMA]: 41 | with self.assertRaises(CodeFileParseError): 42 | self.viewer.parse_codes('nobody', case, 6) 43 | 44 | 45 | CODES_FILE = """ 46 | 47 | funny, inappropriate 48 | dull 49 | trite 50 | 51 | """ 52 | 53 | TRAILING_COMMA = """ 54 | code, 55 | """ 56 | LEADING_COMMA = """ 57 | ,code 58 | """ 59 | 60 | 61 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_codebook.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | import yaml 3 | 4 | class TestCodebook(QCTestCase): 5 | def test_codebook_is_empty_on_init(self): 6 | cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text()) 7 | self.assertEqual(cb, None) 8 | 9 | def test_codebook_updates_codebook_file(self): 10 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 11 | self.set_mock_editor() 12 | self.run_in_testpath("qc code chris") 13 | self.run_in_testpath("qc codebook") 14 | cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text()) 15 | self.assertEqual(len(cb), 3) 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_coders.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestCoders(QCTestCase): 5 | def test_coders_shows_coders(self): 6 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 7 | self.set_mock_editor() 8 | self.run_in_testpath("qc code chris") 9 | self.run_in_testpath("qc code varun") 10 | result = self.run_in_testpath("qc coders") 11 | self.assertTrue("chris" in result.stdout) 12 | self.assertTrue("varun" in result.stdout) 13 | 
-------------------------------------------------------------------------------- /qualitative_coding/tests/test_codes_crosstab.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestCrosstab(QCTestCase): 5 | def test_crosstab_shows_counts(self): 6 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 7 | self.set_mock_editor() 8 | self.run_in_testpath("qc code chris") 9 | result = self.run_in_testpath("qc codes crosstab one two line --format tsv") 10 | table = self.read_stats_tsv(result.stdout) 11 | self.assertEqual(table['two']['line'], 1) 12 | 13 | def test_crosstab_with_probs_shows_probs(self): 14 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 15 | self.set_mock_editor() 16 | self.run_in_testpath("qc code chris") 17 | result = self.run_in_testpath("qc codes crosstab one two line --probs --format tsv") 18 | table = self.read_stats_tsv(result.stdout) 19 | self.assertEqual(table['line']['two'], 0.5) 20 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_codes_find.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | 3 | class TestFind(QCTestCase): 4 | def setUp(self): 5 | super().setUp() 6 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 7 | self.set_mock_editor() 8 | self.run_in_testpath("qc code chris") 9 | 10 | def test_find_shows_codes(self): 11 | result = self.run_in_testpath("qc codes find one") 12 | self.assertEqual(len(result.stdout.splitlines()), 8) 13 | 14 | def test_find_respects_context_window(self): 15 | result = self.run_in_testpath("qc codes find one -C 5") 16 | self.assertEqual(len(result.stdout.splitlines()), 11) 17 | 18 | 19 | -------------------------------------------------------------------------------- 
/qualitative_coding/tests/test_codes_list.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | import yaml 3 | 4 | class TestList(QCTestCase): 5 | def setUp(self): 6 | super().setUp() 7 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 8 | self.set_mock_editor() 9 | self.run_in_testpath("qc code chris") 10 | code_tree = [{'line': ['one', 'two']}] 11 | (self.testpath / "codebook.yaml").write_text(yaml.dump(code_tree)) 12 | 13 | def test_list_shows_codes(self): 14 | result = self.run_in_testpath("qc codes list") 15 | self.assertTrue("line" in result.stdout) 16 | self.assertTrue("one" in result.stdout) 17 | self.assertTrue("two" in result.stdout) 18 | 19 | def test_list_respects_depth(self): 20 | result = self.run_in_testpath("qc codes list --depth 1") 21 | self.assertTrue("line" in result.stdout) 22 | self.assertTrue("one" not in result.stdout) 23 | 24 | def test_list_respects_expanded(self): 25 | result = self.run_in_testpath("qc codes list --expanded") 26 | self.assertTrue("line:one" in result.stdout) 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_codes_rename.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from qualitative_coding.corpus import QCCorpus 3 | import yaml 4 | 5 | class TestRename(QCTestCase): 6 | def setUp(self): 7 | super().setUp() 8 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 9 | self.set_mock_editor() 10 | self.run_in_testpath("qc code chris") 11 | 12 | def test_rename_renames_codes(self): 13 | self.run_in_testpath("qc codes rename line pace") 14 | cb = yaml.safe_load((self.testpath / "codebook.yaml").read_text()) 15 | self.assertTrue('pace' in cb) 16 | 17 | def test_rename_does_not_duplicate_codes(self): 18 | corpus = 
QCCorpus(self.testpath/"settings.yaml") 19 | self.run_in_testpath("qc codes rename line one") 20 | with corpus.session(): 21 | self.assertEqual(len(corpus.get_coded_lines()), 3) 22 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_codes_stats.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | 3 | class TestStats(QCTestCase): 4 | def test_stats_shows_stats(self): 5 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 6 | self.set_mock_editor() 7 | self.run_in_testpath("qc code chris") 8 | result = self.run_in_testpath("qc codes stats --format tsv") 9 | table = self.read_stats_tsv(result.stdout) 10 | self.assertEqual(table['line']['Count'], 2) 11 | 12 | def test_stats_distinct_shows_totals_by_coder(self): 13 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 14 | self.set_mock_editor() 15 | self.run_in_testpath("qc code chris") 16 | self.run_in_testpath("qc code haley") 17 | result = self.run_in_testpath("qc codes stats --by-coder --format tsv") 18 | table = self.read_stats_tsv(result.stdout) 19 | 20 | def test_stats_distinct_shows_totals_by_document(self): 21 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 22 | self.set_mock_editor() 23 | self.run_in_testpath("qc code chris") 24 | self.run_in_testpath("qc code haley") 25 | result = self.run_in_testpath("qc codes stats --by-document --format tsv") 26 | table = self.read_stats_tsv(result.stdout) 27 | 28 | def test_stats_distinct_shows_coder_document_pivot_table(self): 29 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 30 | self.set_mock_editor() 31 | self.run_in_testpath("qc code chris") 32 | self.run_in_testpath("qc code haley") 33 | result = self.run_in_testpath("qc codes stats --by-document --by-coder --format tsv") 34 | table = self.read_stats_tsv(result.stdout) 35 | 
-------------------------------------------------------------------------------- /qualitative_coding/tests/test_corpus_anonymize.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from qualitative_coding.corpus import QCCorpus 3 | import yaml 4 | 5 | NEWS = """A "hefty sneeze" has caused a professional footballer to sustain a 6 | "nasty back injury". Victor Adeboyejo, a striker for Bolton Wanderers, 7 | had been due to take part in a Bristol Street Motors Trophy group game at 8 | Barrow on Tuesday. He was forced to pull out of the squad, however, 9 | because of discomfort in his back and ribcage. Manager Ian Evatt, who was 10 | already missing first team players because of injury and the international 11 | break, said the pain appeared to have been caused by a "pretty hefty sneeze." 12 | """ 13 | 14 | class TestCorpusAnonymize(QCTestCase): 15 | def setUp(self): 16 | super().setUp() 17 | (self.testpath / "news.txt").write_text(NEWS) 18 | self.run_in_testpath("qc corpus import news.txt") 19 | self.run_in_testpath("qc corpus anonymize") 20 | 21 | def test_creates_key_file_with_yaml(self): 22 | keyfile = self.testpath / "key.yaml" 23 | self.assertTrue(keyfile.exists()) 24 | keys = yaml.safe_load(keyfile.read_text()) 25 | self.assertTrue("Victor Adeboyejo" in keys) 26 | 27 | def test_creates_anonymized_corpus(self): 28 | self.run_in_testpath("qc corpus anonymize") 29 | anon_news = (self.testpath / "anonymized" / "news.txt").read_text() 30 | self.assertTrue("Victor Adeboyejo" not in anon_news) 31 | 32 | def test_reverses_anonymization(self): 33 | self.run_in_testpath("qc corpus anonymize") 34 | self.run_in_testpath("qc corpus anonymize -r -o recovered") 35 | news = (self.testpath / "recovered" / "news.txt").read_text() 36 | self.assertTrue("Victor Adeboyejo" in news) 37 | 38 | def test_replaces_longer_strings_first(self): 39 | keyfile = self.testpath / "key.yaml" 40 | 
keyfile.write_text(yaml.dump({ 41 | "Victor": "X", 42 | "Victor Adeboyejo": "VA" 43 | })) 44 | self.run_in_testpath("qc corpus anonymize") 45 | anon_news = (self.testpath / "anonymized" / "news.txt").read_text() 46 | self.assertTrue("Adeboyejo" not in anon_news) 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_corpus_import.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestImport(QCTestCase): 5 | def test_import_verbatim(self): 6 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 7 | self.assertFileImported("macbeth.txt") 8 | 9 | def test_import_pandoc(self): 10 | self.run_in_testpath("qc corpus import moby_dick.md --importer pandoc") 11 | self.assertFileImported("moby_dick.txt") 12 | nlines = len((self.testpath / "corpus/moby_dick.txt").read_text().split('\n')) 13 | self.assertEqual(nlines, 5) 14 | 15 | def test_import_recursive(self): 16 | (self.testpath / "chapters").mkdir() 17 | (self.testpath / "chapters/one.txt").write_text("one") 18 | (self.testpath / "chapters" / "preface").mkdir() 19 | (self.testpath / "chapters/preface/note.txt").write_text("two") 20 | self.run_in_testpath("qc corpus import chapters --recursive") 21 | self.assertFileImported("one.txt") 22 | self.assertFileImported("preface/note.txt") 23 | 24 | def test_import_from_absolute_dir(self): 25 | import_path = (self.testpath / "macbeth.txt").resolve() 26 | self.run_in_testpath(f"qc corpus import {import_path}") 27 | self.assertFileImported("macbeth.txt") 28 | 29 | def test_import_recursive_from_absolute_dir(self): 30 | (self.testpath / "chapters").mkdir() 31 | (self.testpath / "chapters/one.txt").write_text("one") 32 | (self.testpath / "chapters" / "preface").mkdir() 33 | (self.testpath / "chapters/preface/note.txt").write_text("two") 34 | import_dir = (self.testpath / 
"chapters").resolve() 35 | self.run_in_testpath(f"qc corpus import {import_dir} --recursive") 36 | self.assertFileImported("one.txt") 37 | self.assertFileImported("preface/note.txt") 38 | 39 | def test_import_from_rel_dir_with_dot_dot(self): 40 | (self.testpath / "chapters").mkdir() 41 | self.run_in_testpath("qc corpus import chapters/../macbeth.txt --importer verbatim") 42 | self.assertFileImported("macbeth.txt") 43 | 44 | def test_import_from_dir_with_spaces(self): 45 | (self.testpath / "chap ters").mkdir() 46 | (self.testpath / "chap ters/one.txt").write_text("one") 47 | (self.testpath / "chap ters" / "preface").mkdir() 48 | (self.testpath / "chap ters/preface/note.txt").write_text("two") 49 | self.run_in_testpath('qc corpus import "chap ters" --recursive') 50 | self.assertFileImported("one.txt") 51 | self.assertFileImported("preface/note.txt") 52 | 53 | def assertFileImported(self, path): 54 | self.assertFileExists(Path("corpus") / path) 55 | with self.corpus.session(): 56 | file_path = self.corpus.get_document(self.testpath / 'corpus' / path).file_path 57 | self.assertEqual(file_path, path) 58 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_corpus_move.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestCorpusMove(QCTestCase): 5 | 6 | def test_move_works_with_files(self): 7 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 8 | self.run_in_testpath("qc corpus move corpus/macbeth.txt corpus/m.txt") 9 | self.assertTrue((self.testpath / "corpus" / "m.txt").exists()) 10 | with self.corpus.session(): 11 | result = self.corpus.get_documents(file_list=["m.txt"]) 12 | self.assertEqual(result[0].file_path, "m.txt") 13 | 14 | def test_move_works_with_files_in_subdirs(self): 15 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 16 | 
self.run_in_testpath("qc corpus move corpus/macbeth.txt corpus/will/macbeth.txt") 17 | self.assertTrue((self.testpath / "corpus" / "will" / "macbeth.txt").exists()) 18 | with self.corpus.session(): 19 | result = self.corpus.get_documents(file_list=["will/macbeth.txt"]) 20 | self.assertEqual(result[0].file_path, "will/macbeth.txt") 21 | 22 | def test_move_works_with_recursive_subdirs(self): 23 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim --corpus-root shakespeare") 24 | self.run_in_testpath("qc corpus move corpus/shakespeare corpus/will --recursive") 25 | self.assertTrue((self.testpath / "corpus" / "will" / "macbeth.txt").exists()) 26 | with self.corpus.session(): 27 | result = self.corpus.get_documents(file_list=["will/macbeth.txt"]) 28 | self.assertEqual(result[0].file_path, "will/macbeth.txt") 29 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_corpus_remove.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestCorpusRemove(QCTestCase): 5 | def test_removes_individual_file(self): 6 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 7 | self.run_in_testpath("qc corpus remove corpus/macbeth.txt") 8 | self.assertFileDoesNotExist(self.testpath / "corpus" / "macbeth.txt") 9 | 10 | def test_removes_directories(self): 11 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim --corpus-root shx") 12 | self.run_in_testpath("qc corpus remove corpus/shx --recursive") 13 | self.assertFileDoesNotExist(self.testpath / "corpus" / "shx" / "macbeth.txt") 14 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_corpus_update.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from 
qualitative_coding.corpus import QCCorpus 3 | 4 | MACBETH_IMPROVED = """Tomorrow, and tomorrow, and tomorrow, 5 | Tomorrow, and tomorrow, and tomorrow, 6 | Tomorrow, and tomorrow, and tomorrow, 7 | Creeps in this petty pace from day to day, 8 | To the last syllable of recorded time; 9 | The way to dusty death. Out, out, brief candle! 10 | Life's but a walking shadow, a poor player, 11 | Something something something, 12 | Told by an idiot, full of sound and fury, 13 | Signifying nothing. 14 | """ 15 | 16 | class TestCorpusUpdate(QCTestCase): 17 | def setUp(self): 18 | super().setUp() 19 | self.run_in_testpath("qc corpus import macbeth.txt --importer verbatim") 20 | with self.corpus.session(): 21 | self.corpus.update_coded_lines("macbeth.txt", "chris", [ 22 | {'line': 1, 'code_id': 'tomorrow'}, 23 | {'line': 2, 'code_id': 'creeps'}, 24 | {'line': 3, 'code_id': 'to'}, 25 | {'line': 4, 'code_id': 'and'}, 26 | {'line': 5, 'code_id': 'the'}, 27 | {'line': 6, 'code_id': 'lifes'}, 28 | {'line': 7, 'code_id': 'that'}, 29 | {'line': 8, 'code_id': 'and'}, 30 | {'line': 9, 'code_id': 'told'}, 31 | ]) 32 | (self.testpath / "macbeth_improved.txt").write_text(MACBETH_IMPROVED) 33 | 34 | def test_corpus_update_updates_line_numbers(self): 35 | before = self.run_in_testpath("qc codes find speech").stdout 36 | self.run_in_testpath("qc corpus update corpus/macbeth.txt --new macbeth_improved.txt") 37 | after = self.run_in_testpath("qc codes find speech").stdout 38 | self.assertEqual(before, after) 39 | 40 | def test_corpus_update_updates_text(self): 41 | self.run_in_testpath("qc corpus update corpus/macbeth.txt --new macbeth_improved.txt") 42 | text = (self.testpath / "corpus/macbeth.txt").read_text() 43 | self.assertEqual(text, MACBETH_IMPROVED) 44 | 45 | def test_corpus_update_updates_file_hash(self): 46 | with self.corpus.session(): 47 | old_hash = self.corpus.get_document(self.testpath / "corpus/macbeth.txt").file_hash 48 | self.run_in_testpath("qc corpus update corpus/macbeth.txt 
--new macbeth_improved.txt") 49 | with self.corpus.session(): 50 | new_hash = self.corpus.get_document(self.testpath / "corpus/macbeth.txt").file_hash 51 | self.assertNotEqual(old_hash, new_hash) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_export.py: -------------------------------------------------------------------------------- 1 | from qualitative_coding.tests.fixtures import QCTestCase 2 | from pathlib import Path 3 | 4 | class TestExport(QCTestCase): 5 | def test_creates_qdpx_file(self): 6 | self.run_in_testpath("qc corpus import macbeth.txt") 7 | self.set_mock_editor(verbose=True) 8 | self.run_in_testpath("qc code chris") 9 | self.run_in_testpath("qc code haley") 10 | self.run_in_testpath("qc export out.qdpx") 11 | self.assertFileExists("out.qdpx") 12 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_init.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from tempfile import TemporaryDirectory 3 | from pathlib import Path 4 | import yaml 5 | 6 | # TODO: 7 | # - Ensure that init catches validation errors when editors is malformed. 8 | # - Ensure that init catches validation errors when editor not in editors. 9 | # - Ensure that init catches files in corpus_dir which have not been imported 10 | # - Ensure that init functions properly when the settings file is a relative 11 | # and an absolute path. 
12 | 13 | class TestInit(QCTestCase): 14 | def setUp(self): 15 | self.tempdir = TemporaryDirectory() 16 | self.testpath = Path(self.tempdir.name) 17 | self.run_in_testpath("qc init") 18 | 19 | def test_init_creates_setup_file(self): 20 | self.assertFileExists(self.testpath / "settings.yaml") 21 | 22 | def test_init2_creates_expected_dirs(self): 23 | self.run_in_testpath("qc init") 24 | self.assertFileExists("corpus", is_dir=True) 25 | self.assertFileExists("memos", is_dir=True) 26 | 27 | def test_init2_creates_db(self): 28 | self.run_in_testpath("qc init") 29 | self.assertFileExists('qualitative_coding.sqlite3') 30 | 31 | def test_init_check_catches_errors(self): 32 | result = self.run_in_testpath("qc init") 33 | self.assertEqual("", result.stderr) 34 | self.update_settings("corpus_dir", None) 35 | result = self.run_in_testpath("qc init") 36 | self.assertNotEqual("", result.stderr) 37 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_init_import.py: -------------------------------------------------------------------------------- 1 | from qualitative_coding.tests.fixtures import QCTestCase 2 | from qualitative_coding.corpus import QCCorpus 3 | from tempfile import TemporaryDirectory 4 | from pathlib import Path 5 | from subprocess import run 6 | 7 | class TestInitImport(QCTestCase): 8 | def test_imports_from_qdpx_file(self): 9 | """Sort of an elaborate test: exports and then re-imports a project. 
10 | """ 11 | self.run_in_testpath("qc corpus import macbeth.txt") 12 | self.set_mock_editor(verbose=True) 13 | self.run_in_testpath("qc code chris") 14 | self.run_in_testpath("qc code haley") 15 | self.run_in_testpath("qc export out.qdpx") 16 | self.assertFileExists("out.qdpx") 17 | with TemporaryDirectory() as outdir: 18 | qdxp_file = self.testpath / "out.qdpx" 19 | result = run(f'qc init --import "{qdxp_file}"', cwd=outdir, shell=True, 20 | check=True, capture_output=True, text=True) 21 | corpus = QCCorpus(Path(outdir) / "settings.yaml") 22 | self.assertFileExists(Path(outdir) / "corpus" / "macbeth.txt") 23 | with corpus.session(): 24 | self.assertEqual(len(corpus.get_codes()), 3) 25 | self.assertEqual(len(list(corpus.get_all_coders())), 2) 26 | self.assertEqual(len(corpus.get_coded_lines()), 8) 27 | 28 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_logs.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from logs import configure_logger 3 | import structlog 4 | from pathlib import Path 5 | 6 | class TestLogs(QCTestCase): 7 | def test_log_info_saves_to_file(self): 8 | configure_logger(self.testpath / "settings.yaml") 9 | log = structlog.get_logger() 10 | log.info("test") 11 | self.assertFileExists("qualitative_coding.log") 12 | with open(self.testpath / "qualitative_coding.log") as fh: 13 | lines = list(fh) 14 | self.assertTrue(len(lines) > 0) 15 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_memo.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | 3 | class TestMemo(QCTestCase): 4 | def test_memo_saves_memo(self): 5 | self.set_mock_editor(verbose=True) 6 | self.run_in_testpath("qc memo chris") 7 | memo_files = list((self.testpath / "memos").iterdir()) 8 | 
self.assertEqual(len(memo_files), 1) 9 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_read_diff_offsets.py: -------------------------------------------------------------------------------- 1 | from difflib import unified_diff 2 | from unittest import TestCase 3 | from qualitative_coding.diff import read_diff_offsets 4 | 5 | doc0 = [t + '\n' for t in 'abcdefghijklmnop'] 6 | doc1 = [t + '\n' for t in '1bcdef12lmnopqr'] 7 | diff = ''.join(unified_diff(doc0, doc1, n=1)) 8 | 9 | class TestReadDiffOffsets(TestCase): 10 | def test_read_diff_offsets_reads_correct_offsets(self): 11 | expected = [(9, -3), (17, 2)] 12 | observed = read_diff_offsets(diff) 13 | self.assertEqual(expected, observed) 14 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_refi_qda_writer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from tests.fixtures import QCTestCase 3 | from qualitative_coding.refi_qda.writer import REFIQDAWriter 4 | from tempfile import TemporaryDirectory 5 | from xmlschema import validate 6 | import importlib.resources 7 | 8 | CODEBOOK = """ 9 | - one 10 | - two 11 | - three 12 | - tens: 13 | - twenty 14 | - thirty 15 | - forty 16 | """ 17 | 18 | class TestREFIQDAWriter(QCTestCase): 19 | def setUp(self): 20 | super().setUp() 21 | self.writer = REFIQDAWriter(self.testpath / "settings.yaml") 22 | 23 | def test_writes_nested_codes(self): 24 | with open(self.testpath / "codebook.yaml", 'w') as codebook: 25 | codebook.write(CODEBOOK) 26 | codebook_xml = self.writer.codebook_to_xml() 27 | codes = codebook_xml.find('Codes') 28 | tens = codes.find("Code[@name='tens']") 29 | self.assertEqual(len(tens.findall('Code')), 3) 30 | 31 | def test_xml_validates(self): 32 | schema_path = importlib.resources.files("qualitative_coding") / "refi_qda" / "schema.xsd" 33 | self.run_in_testpath("qc 
corpus import macbeth.txt") 34 | self.set_mock_editor(verbose=True) 35 | self.run_in_testpath("qc code chris") 36 | with TemporaryDirectory() as tempdir: 37 | project_path = Path(tempdir) 38 | xml_path = project_path / "project.qde" 39 | self.writer.write_xml(xml_path) 40 | validate(xml_path, schema_path) 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_tree_node.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from qualitative_coding.tree_node import TreeNode 3 | from tempfile import TemporaryDirectory 4 | import yaml 5 | from pathlib import Path 6 | 7 | class TestTreeNode(TestCase): 8 | def test_read_write_are_isomorphic(self): 9 | with TemporaryDirectory() as tempdir: 10 | for case in [EMPTY_CODEBOOK, FLAT_CODEBOOK, NESTED_CODEBOOK]: 11 | infile = Path(tempdir) / "in.yaml" 12 | outfile = Path(tempdir) / "out.yaml" 13 | infile.write_text(case) 14 | tn = TreeNode.read_yaml(infile) 15 | TreeNode.write_yaml(outfile, tn) 16 | self.assertEqual(outfile.read_text(), case) 17 | 18 | EMPTY_CODEBOOK = "[]\n" 19 | FLAT_CODEBOOK = """- a one 20 | - b two 21 | - c three 22 | """ 23 | NESTED_CODEBOOK = """- one: 24 | - a 25 | - b 26 | - c 27 | - two: 28 | - d 29 | """ 30 | CASES = [EMPTY_CODEBOOK, FLAT_CODEBOOK, NESTED_CODEBOOK] 31 | -------------------------------------------------------------------------------- /qualitative_coding/tests/test_upgrade.py: -------------------------------------------------------------------------------- 1 | from tests.fixtures import QCTestCase 2 | from qualitative_coding.corpus import QCCorpus 3 | from qualitative_coding.logs import configure_logger 4 | 5 | class TestUpgrade(QCTestCase): 6 | def setUp(self): 7 | pass 8 | 9 | def test_upgrade_noop(self): 10 | self.set_up_qc_project() 11 | result = self.run_in_testpath("qc upgrade") 12 | self.assertEqual(result.stdout, "") 13 | 14 | def 
@total_ordering
class TreeNode:
    """
    A node in a tree, represented as either a string (terminal)
    or a dict (with children).

    A terminal node is built from its name, e.g. ``"apples"``. A node with
    children is built from a single-item dict mapping the node's name to a
    list of child representations, e.g. ``{"fruits": ["apples", "pears"]}``.
    A whole tree hangs from a synthetic node named ``TreeNode.root``; that
    node is left out of flattened, expanded and printed output.

    Nodes are compared, ordered and hashed by their expanded name
    (see `expanded_name`).
    """
    root = "$ROOT$"
    # NOTE(review): confirm the intended indent width against the original
    # file; runs of whitespace may have been collapsed in the reviewed copy.
    indent = " "
    list_marker = "- "

    @classmethod
    def read_yaml(cls, filename):
        """Read the YAML file at `filename` and return the root TreeNode.

        Raises CodebookParseError when the YAML scanner or parser rejects
        the file.
        """
        with open(filename) as f:
            try:
                data = yaml.safe_load(f)
            except (yaml.scanner.ScannerError, yaml.parser.ParserError) as err:
                m = err.problem_mark
                # NOTE(review): PyYAML marks appear to be zero-based, so the
                # reported line may be one less than what an editor shows.
                message = f"Error reading {filename} on line {m.line}: {err.problem}"
                raise CodebookParseError(message) from err
        return TreeNode({cls.root: data})

    @classmethod
    def write_yaml(cls, filename, tree_node):
        "Writes `tree_node` (see `to_json`) to `filename` as block-style YAML"
        with open(filename, 'w') as f:
            f.write(yaml.dump(tree_node.to_json(), default_flow_style=False))

    def __init__(self, representation, parent=None):
        """Build a node (and, recursively, its children) from `representation`.

        `representation` is either a str (terminal node) or a dict with
        exactly one item, ``{name: children}``, where `children` is a list of
        representations or None. Anything else raises ValueError.
        """
        self.parent = parent
        if isinstance(representation, str):
            self.name = representation
            self.children = []
        elif isinstance(representation, dict) and len(representation) == 1:
            ((self.name, children),) = representation.items()
            self.children = [TreeNode(child, parent=self) for child in children or []]
        else:
            raise ValueError("Illegal node representation: {}".format(representation))

    def add_child(self, representation):
        "Appends a new child built from `representation`"
        self.children.append(TreeNode(representation, parent=self))

    def remove_children_by_name(self, name):
        """Remove every descendant called `name`, promoting its children.

        Children of a removed node are re-parented to the removed node's
        parent and placed after that parent's surviving children.
        """
        kept = []
        promoted = []
        # Build new lists rather than appending to self.children while
        # iterating over it.
        for child in self.children:
            child.remove_children_by_name(name)
            if child.name == name:
                for grandchild in child.children:
                    grandchild.parent = self
                    promoted.append(grandchild)
            else:
                kept.append(child)
        self.children = kept + promoted

    def rename(self, old_name, new_name):
        "Renames this node and all descendants called `old_name` to `new_name`"
        if self.name == old_name:
            self.name = new_name
        for child in self.children:
            child.rename(old_name, new_name)

    def ancestors(self):
        """Returns a list of ancestors, ending with self.

        The root node is not included, so the root itself yields []. A node
        which has not been attached to a tree yields just [self].
        """
        if self.is_root():
            return []
        if self.parent is None:
            return [self]
        return self.parent.ancestors() + [self]

    def depth(self):
        "Returns the number of nodes from the top level down to self (root is 0)"
        return len(self.ancestors())

    def backtrack_to(self, target_nodes):
        """Returns a list of ancestors traversed to reach one of target_nodes.

        Walks upward starting with self. The result lists the nodes passed
        before the first member of `target_nodes` was met, ordered from the
        highest to self. Returns None if no target is met.
        """
        traversed = []
        for a in reversed(self.ancestors()):
            if a in target_nodes:
                return list(reversed(traversed))
            else:
                traversed.append(a)
        return None

    def flatten(self, names=False, expanded=False, sep=":", depth=None):
        """
        Returns the node and its descendants as a sorted list.
        If names, return strings of node names.
        If expanded, return expanded name, like 'fruits:apples:pippin'
        If depth is not None, limits the depth of recursion
        """
        result = [] if self.is_root() else [self]
        if depth is None or depth > 0:
            child_depth = None if depth is None else depth - 1
            for child in self.children:
                result += child.flatten(depth=child_depth)
        if names:
            if expanded:
                result = [n.expanded_name(sep=sep) for n in result]
            else:
                result = [n.name for n in result]
        return sorted(result)

    def expanded_name(self, sep=":"):
        "Returns expanded name, like 'fruits:apples:pippin'"
        if self.parent and not self.parent.is_root():
            return self.parent.expanded_name(sep=sep) + sep + self.name
        else:
            return self.name

    def indented_name(self, nodes, sep=":", indent_length=2, indent_start='.'):
        """Returns indented name, like '. pippin'

        `nodes` are the nodes already being displayed. If one of them is an
        ancestor of self, the name is indented beneath it and only the names
        below that ancestor are shown, joined by `sep`. Otherwise the full
        path from the top level is returned, joined by `sep`.
        """
        if self.parent is None:
            ancestor_traversal = None
        else:
            ancestor_traversal = self.parent.backtrack_to(nodes)
        if ancestor_traversal is None:  # This node goes all the way back to root
            return sep.join(n.name for n in self.ancestors())
        else:
            ancestor_depth = self.depth() - len(ancestor_traversal) - 1
            return (
                indent_start +
                ' ' * indent_length * ancestor_depth +
                sep.join(a.name for a in ancestor_traversal + [self])
            )

    def find(self, name):
        "Returns all child nodes (including self) with matching name"
        result = [self] if self.name == name else []
        for child in self.children:
            result += child.find(name)
        return result

    def sum(self, prop):
        "Returns the sum of self plus all children's values for prop (0 where unset)"
        val = getattr(self, prop, 0)
        return val + sum(c.sum(prop) for c in self.children)

    def to_json(self):
        "Returns a str/list/dict representation. The root node is stored as a list."
        if self.children:
            if self.is_root():
                return [child.to_json() for child in sorted(self.children)]
            else:
                return {self.name: [child.to_json() for child in sorted(self.children)]}
        else:
            if self.is_root():
                return []
            else:
                return self.name

    def __str__(self, max_depth=None, current_depth=0):
        "String representation of tree, limited to `max_depth` if provided. `current_depth` is used internally for recursion."
        if self.is_root():
            if max_depth is None or max_depth > 0:
                md = None if max_depth is None else max_depth - 1
                return "".join([c.__str__(max_depth=md, current_depth=current_depth) for c in sorted(self.children)])
            else:
                return ""
        else:
            string_rep = self.indent * current_depth + self.list_marker + self.name + "\n"
            if max_depth is None or current_depth < max_depth:
                string_rep += "".join([c.__str__(max_depth=max_depth, current_depth=current_depth+1) for c in sorted(self.children)])
            return string_rep

    def is_root(self):
        "Returns True if this is the synthetic root node"
        return self.name == self.root

    def __eq__(self, other):
        # Defer to Python's fallback for foreign types rather than raising
        # AttributeError (e.g. when a node is compared with a plain string).
        if not isinstance(other, TreeNode):
            return NotImplemented
        return self.expanded_name() == other.expanded_name()

    def __lt__(self, other):
        if not isinstance(other, TreeNode):
            return NotImplemented
        return self.expanded_name() < other.expanded_name()

    def __hash__(self):
        return hash(self.expanded_name())

    def __repr__(self):
        return "<{}>".format(self.name)
-------------------------------------------------------------------------------- 1 | import curses 2 | from textwrap import wrap 3 | from signal import signal, SIGWINCH 4 | from enum import Flag, auto 5 | import os 6 | import qualitative_coding.user_input as UI 7 | 8 | """ 9 | What's next? 10 | 11 | - Add a debug mode 12 | - Implement arrow key traversal of lines 13 | - Implement seek_line as O(1) operation 14 | - Implement map of line_number -> pad_row_number 15 | - Implement text wrapping for the codes pad 16 | - Rebuild line number map on wrap or unwrap 17 | - Write a function mapping logical cursor position to (row, col) within a line 18 | - Update codes pad to have dynamic width 19 | - Rebuild line number map on window resize 20 | - Update line numbers to not use a pad. It's too much trouble. 21 | Instead, just draw line numbers from top to bottom of the screen. 22 | - Draw line numbers for coding pad too. 23 | - Store target_cursor_position (for when scrolling through lines which are too short) 24 | 25 | Previously, I was keeping track of a global logical line mapping. I was considering this 26 | necessary because sometimes a display line takes up more space than one line. Actually, I 27 | still need to do this, if I want to take advantage of the pad-scrolling functionality 28 | (which I do). Therefore, I need to keep track of the difference between logical lines 29 | and display lines. Will I allow text lines to overflow the 80-character buffer? Yes, I think 30 | so. I can handle them the same way I'll handle representations of codes (comma-separated); 31 | when there are too many for one line, then let them overflow onto the next display line. 32 | 33 | The only performance implications here are when a code is edited, such that a logical code 34 | line changes the number of display lines needed. In this case, I'll need to re-index the 35 | display lines for code sets. 
class Pads(Flag):
    "Flags naming the screen regions which `CodingUI.render` can refresh."
    INDEX = auto()
    TEXT = auto()
    CODES = auto()
    STATUS = auto()
    ALL = INDEX | TEXT | CODES | STATUS


class CodingUI:
    """Implements a curses-based user interface for coding texts in the corpus.
    Initialized with:

        text: an iterable of lines of the text
        codes: an iterable of (code, line) tuples
        codebook: an iterable of all codes.

    NOTE(review): the methods below call len() on `text` and `codes`, index
    `codes` by line number, and write each element of `codes` to the screen
    as a string. That suggests both are sequences with one entry per line.
    Confirm with callers and update the description above.
    """

    TEXT_WIDTH = 80
    DIVIDER_WIDTH = 1
    CODES_WIDTH = 200
    DEBUG = True

    def __init__(self, text, codes, codebook):
        self.text = text
        self.codes = codes
        self.codebook = codebook

    def run(self):
        "Starts a UI session"
        os.environ.setdefault('ESCDELAY', '25')
        curses.wrapper(self._run)

    def _run(self, stdscr):
        "Starts a UI session, receiving a prepared screen"
        curses.start_color()
        curses.use_default_colors()
        curses.init_pair(1, curses.COLOR_YELLOW, curses.COLOR_BLACK)
        self.screen = stdscr
        self.running = True
        self.edit_mode = True
        self.status_message = ""
        self.control_buffer = ""
        self.pad_height = len(self.text)
        self.index_width = len(str(len(self.text))) + 1
        self.focus_window_line = 0
        self.focus_line = 0
        self.cursor_position = 0
        # render(), draw_line_numbers() and the scrolling helpers all read
        # this map, so it has to exist before the first render.
        self.line_row_index = self.build_line_row_index()
        self.measure_screen()
        self.index_pad = self.create_index_pad()
        self.text_pad = self.create_text_pad()
        self.codes_pad = self.create_codes_pad()
        self.status_pad = self.create_status_pad()
        self.set_status_message("Welcome! " + self.help_message(), render=False)
        signal(SIGWINCH, self.handle_screen_resize)
        self.screen.clear()
        self.render()

        while self.running:
            self.handle_keypress(self.screen.getch())

    def build_line_row_index(self):
        """Returns a list mapping each logical line to its first pad row.

        The text pad currently draws every logical line on exactly one row
        (see `create_text_pad`), so the mapping is the identity. It needs to
        be rebuilt here if lines are ever wrapped over several rows.
        """
        return list(range(len(self.text)))

    def render(self, pads=Pads.ALL):
        """Renders the latest state.
        Optional `pads` is a Pads (enum.Flag) specifying which
        pads on the screen should be refreshed. This is an optimization
        for when only part of the screen needs to be rendered.
        """
        PAD_YMIN = self.line_row_index[self.focus_window_line]
        PAD_XMIN = 0
        SCREEN_YMIN = 0
        SCREEN_YMAX = self.rows - 2  # the last row is kept for the status bar

        self.screen.noutrefresh()
        self.draw_divider()
        self.draw_line_numbers(self.text_nums_x0)
        self.draw_line_numbers(self.codes_nums_x0)
        if Pads.TEXT & pads:
            self.text_pad.noutrefresh(
                PAD_YMIN, PAD_XMIN,
                SCREEN_YMIN, self.text_x0,
                SCREEN_YMAX, self.text_x1
            )
        if Pads.CODES & pads:
            self.codes_pad.noutrefresh(
                PAD_YMIN, PAD_XMIN,
                SCREEN_YMIN, self.codes_x0,
                SCREEN_YMAX, self.codes_x1
            )
        if self.DEBUG or Pads.STATUS & pads:
            self.status_pad.noutrefresh(
                0, 0,
                self.rows - 1, 0,
                self.rows, self.cols
            )
        curses.doupdate()

    def create_index_pad(self):
        "Creates a pad for displaying line numbers, starting with 1"
        pad = curses.newpad(self.pad_height, self.index_width)
        for y, line in enumerate(self.text):
            pad.addstr(y, 0, str(y + 1).rjust(self.index_width - 1), curses.color_pair(1))
        return pad

    def create_text_pad(self):
        "Creates a pad for showing the text being coded."
        pad = curses.newpad(self.pad_height, self.TEXT_WIDTH)
        # NOTE(review): a final line of TEXT_WIDTH or more characters fills
        # the pad's bottom-right cell, which curses may reject; confirm.
        for y, line in enumerate(self.text):
            pad.addstr(y, 0, line[:self.TEXT_WIDTH])
        return pad

    def create_codes_pad(self):
        """Creates a pad for showing the codes.

        Each line's codes are written on the same pad row as that line's
        text, because `render` scrolls both pads by the same row offset.
        """
        pad = curses.newpad(self.pad_height, self.CODES_WIDTH)
        for y, (codes, logical_line) in enumerate(zip(self.codes, self.text)):
            pad.addstr(y, 0, codes)
        return pad

    def create_status_pad(self):
        "Creates a pad for the status bar"
        pad = curses.newpad(1, self.cols - 1)
        pad.addstr(0, 0, self.status_message.ljust(self.cols - 2), curses.A_REVERSE)
        return pad

    def measure_screen(self):
        """Gets the dimensions of the screen and computes layout values.
        """
        rows, cols = self.screen.getmaxyx()
        self.rows = rows
        self.cols = cols
        self.text_nums_x0 = 0
        self.text_x0 = self.index_width + 1
        self.text_x1 = self.divider_x = self.text_x0 + self.TEXT_WIDTH
        self.codes_nums_x0 = self.divider_x + self.DIVIDER_WIDTH
        self.codes_x0 = self.codes_nums_x0 + self.index_width + 1
        self.codes_width = self.cols - self.codes_x0
        self.codes_x1 = self.codes_x0 + self.codes_width - 1

    def handle_screen_resize(self, signum=None, frame=None):
        """Re-measures the screen and redraws.

        Registered as the SIGWINCH handler, so it must accept the
        (signal number, stack frame) pair which Python passes to signal
        handlers. Both are ignored and may be omitted when calling directly.
        """
        self.measure_screen()
        self.render()

    def handle_keypress(self, ch):
        """Dispatches one key code from `getch`.

        In edit mode a control character (see the user_input module) starts
        a command and the up/down arrows move the focus line. Otherwise keys
        build up the command until enter runs it or escape cancels it.
        """
        if self.edit_mode:
            if UI.is_control_char(ch):
                self.edit_mode = False
                self.set_status_message(chr(ch))
                self.control_buffer = chr(ch)
            elif ch == curses.KEY_DOWN:
                self.seek_line(self.focus_line + 1)
            elif ch == curses.KEY_UP:
                self.seek_line(self.focus_line - 1)
        else:
            if UI.is_escape(ch):
                self.set_status_message('')
                self.edit_mode = True
            elif UI.allowed_in_command(ch):
                self.set_status_message(self.status_message + chr(ch))
                self.control_buffer += chr(ch)
            elif UI.is_enter(ch):
                self.handle_control_command()

    def handle_control_command(self):
        "Runs the command in the control buffer and returns to edit mode"
        sigil, command = self.control_buffer[0], self.control_buffer[1:]
        if sigil == ':':
            if command.isdigit():
                self.set_status_message(f"SEEKING TO LINE {command}")
                # Line numbers are shown starting from 1; lines are indexed
                # from 0. seek_line clamps out-of-range requests.
                self.seek_line(int(command) - 1)
            elif command == 'q':
                self.running = False
                curses.endwin()
            elif command == 'h':
                self.show_help()
            elif command == 'g':
                self.seek_line(0)
            elif command == 'G':
                self.seek_line(len(self.text) - 1)
            else:
                self.set_status_message("???")
            self.edit_mode = True
        elif sigil == '/':
            self.set_status_message(f"SEARCHING FOR {command}")
            self.edit_mode = True
        elif sigil == '?':
            self.set_status_message(f"REVERSE SEARCHING FOR {command}")
            self.edit_mode = True

    def seek_line(self, index):
        """Tries to move the focus to line `index`.
        Checks that `index` is in bounds, then updates the focus_window_line.
        """
        if not self.codes:
            self.focus_line = 0
            self.cursor_position = 0
            self.render()
            return
        # NOTE(review): assumes codes and text have one entry per line each.
        self.focus_line = max(0, min(index, len(self.codes) - 1))
        if self.focus_line < self.focus_window_line:
            # Focus moved above the window: the focus line becomes its top.
            self.focus_window_line = self.focus_line
        else:
            # Start with the focus line at the top of the window, then walk
            # the top upward for as long as the focus line stays on screen.
            # The walk stops at the current top, so the window only moves
            # when the focus line would otherwise be below the last row.
            # TODO: this treats the focus line as one row tall.
            visible_rows = self.rows - 1
            focus_row = self.line_row_index[self.focus_line]
            top = self.focus_line
            while (top > self.focus_window_line
                   and focus_row - self.line_row_index[top - 1] < visible_rows):
                top -= 1
            self.focus_window_line = top
        # TODO this is clumsy. Save the target cursor_position
        # Clamp against the line actually focused, not the raw request.
        self.cursor_position = min(len(self.codes[self.focus_line]), self.cursor_position)
        self.render()

    def line_is_in_view(self, line):
        "Returns True if logical line `line` starts within the visible window"
        y0, y1 = self.lines_in_view()
        return y0 <= line and line < y1

    def lines_in_view(self):
        "Returns the top (inclusive) and bottom (exclusive) logical lines in view"
        top = bottom = self.focus_window_line
        if not self.line_row_index:
            return top, bottom
        visible_rows = self.rows - 1  # the last screen row is the status bar
        top_row = self.line_row_index[top]
        while (bottom < len(self.line_row_index)
               and self.line_row_index[bottom] - top_row < visible_rows):
            bottom += 1
        return top, bottom

    def set_status_message(self, msg, render=True):
        """Writes `msg` to the status bar.

        The message is cut to the width of the status bar. When DEBUG is
        set, the debug message takes the right-hand end of the bar. The
        screen is refreshed unless `render` is False.
        """
        width = max(0, self.cols - 2)  # the width create_status_pad pads to
        self.status_message = msg[:width]
        if self.DEBUG:
            debug_msg = self.debug_message()[:width]
            smx = max(0, width - len(debug_msg))
            self.status_message = self.status_message[:smx].ljust(smx) + debug_msg
        self.status_pad.addstr(0, 0, self.status_message.ljust(self.cols - 2), curses.A_REVERSE)
        if render:
            self.render(Pads.STATUS)

    def help_message(self):
        "Returns the one-line summary of available commands"
        return (
            ':h -> help | :q -> save and quit | :12 -> go to line 12 | '
            '/cat -> search forward for "cat" | ?dog -> search backward for "dog"'
        )

    def debug_message(self):
        "Defines what is displayed in the debug message"
        return f" | focus: {self.focus_line}, window: {self.focus_window_line}"

    def show_help(self):
        "Shows the help message on the status bar"
        self.set_status_message(self.help_message())

    def split_text(self, text):
        "Wraps each line of `text` to TEXT_WIDTH, returning a list of display lines per line"
        lines = [wrap(line or ' ', width=self.TEXT_WIDTH) or [''] for line in text]
        return lines

    def draw_divider(self):
        "Draws the vertical divider between the text and the codes"
        for y in range(self.rows - 1):
            self.screen.addstr(y, self.divider_x, '|', curses.A_REVERSE)

    def draw_line_numbers(self, screen_x):
        """Draws line numbers at the specified column.
        """
        SPACE_FOR_STATUS_ROW = 1
        screen_y = 0
        ix = self.focus_window_line
        while screen_y < self.rows - SPACE_FOR_STATUS_ROW and ix < len(self.text):
            display_num = str(ix + 1).rjust(self.index_width - 1)
            self.screen.addstr(screen_y, screen_x, display_num, curses.color_pair(1))
            if ix + 1 < len(self.text):
                # Skip as many screen rows as this line occupies on the pad.
                pad_y_delta = self.line_row_index[ix + 1] - self.line_row_index[ix]
                screen_y += pad_y_delta
            ix += 1