├── .Rbuildignore
├── .circleci
    └── config.yml
├── .github
    ├── .gitignore
    └── workflows
    │   └── Actions.yaml
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── 000.R
    ├── ai_attach_direction.R
    ├── ai_crude_data_block_joins.R
    ├── ai_data_gid_join.R
    ├── ai_get_data_attr_map.R
    ├── ai_get_data_attr_map_details.R
    ├── ai_get_data_attr_map_main.R
    ├── ai_relative_data_join_attr.R
    ├── ai_relative_data_split_attr.R
    ├── analyze_cells.R
    ├── as_cell_df.R
    ├── as_cell_df_internal.R
    ├── attach_trace_info.R
    ├── cell_trace_plot.R
    ├── class-cell_df.R
    ├── class-cell_df_analysis.R
    ├── class-intermediate.R
    ├── class-read_cell_part.R
    ├── cli_bindings.R
    ├── collate_columns.R
    ├── collate_columns_etc.R
    ├── common_data_patterns.R
    ├── compact_gid_maps.R
    ├── compose_cells.R
    ├── detect_LibreOffice.R
    ├── detect_and_read.R
    ├── extend_data_block.R
    ├── file_etc.R
    ├── fj.R
    ├── generics-cell_df.R
    ├── generics-cell_df_analysis.R
    ├── generics-read_cell_part.R
    ├── get_all_col_representative.R
    ├── get_cells_from_admap.R
    ├── get_definiteness_checks.R
    ├── get_direction.R
    ├── get_direction_df.R
    ├── get_direction_metric.R
    ├── get_group_id.R
    ├── get_links_df.R
    ├── get_unpivotr_direction_names.R
    ├── global_vars.R
    ├── is_attachable.R
    ├── is_available.R
    ├── is_conforms_to_rcdf.R
    ├── is_xlsx_ok.R
    ├── make_read_cell_part.R
    ├── mask_data.R
    ├── merge_admaps.R
    ├── multi_names.R
    ├── possible_to_support.R
    ├── read_cells.R
    ├── read_cells_stages.R
    ├── read_excel_whole.R
    ├── read_excel_whole_readxl.R
    ├── read_pdf_from_tabulizer.R
    ├── read_xls_from_xlsx.R
    ├── reduce_2dfs.R
    ├── refine_cols.R
    ├── safe_dependency_check.R
    ├── shiny_etc.R
    ├── shiny_main.R
    ├── shiny_parts_server.R
    ├── shiny_parts_server_components.R
    ├── shiny_parts_ui.R
    ├── similarity_score.R
    ├── stitch_direction.R
    ├── tidycells-package.R
    ├── utils-deps.R
    ├── utils-pipe.R
    ├── validate_cells.R
    ├── validate_read_cell_part_object.R
    ├── value_attribute_classifiers.R
    ├── value_attribute_classify.R
    └── visual_functions.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── appveyor.yml
├── codecov.yml
├── cran-comments.md
├── dev-notes.md
├── docs
    ├── 404.html
    ├── LICENSE-text.html
    ├── LICENSE.html
    ├── articles
    │   ├── ext
    │   │   ├── compose_cells_cli1.png
    │   │   ├── logo.png
    │   │   ├── marks.png
    │   │   ├── read_cells.svg
    │   │   ├── read_cells_out.png
    │   │   ├── v12.png
    │   │   ├── v34.png
    │   │   └── v56.png
    │   ├── index.html
    │   ├── tidycells-intro.html
    │   └── tidycells-intro_files
    │   │   └── figure-html
    │   │       ├── unnamed-chunk-15-1.png
    │   │       ├── unnamed-chunk-21-1.png
    │   │       ├── unnamed-chunk-22-1.png
    │   │       ├── unnamed-chunk-25-1.png
    │   │       ├── unnamed-chunk-30-1.png
    │   │       └── unnamed-chunk-30-2.png
    ├── authors.html
    ├── dev-notes.html
    ├── docsearch.css
    ├── docsearch.js
    ├── index.html
    ├── link.svg
    ├── logo.png
    ├── news
    │   └── index.html
    ├── pkgdown.css
    ├── pkgdown.js
    ├── pkgdown.yml
    └── reference
    │   ├── analyse_cells.html
    │   ├── analyze_cells-1.png
    │   ├── analyze_cells-2.png
    │   ├── analyze_cells.html
    │   ├── as_cell_df.html
    │   ├── basic_classifier.html
    │   ├── cell_analysis-class.html
    │   ├── cell_composition_traceback.html
    │   ├── cell_df-class.html
    │   ├── collate_columns.html
    │   ├── compose_cells.html
    │   ├── figures
    │       └── read_cells.svg
    │   ├── get_direction.html
    │   ├── get_direction_df.html
    │   ├── get_direction_metric.html
    │   ├── get_group_id.html
    │   ├── get_unpivotr_direction_names.html
    │   ├── index.html
    │   ├── numeric_values_classifier.html
    │   ├── pipe.html
    │   ├── read_cell_part-class.html
    │   ├── read_cells.html
    │   ├── read_cells_internal.html
    │   ├── sample_based_classifier.html
    │   ├── tidycells-package.html
    │   ├── validate_cells.html
    │   ├── value_attribute_classify-1.png
    │   ├── value_attribute_classify-2.png
    │   ├── value_attribute_classify-3.png
    │   ├── value_attribute_classify.html
    │   └── visual_functions.html
├── inst
    ├── extdata
    │   ├── RBI_HBS_Table_No_166.xlsx
    │   ├── marks.xlsx
    │   ├── marks_cells.rds
    │   ├── messy
    │   │   ├── csv.docx
    │   │   ├── docx.html
    │   │   ├── html.csv
    │   │   ├── pdf.docx
    │   │   ├── xls.pdf
    │   │   └── xlsx.csv
    │   └── untidy.xlsx
    └── rstudio
    │   └── addins.dcf
├── logo.png
├── man
    ├── analyse_cells.Rd
    ├── analyze_cells.Rd
    ├── as_cell_df.Rd
    ├── basic_classifier.Rd
    ├── cell_analysis-class.Rd
    ├── cell_composition_traceback.Rd
    ├── cell_df-class.Rd
    ├── collate_columns.Rd
    ├── compose_cells.Rd
    ├── figures
    │   └── read_cells.svg
    ├── get_direction.Rd
    ├── get_direction_df.Rd
    ├── get_direction_metric.Rd
    ├── get_group_id.Rd
    ├── get_unpivotr_direction_names.Rd
    ├── numeric_values_classifier.Rd
    ├── pipe.Rd
    ├── read_cell_part-class.Rd
    ├── read_cells.Rd
    ├── read_cells_internal.Rd
    ├── sample_based_classifier.Rd
    ├── tidycells-package.Rd
    ├── validate_cells.Rd
    ├── value_attribute_classify.Rd
    └── visual_functions.Rd
├── pkgdown
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   └── favicon.ico
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-VA_classifier.R
    │   ├── test-analyze_cells.R
    │   ├── test-as_cell_df.R
    │   ├── test-cells_to_df.R
    │   ├── test-collate_columns.R
    │   ├── test-etc.R
    │   ├── test-optional_package.R
    │   ├── test-print_cell_analysis.R
    │   ├── test-print_cell_df.R
    │   ├── test-read_cells.R
    │   ├── test-read_cells_real.R
    │   ├── test-use_cases.R
    │   ├── testdata
    │       ├── WSS.rds
    │       ├── badcells.rds
    │       ├── doc.doc
    │       ├── enron_from_unpivotr_processed.rds
    │       └── test.xls
    │   ├── testlib
    │       └── shiny_test.R
    │   └── testusecases
    │       └── ex1.rds
├── tidycells.Rproj
└── vignettes
    ├── .gitignore
    ├── ext
        ├── compose_cells_cli1.png
        ├── logo.png
        ├── marks.png
        ├── read_cells.svg
        ├── read_cells_out.png
        ├── v12.png
        ├── v34.png
        └── v56.png
    └── tidycells-intro.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^\.travis\.yml$
 5 | ^appveyor\.yml$
 6 | ^codecov\.yml$
 7 | ^README\.Rmd$
 8 | ^doc$
 9 | ^Meta$
10 | ^cran-comments\.md$
11 | ^dev-notes\.md$
12 | ^logo\.png$
13 | ^_pkgdown\.yml$
14 | ^docs$
15 | ^pkgdown$
16 | ^\.circleci$
17 | ^\.circleci/config\.yml$
18 | ^\.github$
19 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | jobs:
 3 |   build:
 4 |     docker:
 5 |       - image: rocker/verse:latest
 6 |     environment:
 7 |       R_LIBS: ~/R/Library
 8 |     steps:
 9 |       - restore_cache:
10 |           keys:
11 |             - r-pkg-cache-{{ arch }}-{{ .Branch }}
12 |             - r-pkg-cache-{{ arch }}-
13 |       - checkout
14 |       - run:
15 |           name: Install package dependencies
16 |           command: |
17 |             mkdir -p ~/R/Library
18 |             Rscript -e 'install.packages("remotes")'
19 |             Rscript -e 'remotes::install_deps(dependencies = TRUE)'
20 |       - run:
21 |           name: Session information and installed package versions
22 |           command: |
23 |             Rscript -e 'sessionInfo()'
24 |             Rscript -e 'installed.packages()[, c("Package", "Version")]'
25 |             Rscript -e 'rmarkdown::pandoc_version()'
26 |       - run:
27 |           name: Build package
28 |           command: R CMD build .
29 |       - run:
30 |           name: Check package
31 |           command: R CMD check --as-cran --no-manual *tar.gz
32 |       - store_artifacts:
33 |           path: tidycells.Rcheck/
34 |       - save_cache:
35 |           key: r-pkg-cache-{{ arch }}-{{ .Branch }}
36 |           paths:
37 |             - "~/R/Library"
38 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/Actions.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - master
 5 |   pull_request:
 6 |     branches:
 7 |       - master
 8 | 
 9 | name: Actions
10 | 
11 | jobs:
12 |   Actions:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: windows-latest, r: 'release'}
22 |           - {os: windows-latest, r: 'devel'}
23 |           - {os: windows-latest, r: 'oldrel'}
24 |           - {os: macOS-latest, r: 'release'}
25 |           - {os: macOS-latest, r: 'devel'}
26 |           - {os: macOS-latest, r: 'oldrel'}
27 |           - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
28 | 
29 |     env:
30 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
31 |       RSPM: ${{ matrix.config.rspm }}
32 | 
33 |     steps:
34 |       - uses: actions/checkout@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@master
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 | 
40 |       - uses: r-lib/actions/setup-pandoc@master
41 | 
42 |       - name: Query dependencies
43 |         run: |
44 |           install.packages('remotes')
45 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
46 |         shell: Rscript {0}
47 | 
48 |       - name: Cache R packages
49 |         if: runner.os != 'Windows'
50 |         uses: actions/cache@v1
51 |         with:
52 |           path: ${{ env.R_LIBS_USER }}
53 |           key: ${{ runner.os }}-r-${{ matrix.config.r }}-1-${{ hashFiles('.github/depends.Rds') }}
54 |           restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-1-
55 | 
56 |       - name: Install system dependencies
57 |         if: runner.os == 'Linux'
58 |         env:
59 |           RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
60 |         run: |
61 |           Rscript -e "remotes::install_github('r-hub/sysreqs')"
62 |           sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
63 |           sudo -s eval "$sysreqs"
64 | 
65 |       - name: Install dependencies
66 |         run: |
67 |           try(system("sudo R CMD javareconf"))
68 |           remotes::install_cran("rJava", type = ifelse(tolower(Sys.info()[["sysname"]])=="linux","source","both"))
69 |           remotes::install_deps(dependencies = TRUE)
70 |           remotes::install_cran("rcmdcheck")
71 |         shell: Rscript {0}
72 | 
73 |       - name: Check
74 |         env:
75 |           _R_CHECK_CRAN_INCOMING_REMOTE_: false
76 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
77 |         shell: Rscript {0}
78 | 
79 |       - name: Upload check results
80 |         if: failure()
81 |         uses: actions/upload-artifact@master
82 |         with:
83 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
84 |           path: check
85 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 | doc
7 | Meta
8 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | # libreoffice for doc
 3 | before_install:
 4 |     - if [[ "${TRAVIS_OS_NAME}" = "linux" ]]; then sudo apt-add-repository -y ppa:libreoffice/ppa; fi
 5 |     - if [[ "${TRAVIS_OS_NAME}" = "linux" ]]; then sudo apt-get install libreoffice; fi
 6 |     # temp fix for ps in osx
 7 |     - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then R -e "install.packages(c('ps'), repos = 'http://cran.rstudio.com/')"  ; fi
 8 | 
 9 | 
10 | language: R
11 | 
12 | r:
13 |   - oldrel
14 |   - release
15 |   - devel
16 | 
17 | cache: packages
18 | 
19 | warnings_are_errors: true
20 | 
21 | os:
22 |   - linux
23 |   - osx
24 | 
25 | matrix:
26 |   exclude:
27 |     -
28 |       os: osx
29 |       r: devel
30 | 
31 | after_success:
32 |   - if [[ "${TRAVIS_OS_NAME}" = "linux" && "${TRAVIS_R_VERSION_STRING}" = "release" ]]; then Rscript -e 'covr::codecov(type = "all")'; fi
33 |   - if [[ "${TRAVIS_OS_NAME}" = "linux" && "${TRAVIS_R_VERSION_STRING}" = "release" ]]; then Rscript -e 'covr::coveralls(type = "all", line_exclusions = c("R/shiny_etc.R","R/shiny_main.R","R/shiny_parts_server.R","R/shiny_parts_server_components.R","R/shiny_parts_ui.R","R/visual_functions.R"))'; fi
34 |   - if [[ "${TRAVIS_OS_NAME}" = "linux" ]]; then libreoffice --version; fi
35 |   - if [[ "${TRAVIS_OS_NAME}" = "linux" && "${TRAVIS_R_VERSION_STRING}" = "release" ]]; then Rscript -e 'if(!requireNamespace("RCurl", quietly = TRUE)) install.packages("RCurl")'; fi
36 |   - if [[ "${TRAVIS_OS_NAME}" = "linux" && "${TRAVIS_R_VERSION_STRING}" = "release" ]]; then Rscript -e 'b <- list.files(pattern = "tidycells_[0-9.]+.tar.gz", full.names = T);if(length(b)>0){b <- normalizePath(max(b));cat("Got", b, "\n");lk <- paste0("ftp://win-builder.r-project.org/", c("R-devel", "R-release", "R-oldrelease"), "/", basename(b));fup <- function(u) {i <- 0;e <- NULL;repeat({i<-i + 1;e<- try(RCurl::ftpUpload(what = b, to = u), silent = TRUE);if(inherits(e, "try-error")){Sys.sleep(1);if(u!=lk[1]){e <- "failed"}}else{cat("Uploaded for:", u, "\n");break();};if(i>50) break();});e};lapply(lk, fup)}'; fi
37 | 
38 | before_deploy:
39 |   # so no overwrite of linux build
40 |   - if [[ "${TRAVIS_OS_NAME}" = "osx" ]]; then rm -f *.tar.gz; fi
41 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Type: Package
 2 | Package: tidycells
 3 | Title: Read Tabular Data from Diverse Sources and Easily Make
 4 |     Them Tidy
 5 | Version: 0.2.2.999
 6 | Authors@R: 
 7 |     person(given = "Indranil",
 8 |            family = "Gayen",
 9 |            role = c("aut", "cre"),
10 |            email = "nil.gayen@gmail.com",
11 |            comment = c(ORCID = "0000-0003-0197-1944"))
12 | Description: Provides utilities to read, cells from complex
13 |     tabular data and heuristic detection based 'structural assignment' of
14 |     those cells to a columnar or tidy format. Read functionality has the
15 |     ability to read structured, partially structured or unstructured
16 |     tabular data from various types of documents. The 'structural
17 |     assignment' functionality has both supervised and unsupervised way of
18 |     assigning cells data to columnar/tidy format. Multiple disconnected
19 |     blocks of tables in a single sheet are also handled appropriately.
20 |     These tools are suitable for unattended conversation of messy tables
21 |     into a consumable format(usable for further analysis and data
22 |     wrangling).
23 | License: MIT + file LICENSE
24 | URL: https://r-rudra.github.io/tidycells/,
25 |     https://github.com/r-rudra/tidycells
26 | BugReports: https://github.com/r-rudra/tidycells/issues
27 | Depends:
28 |     R (>= 3.2.0)
29 | Imports: 
30 |     dplyr (>= 0.8.1),
31 |     ggplot2,
32 |     graphics,
33 |     magrittr,
34 |     methods,
35 |     purrr (>= 0.3.2),
36 |     rlang,
37 |     stats,
38 |     stringr (>= 1.4.0),
39 |     tibble,
40 |     tidyr,
41 |     unpivotr (>= 0.5.1),
42 |     utils
43 | Suggests: 
44 |     cli,
45 |     covr,
46 |     docxtractr,
47 |     DT,
48 |     knitr,
49 |     miniUI,
50 |     plotly,
51 |     readr,
52 |     readxl,
53 |     rmarkdown,
54 |     rstudioapi,
55 |     shiny,
56 |     shinytest,
57 |     stringdist,
58 |     tabulizer,
59 |     testthat (>= 2.1.0),
60 |     tidyxl,
61 |     xlsx,
62 |     XML
63 | VignetteBuilder: 
64 |     knitr
65 | Encoding: UTF-8
66 | LazyData: true
67 | Roxygen: list(markdown = TRUE)
68 | RoxygenNote: 7.1.0
69 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2019
2 | COPYRIGHT HOLDER: Indranil Gayen
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2019 Indranil Gayen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(as.data.frame,cell_df)
 4 | S3method(as.matrix,cell_df)
 5 | S3method(as_cell_df,cells)
 6 | S3method(as_cell_df,data.frame)
 7 | S3method(as_cell_df,matrix)
 8 | S3method(basic_classifier,cell_df)
 9 | S3method(basic_classifier,default)
10 | S3method(numeric_values_classifier,cell_df)
11 | S3method(numeric_values_classifier,default)
12 | S3method(plot,cell_analysis)
13 | S3method(plot,cell_df)
14 | S3method(print,cell_analysis)
15 | S3method(print,cell_df)
16 | S3method(print,read_cell_part)
17 | S3method(read_cells,"NULL")
18 | S3method(read_cells,character)
19 | S3method(read_cells,default)
20 | S3method(read_cells,read_cell_part)
21 | S3method(sample_based_classifier,cell_df)
22 | S3method(sample_based_classifier,default)
23 | S3method(summary,cell_df)
24 | export("%>%")
25 | export(analyse_cells)
26 | export(analyze_cells)
27 | export(as_cell_df)
28 | export(basic_classifier)
29 | export(cell_composition_traceback)
30 | export(collate_columns)
31 | export(compose_cells)
32 | export(numeric_values_classifier)
33 | export(read_cells)
34 | export(sample_based_classifier)
35 | export(validate_cells)
36 | export(value_attribute_classify)
37 | export(visual_crop)
38 | export(visual_data_block_inspection)
39 | export(visual_orientation_modification)
40 | export(visual_traceback)
41 | export(visual_va_classify)
42 | exportClasses(cell_analysis)
43 | exportClasses(cell_df)
44 | exportClasses(read_cell_part)
45 | importFrom(dplyr,anti_join)
46 | importFrom(dplyr,arrange)
47 | importFrom(dplyr,bind_rows)
48 | importFrom(dplyr,case_when)
49 | importFrom(dplyr,count)
50 | importFrom(dplyr,desc)
51 | importFrom(dplyr,distinct)
52 | importFrom(dplyr,filter)
53 | importFrom(dplyr,full_join)
54 | importFrom(dplyr,group_by)
55 | importFrom(dplyr,group_split)
56 | importFrom(dplyr,if_else)
57 | importFrom(dplyr,inner_join)
58 | importFrom(dplyr,left_join)
59 | importFrom(dplyr,mutate)
60 | importFrom(dplyr,mutate_all)
61 | importFrom(dplyr,n)
62 | importFrom(dplyr,n_distinct)
63 | importFrom(dplyr,pull)
64 | importFrom(dplyr,recode)
65 | importFrom(dplyr,rename)
66 | importFrom(dplyr,right_join)
67 | importFrom(dplyr,select)
68 | importFrom(dplyr,summarise)
69 | importFrom(dplyr,ungroup)
70 | importFrom(magrittr,"%>%")
71 | importFrom(methods,setOldClass)
72 | importFrom(purrr,imap_dfr)
73 | importFrom(purrr,map)
74 | importFrom(purrr,map_chr)
75 | importFrom(purrr,map_dbl)
76 | importFrom(purrr,map_df)
77 | importFrom(purrr,map_int)
78 | importFrom(purrr,map_lgl)
79 | importFrom(purrr,reduce)
80 | importFrom(rlang,abort)
81 | importFrom(rlang,warn)
82 | importFrom(tibble,as_tibble)
83 | importFrom(tibble,tibble)
84 | importFrom(unpivotr,enhead)
85 | importFrom(utils,hasName)
86 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # tidycells (development version)
 2 | 
 3 | 
 4 | # *News*
 5 | 
 6 | # tidycells 0.2.2 _(2020-01-07)_
 7 | 
 8 | ## Other changes
 9 | 
10 | * Internal changes to unit tests which will fix [#11](https://github.com/r-rudra/tidycells/issues/11) 
11 | 
12 | # tidycells 0.2.1 _(2019-08-27)_
13 | 
14 | ## New features
15 | 
16 | * Enhancement in the heuristic-based algorithm
17 | 
18 | ## Other changes
19 | 
20 | * Now if `read_cells` fails in the intermediate stage, it will give the output of last successful stage 
21 | 
22 | # tidycells 0.2.0 _(2019-08-20)_
23 | 
24 | ## First CRAN Release
25 | 
26 | * Initial CRAN Release
27 | 
28 | # tidycells 0.1.9 _(2019-08-12)_
29 | 
30 | ## New Features`
31 | 
32 | * Added `collate_columns` to collate attribute-columns having similar content.
33 | 
34 | # tidycells 0.1.0 _(2019-07-25)_
35 | 
36 | ## Initial Public Release
37 | 
38 | * Initial Release to GitHub
39 | * Prior to this it was a private package
40 | 
41 | 


--------------------------------------------------------------------------------
/R/000.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # pkg share global vars
 3 | tidycells_pkg_env <- new.env()
 4 | # used in --> is_available.R
 5 | assign("na_pkgs", NULL, envir = tidycells_pkg_env)
 6 | 
 7 | 
 8 | # safe_dependency_check
 9 | # in later version create a proper framework
10 | # used in --> is_available.R and safe_dependency_check.R
11 | 


--------------------------------------------------------------------------------
/R/ai_attach_direction.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ai_attach_direction <- function(d_att_dat_map_raw) {
 4 | 
 5 |   # asp: attr split
 6 |   d_att_dat_map_raw_asp <- d_att_dat_map_raw %>%
 7 |     # kept for tracking
 8 |     mutate(direction_basic = direction) %>%
 9 |     mutate(attr_gid_split = if_else(direction_group == "NS" & attr_group == "major", row_a,
10 |       if_else(direction_group == "WE" & attr_group == "major", col_a, 0L)
11 |     ))
12 | 
13 |   d_att_dat_map_raw_asp <- d_att_dat_map_raw_asp %>%
14 |     mutate(attr_gid_split = if_else(direction_group == "corner", col_a, attr_gid_split))
15 | 
16 |   d_att_dat_map_raw_asp %>%
17 |     group_by(data_gid, attr_gid, direction, attr_gid_split) %>%
18 |     group_split() %>%
19 |     map_df(~ .x %>% mutate(direction = get_direction(.x)))
20 | }
21 | 


--------------------------------------------------------------------------------
/R/ai_crude_data_block_joins.R:
--------------------------------------------------------------------------------
 1 | ai_crude_data_block_joins <- function(basic_admap, d_dat) {
 2 |   main_dir_attr <- basic_admap %>%
 3 |     filter(direction_group != "corner") %>%
 4 |     group_by(data_gid) %>%
 5 |     summarise(aid = attr_gid %>%
 6 |       paste0("_", direction) %>%
 7 |       unique() %>%
 8 |       sort() %>%
 9 |       paste0(collapse = "+"))
10 | 
11 |   crude_map <- main_dir_attr %>%
12 |     group_by(aid) %>%
13 |     mutate(new_gid = min(data_gid), n = n()) %>%
14 |     ungroup() %>%
15 |     filter(n > 1) %>%
16 |     select(gid = data_gid, new_gid)
17 | 
18 |   done <- FALSE
19 |   if (nrow(crude_map) > 0) {
20 |     done <- TRUE
21 |     d_dat <- get_group_id_join_gids(d_dat, gid_map = crude_map)
22 |   }
23 |   list(d_dat = d_dat, done = done)
24 | }
25 | 


--------------------------------------------------------------------------------
/R/ai_data_gid_join.R:
--------------------------------------------------------------------------------
 1 | ai_data_gid_join <- function(d_dat, data_attr_map, full_data) {
 2 |   repeat({
 3 |     if (length(unique(d_dat$group_id_map$gid)) < 2) break()
 4 | 
 5 |     data_gid_comb <- d_dat$group_id_map$gid %>%
 6 |       unique() %>%
 7 |       utils::combn(2) %>%
 8 |       as.data.frame(stringsAsFactors = FALSE)
 9 | 
10 |     data_gid_comb_chk <- data_gid_comb %>%
11 |       map_lgl(~ is_attachable(
12 |         gid1 = .x[1], gid2 = .x[2],
13 |         group_info = d_dat, whole_data = full_data,
14 |         data_attr_map = data_attr_map
15 |       ))
16 |     if (any(data_gid_comb_chk)) {
17 |       data_gid_joins <- data_gid_comb[data_gid_comb_chk]
18 | 
19 |       data_gid_join_map <- get_links_df(data_gid_joins)
20 |       d_dat <- get_group_id_join_gids(d_dat, gid_map = data_gid_join_map)
21 |     } else {
22 |       break()
23 |     }
24 |   })
25 |   d_dat
26 | }
27 | 


--------------------------------------------------------------------------------
/R/ai_get_data_attr_map.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ai_get_data_attr_map <- function(dat_boundary,
 3 |                                  att_gid_map,
 4 |                                  attr_to_near_data = FALSE, leave_inside = FALSE) {
 5 | 
 6 |   # check relative location of each attr_gid (gid) wrt each data_gid
 7 |   if (!leave_inside) {
 8 |     d_att_map <- dat_boundary %>%
 9 |       split(.$gid) %>%
10 |       map_df(~ get_direction_df(.x, datt = att_gid_map))
11 |   } else {
12 |     d_att_map <- dat_boundary %>%
13 |       split(.$gid) %>%
14 |       map_df(~ get_direction_df(.x, datt = att_gid_map, allow_inside = TRUE))
15 |   }
16 | 
17 | 
18 |   # for each attr_gid (gid), data_gid, direction, direction_group :- get minimum distance
19 |   d_gid_att_map <- d_att_map %>%
20 |     group_by(gid, data_gid, direction, direction_group) %>%
21 |     summarise(md = min(c(dist, Inf))) %>%
22 |     ungroup()
23 | 
24 |   # attach nearest attr_gid to each data gid
25 |   d_gid_att_map_min_d <- d_gid_att_map %>%
26 |     group_by(data_gid, direction_group) %>%
27 |     mutate(m_dist = min(c(md, Inf))) %>%
28 |     ungroup() %>%
29 |     filter(md == m_dist) %>%
30 |     select(-md) %>%
31 |     rename(attr_gid = gid, dist = m_dist)
32 | 
33 |   # by default each data gid will choose nearest attr_gid (multiple allowed)
34 |   if (attr_to_near_data) {
35 |     # this does same for attr gid too
36 |     # after each data gid choose nearest attr then each attr will choose nearest data gid
37 | 
38 |     d_gid_att_map_min_d <- d_gid_att_map_min_d %>%
39 |       group_by(attr_gid) %>%
40 |       mutate(md = min(c(dist, Inf))) %>%
41 |       ungroup() %>%
42 |       filter(md == dist) %>%
43 |       select(-md)
44 |   }
45 | 
46 | 
47 |   lo <- list(map = d_gid_att_map_min_d, all_map = d_gid_att_map, raw = d_att_map)
48 |   if (!leave_inside) {
49 |     lo
50 |   } else {
51 |     lo %>% map(~ .x %>% filter(direction != "INSIDE"))
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/R/ai_get_data_attr_map_details.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # dimention analysis and raw maps
 3 | ai_get_data_attr_map_details <- function(basic_map, d_dat, d_att, major_direction_relax = TRUE) {
 4 |   dimension_analysis <- list()
 5 | 
 6 |   dimension_analysis$data_gid_dim <- d_dat$group_id_map %>%
 7 |     group_by(gid) %>%
 8 |     summarise(
 9 |       r_dim_data = n_distinct(row),
10 |       c_dim_data = n_distinct(col)
11 |     )
12 | 
13 | 
14 |   d_att_dat_map <- basic_map
15 | 
16 |   d_att_dat_map_raw <- d_att_dat_map %>%
17 |     # join with data_gid to attach all data-cells
18 |     inner_join(d_dat$group_id_map %>%
19 |       select(row_d = row, col_d = col, data_gid = gid),
20 |     by = "data_gid"
21 |     ) %>%
22 |     # join with attr_gid to attach all attr-cells
23 |     inner_join(d_att$group_id_map %>%
24 |       select(row_a = row, col_a = col, attr_gid = gid),
25 |     by = "attr_gid"
26 |     )
27 | 
28 |   # attach dimension
29 |   dimension_analysis$attr_data_dim <- d_att_dat_map_raw %>%
30 |     group_by(attr_gid, data_gid) %>%
31 |     summarise(
32 |       r_dim = row_d %>% intersect(row_a) %>% length(),
33 |       c_dim = col_d %>% intersect(col_a) %>% length(),
34 |       direction_group = direction_group[1]
35 |     ) %>%
36 |     ungroup() %>%
37 |     inner_join(dimension_analysis$data_gid_dim, by = c("data_gid" = "gid")) %>%
38 |     mutate(rel_dim = if_else(direction_group == "NS", c_dim / c_dim_data, r_dim / r_dim_data)) %>%
39 |     mutate(rel_dim = if_else(direction_group == "corner", 0, rel_dim)) %>%
40 |     mutate(full_dim = (rel_dim >= 1))
41 | 
42 |   # in case only non full dim major (NS or WE) attr present
43 |   if (major_direction_relax) {
44 |     dimension_analysis$attr_data_dim <- dimension_analysis$attr_data_dim %>%
45 |       group_by(data_gid, direction_group) %>%
46 |       mutate(
47 |         is_full_dim_present = any(full_dim),
48 |         this_attr_max_rel = (rel_dim == max(rel_dim))
49 |       ) %>%
50 |       ungroup() %>%
51 |       rename(full_dim_orig = full_dim) %>%
52 |       mutate(full_dim = if_else(direction_group == "corner",
53 |         full_dim_orig,
54 |         if_else(is_full_dim_present,
55 |           full_dim_orig,
56 |           this_attr_max_rel
57 |         )
58 |       ))
59 |   }
60 | 
61 |   # fix major minor
62 | 
63 |   d_att_dat_map <- dimension_analysis$attr_data_dim %>%
64 |     distinct(attr_gid, data_gid, full_dim) %>%
65 |     right_join(d_att_dat_map, by = c("attr_gid", "data_gid")) %>%
66 |     mutate(attr_group = if_else(full_dim, "major", "minor")) %>%
67 |     select(-full_dim)
68 | 
69 | 
70 | 
71 |   d_att_dat_map_raw <- d_att_dat_map_raw %>%
72 |     inner_join(d_att_dat_map %>% select(attr_gid, data_gid, direction, attr_group),
73 |       by = c("attr_gid", "direction", "data_gid")
74 |     )
75 | 
76 |   list(raw_map = d_att_dat_map_raw, map = d_att_dat_map, dimension_analysis = dimension_analysis)
77 | }
78 | 


--------------------------------------------------------------------------------
/R/ai_get_data_attr_map_main.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ai_get_data_attr_map_main <- function(d_dat, d_att, crude_join = TRUE) {
 3 | 
 4 |   #  start with simple attr data map
 5 |   admap0 <- ai_get_data_attr_map(
 6 |     dat_boundary = d_dat$group_id_boundary,
 7 |     att_gid_map = d_att$group_id_map
 8 |   )
 9 | 
10 | 
11 |   if (crude_join) {
12 |     # crude joins
13 |     # absolutely sure joins
14 |     crude_djoins <- ai_crude_data_block_joins(basic_admap = admap0$map, d_dat = d_dat)
15 |     if (crude_djoins$done) {
16 |       d_dat <- crude_djoins$d_dat
17 |       admap0 <- ai_get_data_attr_map(
18 |         dat_boundary = d_dat$group_id_boundary,
19 |         att_gid_map = d_att$group_id_map
20 |       )
21 |     }
22 |   }
23 | 
24 | 
25 |   # split attr gid relative to data_gid
26 |   rel_chk <- ai_relative_data_split_attr(basic_map = admap0, d_att = d_att, d_dat = d_dat)
27 |   if (rel_chk$done) {
28 |     d_att <- rel_chk$d_att %>% map(unique)
29 |     admap0 <- rel_chk$admap
30 |   }
31 | 
32 | 
33 |   admap1_major_minor <- admap0$all_map %>%
34 |     rename(attr_gid = gid, dist = md) %>%
35 |     filter(direction_group != "corner") %>%
36 |     ai_get_data_attr_map_details(d_dat, d_att)
37 | 
38 |   admap1_major_minor_compact <- admap1_major_minor$map %>%
39 |     filter(attr_group == "major") %>%
40 |     rename(md = dist) %>%
41 |     group_by(data_gid, direction_group) %>%
42 |     mutate(m_dist = min(md)) %>%
43 |     ungroup() %>%
44 |     filter(md == m_dist) %>%
45 |     select(-md) %>%
46 |     rename(dist = m_dist)
47 | 
48 |   admap0_pass0 <- admap1_major_minor_compact %>%
49 |     filter(direction_group != "corner") %>%
50 |     unique() %>%
51 |     select(-attr_group)
52 | 
53 |   # dimension analysis done here (major minor classification)
54 |   admap1 <- admap0_pass0 %>%
55 |     ai_get_data_attr_map_details(d_dat, d_att)
56 | 
57 |   list(admap = admap1, d_dat = d_dat, d_att = d_att)
58 | }
59 | 


--------------------------------------------------------------------------------
/R/ai_relative_data_join_attr.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ai_relative_data_join_attr <- function(admap_main, d_att) {
 4 |   chk <- admap_main$raw_map %>%
 5 |     distinct(attr_gid, data_gid, direction, attr_group) %>%
 6 |     group_by(data_gid, direction, attr_group) %>%
 7 |     mutate(n_att = n_distinct(attr_gid)) %>%
 8 |     ungroup() %>%
 9 |     filter(n_att > 1)
10 | 
11 |   done <- FALSE
12 | 
13 |   if (nrow(chk) > 0) {
14 |     # relative join required
15 |     done <- TRUE
16 | 
17 |     rel_gids <- chk %>%
18 |       select(-n_att) %>%
19 |       inner_join(admap_main$raw_map, by = c("attr_gid", "data_gid", "direction", "attr_group"))
20 | 
21 |     d_att_dat_map_raw_rest <- admap_main$raw_map %>%
22 |       anti_join(chk, by = c("attr_gid", "data_gid", "direction", "attr_group"))
23 | 
24 |     rel_gids_att <- rel_gids %>%
25 |       distinct(attr_gid, data_gid, direction, attr_group, row = row_a, col = col_a) %>%
26 |       group_by(data_gid, direction, attr_group) %>%
27 |       mutate(new_attr_gid = paste(min(attr_gid), data_gid, direction, sep = "_")) %>%
28 |       ungroup()
29 | 
30 |     rel_gids <- rel_gids %>% inner_join(rel_gids_att %>% distinct(attr_gid, new_attr_gid), by = "attr_gid")
31 | 
32 |     rel_gids <- rel_gids %>%
33 |       group_by(new_attr_gid, data_gid) %>%
34 |       mutate(
35 |         # this is possibly not required anymore as attr_group is in grouping vars
36 |         new_attr_group = if_else(any(attr_group == "major"), "major", "minor"),
37 |         new_dist = min(dist)
38 |       ) %>%
39 |       ungroup()
40 | 
41 |     admap_main$raw_map <- rel_gids %>%
42 |       select(-attr_group, -attr_gid, -dist) %>%
43 |       rename(attr_gid = new_attr_gid, attr_group = new_attr_group, dist = new_dist) %>%
44 |       bind_rows(d_att_dat_map_raw_rest)
45 | 
46 |     admap_main$map <- admap_main$raw_map %>%
47 |       distinct(attr_gid, data_gid, direction, direction_group, dist, attr_group)
48 | 
49 |     # dimesion analysis is not kept
50 | 
51 |     # update d_att
52 |     d_att$group_id_map <- d_att$group_id_map %>%
53 |       bind_rows(rel_gids_att %>%
54 |         select(gid = new_attr_gid, row, col))
55 |     d_att$group_id_boundary <- get_group_id_boundary(d_att$group_id_map)
56 | 
57 |     chk_this <- chk %>% distinct(gid = attr_gid, data_gid)
58 | 
59 |     if (is.null(d_att$missed_blocks)) {
60 |       d_att$missed_blocks <- chk_this
61 |     } else {
62 |       d_att$missed_blocks <- chk_this %>%
63 |         bind_rows(d_att$missed_blocks) %>%
64 |         unique()
65 |     }
66 |   }
67 | 
68 |   list(done = done, d_att = d_att, admap = admap_main)
69 | }
70 | 


--------------------------------------------------------------------------------
/R/ai_relative_data_split_attr.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ai_relative_data_split_attr <- function(basic_map, d_att, d_dat) {
 3 |   chk <- basic_map$map %>%
 4 |     group_by(data_gid, attr_gid) %>%
 5 |     summarise(n_dirs = n_distinct(direction)) %>%
 6 |     ungroup() %>%
 7 |     filter(n_dirs > 1)
 8 | 
 9 |   done <- FALSE
10 | 
11 |   if (nrow(chk) > 0) {
12 |     # relative split required
13 |     done <- TRUE
14 | 
15 |     rel_gids <- chk %>%
16 |       select(-n_dirs) %>%
17 |       rename(gid = attr_gid) %>%
18 |       inner_join(basic_map$raw, by = c("data_gid", "gid"))
19 |     rel_gids <- rel_gids %>%
20 |       mutate(new_attr_gid = paste(gid, data_gid, direction, sep = "_"))
21 | 
22 |     d_att$group_id_map <- d_att$group_id_map %>%
23 |       bind_rows(rel_gids %>%
24 |         select(gid = new_attr_gid, row, col))
25 |     d_att$group_id_boundary <- get_group_id_boundary(d_att$group_id_map)
26 | 
27 |     # information kept for missing link detection
28 |     chk_this <- chk %>% distinct(gid = attr_gid, data_gid)
29 |     d_att$missed_blocks <- chk_this
30 | 
31 |     admap_new <- ai_get_data_attr_map(
32 |       dat_boundary = d_dat$group_id_boundary,
33 |       att_gid_map = d_att$group_id_map
34 |     )
35 |     admap_new$raw <- admap_new$raw %>%
36 |       anti_join(chk_this, by = c("gid", "data_gid"))
37 |     admap_new$all_map <- admap_new$all_map %>%
38 |       anti_join(chk_this, by = c("gid", "data_gid"))
39 |     admap_new$map <- admap_new$map %>%
40 |       anti_join(chk_this %>% rename(attr_gid = gid), by = c("attr_gid", "data_gid"))
41 |   } else {
42 |     admap_new <- basic_map
43 |   }
44 | 
45 |   list(done = done, d_att = d_att, admap = admap_new)
46 | }
47 | 


--------------------------------------------------------------------------------
/R/as_cell_df.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Transform data into Cell-DF Structure
 3 | #'
 4 | #' @description Transform an R object (mostly matrix or data.frame) into a [`cell_df`][cell_df-class]
 5 | #' for further processing in other `tidycells` functions.
 6 | #'
 7 | #' @param d the data (either a matrix with column name or a data.frame)
 8 | #' @param take_row_names consider row names as separate cells
 9 | #' (applicable only for data with no (row, col) information). Default is \code{FALSE}.
10 | #' @param take_col_names consider column names as separate cells
11 | #' (applicable only for data with no (row, col) information). Default is \code{FALSE}.
12 | #'
13 | #' @return An object of class [`cell_df`][cell_df-class].
14 | #'
15 | #' **Note**: After this, you may like to do [`Value Attribute Classification`][value_attribute_classify()].
16 | #'
17 | #' @export
18 | #' @rdname as_cell_df
19 | #' @seealso
20 | #' * [`validate_cells`][validate_cells()] which is used to validate `cell_df`.
21 | #' * [`as_cells`][unpivotr::as_cells()] from `unpivotr` package.
22 | #'
23 | #' @examples
24 | #'
25 | #' as_cell_df(iris)
26 | #'
27 | #' # consider column name as cell
28 | #' as_cell_df(iris, take_col_names = TRUE)
29 | #'
30 | #' # if the data is already in a similar format it will not further transform
31 | #' # which is not true for ---> unpivotr::as_cells
32 | #' # check ---> unpivotr::as_cells(iris) %>% unpivotr::as_cells()
33 | #' unpivotr::as_cells(iris) %>% as_cell_df()
34 | as_cell_df <- function(d, take_row_names = FALSE, take_col_names = FALSE) {
35 |   UseMethod("as_cell_df")
36 | }
37 | 
38 | #' @export
39 | as_cell_df.cells <- function(d, ...) {
40 |   d
41 | }
42 | 
43 | #' @export
44 | as_cell_df.data.frame <- function(d, ...) {
45 |   d %>%
46 |     attach_intermediate_class() %>%
47 |     as_cell_df_internal(...)
48 | }
49 | 
50 | #' @export
51 | as_cell_df.matrix <- function(d, ...) {
52 |   d %>%
53 |     as.data.frame() %>%
54 |     attach_intermediate_class() %>%
55 |     as_cell_df_internal(...)
56 | }
57 | 


--------------------------------------------------------------------------------
/R/attach_trace_info.R:
--------------------------------------------------------------------------------
 1 | 
 2 | attach_trace_info <- function(ca, dc) {
 3 |   if (missing(dc)) {
 4 |     if (missing(ca)) {
 5 |       abort("either 'ca' or 'dc' required")
 6 |     }
 7 |     dc_t <- compose_cells_raw(ca, trace_it_back = TRUE, silent = TRUE)
 8 |     dc <- dc_t
 9 |   } else {
10 |     if (!any(stringr::str_detect(colnames(dc), "cellAddress_"))) {
11 |       if (!missing(ca)) {
12 |         dc_t <- compose_cells_raw(ca, trace_it_back = TRUE, silent = TRUE)
13 |         dc_t <- dc_t[c("row", "col", colnames(dc_t)[stringr::str_detect(colnames(dc_t), "cellAddress_")])]
14 |         dc_t$chk_this <- "ok"
15 |         dc <- dc %>% left_join(dc_t, by = c("row", "col"))
16 |         if (any(is.na(dc$chk_this))) {
17 |           abort("at least one row could not be mapped properly")
18 |         }
19 |       } else {
20 |         abort("supplied composition does not contain trace information and 'ca' is not given")
21 |       }
22 |     }
23 |   }
24 | 
25 |   dc
26 | }
27 | 


--------------------------------------------------------------------------------
/R/cell_trace_plot.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Display static composition traceback plot
  4 | #'
  5 | #' @param ca A cell analysis
  6 | #' @param dc (optional) a composed tidy data.frame
  7 | #' @param trace_row the row number to trace. (Default 1)
  8 | #' @keywords internal
  9 | #' @export
 10 | cell_composition_traceback <- function(ca, dc, trace_row = 1L) {
 11 |   cell_trace_plot(dc, trace_row, ca)
 12 | }
 13 | 
 14 | 
 15 | cell_trace_plot <- function(dc, trace_row, ca, prior_plot, prior_ca_plot) {
 16 |   dc <- attach_trace_info(ca, dc)
 17 | 
 18 |   if (!missing(trace_row)) {
 19 |     if (length(trace_row) > 0) {
 20 |       if (missing(prior_plot)) {
 21 |         if (missing(prior_ca_plot)) {
 22 |           prior_plot <- graphics::plot(ca$cell_df, no_plot = TRUE)
 23 |         }
 24 |       }
 25 | 
 26 |       d0 <- dc[trace_row, ]
 27 | 
 28 |       connected_cells <- d0[stringr::str_detect(colnames(d0), "cellAddress")]
 29 |       connected_cells <- connected_cells %>%
 30 |         map_lgl(~ !is.na(.x)) %>%
 31 |         connected_cells[.]
 32 |       connected_cells <- connected_cells %>%
 33 |         imap_dfr(~ .x %>%
 34 |           stringr::str_split(" :: ") %>%
 35 |           reduce(c) %>%
 36 |           stringr::str_split("_") %>%
 37 |           map(~ .x %>%
 38 |             as.integer() %>%
 39 |             t()) %>%
 40 |           reduce(rbind) %>%
 41 |           as.data.frame() %>%
 42 |           mutate(cn_id_raw = .y))
 43 |       connected_cells <- connected_cells %>%
 44 |         mutate(
 45 |           cn_id = cn_id_raw %>%
 46 |             stringr::str_remove("cellAddress_"),
 47 |           cn = cn_id %>%
 48 |             stringr::str_extract("major|minor")
 49 |         )
 50 |       colnames(connected_cells)[1:2] <- c("row", "col")
 51 | 
 52 |       connected_cells <- connected_cells %>%
 53 |         mutate(cn_id_ = cn_id) %>%
 54 |         tidyr::separate(cn_id_, into = c(rep(NA, 3), "d1", "d2")) %>%
 55 |         mutate(dty = (as.numeric(d1) + as.numeric(d2)) %% 2) %>%
 56 |         select(-d1, -d2)
 57 | 
 58 |       connected_cells <- connected_cells %>% mutate(r = d0$row, c = d0$col)
 59 | 
 60 | 
 61 |       if (missing(prior_ca_plot)) {
 62 |         prior_ca_plot <- graphics::plot(ca,
 63 |           prior_plot = prior_plot,
 64 |           gids = d0$data_block,
 65 |           zoom_selected_gids = TRUE,
 66 |           block_boundary = FALSE,
 67 |           direction_text = FALSE,
 68 |           no_plot = TRUE
 69 |         )
 70 |       }
 71 |       this_plot <- prior_ca_plot
 72 |       this_plot <- this_plot + ggplot2::guides(fill = FALSE)
 73 | 
 74 |       this_plot <- this_plot +
 75 |         ggplot2::geom_tile(
 76 |           data = d0,
 77 |           ggplot2::aes(col, -row),
 78 |           color = "red", lwd = 1, inherit.aes = FALSE,
 79 |           alpha = 0,
 80 |           width = 1, height = 1, na.rm = TRUE
 81 |         )
 82 | 
 83 |       if (length(unique(connected_cells$dty)) == 1) {
 84 |         this_plot <- this_plot +
 85 |           ggplot2::geom_curve(
 86 |             data = connected_cells,
 87 |             ggplot2::aes(
 88 |               x = c, y = -r,
 89 |               xend = col, yend = -row,
 90 |               color = cn
 91 |             ),
 92 |             inherit.aes = FALSE, curvature = -0.1,
 93 |             arrow = ggplot2::arrow(), lwd = 1, na.rm = TRUE
 94 |           )
 95 |       } else {
 96 |         this_plot <- this_plot +
 97 |           ggplot2::geom_curve(
 98 |             data = connected_cells %>% filter(dty == 1),
 99 |             ggplot2::aes(
100 |               x = c, y = -r,
101 |               xend = col, yend = -row,
102 |               color = cn
103 |             ),
104 |             inherit.aes = FALSE, curvature = -0.1,
105 |             arrow = ggplot2::arrow(), lwd = 1, lty = "longdash", na.rm = TRUE
106 |           ) +
107 |           ggplot2::geom_curve(
108 |             data = connected_cells %>% filter(dty != 1),
109 |             ggplot2::aes(
110 |               x = c, y = -r,
111 |               xend = col, yend = -row,
112 |               color = cn
113 |             ),
114 |             inherit.aes = FALSE, curvature = 0.1,
115 |             arrow = ggplot2::arrow(), lwd = 1, na.rm = TRUE
116 |           )
117 |       }
118 | 
119 | 
120 |       this_plot <- this_plot +
121 |         ggplot2::guides(color = FALSE) +
122 |         ggplot2::scale_color_manual(values = c(major = "#B5525288", minor = "#4156A888"))
123 | 
124 | 
125 |       return(this_plot)
126 |     }
127 |   }
128 | }
129 | 


--------------------------------------------------------------------------------
/R/class-cell_df.R:
--------------------------------------------------------------------------------
 1 | 
 2 | cell_df_class <- c("cell_df", "cells", "rc_df", "tbl_df", "tbl", "data.frame")
 3 | 
 4 | cell_df_class_internal <- c("cell_df", "cells", "rc_df", "tidyxl", "unpivotr", "readr", "unknown")
 5 | 
 6 | #' @importFrom methods setOldClass
 7 | #' @exportClass cell_df
 8 | 
 9 | setOldClass(cell_df_class)
10 | 
11 | #' `cell_df` class
12 | #'
13 | #' @description
14 | #' The `cell_df` class is a subclass of [`tbl_df`][tibble::tibble()] and [`data.frame`][base::data.frame()],
15 | #' created in order to store cell level information.
16 | #'
17 | #' @section Properties of `cell_df`:
18 | #'
19 | #' Objects of class `cell_df` have:
20 | #' * A `class` attribute of `c("cell_df", "cells", "rc_df", "tbl_df", "tbl", "data.frame")`.
21 | #' * Two column of `integer` type named `row` and `col`. All entries must be positive and not `NA`.
22 | #'   This represents a cell address.
23 | #' * A column of `character` type named `data_type`. Which can contain only `numeric` or `character` as entries.
24 | #'   This represents the data type of the cell (classified to only two categories).
25 | #' * A column of `character` type named `value`. This stores value of corresponding cells.
26 | #' * Apart from these columns it can contain other columns.
27 | #'   However, (`row`, `col`) together should identify the row of the `cell_df` uniquely.
28 | #'
29 | #' @section Applicable methods on `cell_df`:
30 | #' * `print`: Prints identifier of the class and the content of the underlying tibble.
31 | #' * `summary`: Calculates basic stats like number of rows and columns, number of characters and numeric fields, density etc.
32 | #' * `plot`: Plots (using [`ggplot2`][ggplot2::ggplot()]) the cell information as ordinary table.
33 | #' * `as.matrix`: This will transform the data back into tabular form and create a character matrix.
34 | #' * `as.data.frame`: Similar to `as.matrix` this will also transform the data back into tabular form.
35 | #'
36 | #' @seealso
37 | #'
38 | #' The function [`validate_cells`][validate_cells()] which is used to validate `cell_df`.
39 | #'
40 | #' The `as.matrix` and `as.data.frame` method is similar to [`unpivotr::rectify`][unpivotr::rectify()].
41 | #'
42 | #' Object of this class is also compatible to most of the functions from \code{\link[unpivotr:unpivotr-package]{unpivotr}} package.
43 | #'
44 | #' @name cell_df-class
45 | #' @keywords internal
46 | NULL
47 | 
48 | 
49 | new_cell_df <- function(dat) {
50 |   val <- validate_cells(dat)
51 | 
52 |   if (!val) {
53 |     abort(paste0(attr(val, "msg"), collapse = "\n"))
54 |   }
55 | 
56 |   if (!is.integer(dat$row)) {
57 |     dat <- dat %>% mutate(row = as.integer(row))
58 |   }
59 | 
60 |   if (!is.integer(dat$col)) {
61 |     dat <- dat %>% mutate(col = as.integer(col))
62 |   }
63 | 
64 |   # if all above passes
65 | 
66 |   # unclass cell_df induces class
67 |   dat <- unset_cell_df_class(dat)
68 | 
69 |   # re-attach cell_df class
70 |   class(dat) <- class(dat) %>%
71 |     c(cell_df_class, .)
72 | 
73 | 
74 | 
75 |   dat
76 | }
77 | 
78 | unset_cell_df_class <- function(dat) {
79 |   class(dat) <- class(dat) %>%
80 |     setdiff(cell_df_class_internal)
81 |   dat
82 | }
83 | 
84 | is_cell_df <- function(d) {
85 |   inherits(d, "cell_df")
86 | }
87 | 


--------------------------------------------------------------------------------
/R/class-cell_df_analysis.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' `cell_analysis` class
 3 | #'
 4 | #' @description
 5 | #' The `cell_analysis` class is based on list,
 6 | #' created in order to store analysis of cell level information ([`cell_df`][cell_df-class]).
 7 | #'
 8 | #' @section Properties of `cell_analysis`:
 9 | #'
10 | #' Objects of class `cell_analysis` have following named nodes:
11 | #' * `cells` : Contains information about `cell_group_type` in terms of (data, minor and major attributes).(a tibble)
12 | #' * `sections` : Contains boundaries of each data block. (a tibble)
13 | #' * `details` : a list containing further information
14 | #' * `cell_df` : The original [`cell_df`][cell_df-class] which is passed for processing
15 | #'
16 | #' @section Applicable methods on `cell_analysis`:
17 | #' * `print`: Prints identifier of the class and the number of blocks (and potential issues if any).
18 | #' * `plot`: Plots (using [`ggplot2`][ggplot2::ggplot()]) the data-block information.
19 | #'
20 | #' @name cell_analysis-class
21 | #' @keywords internal
22 | NULL
23 | 
24 | #' @exportClass cell_analysis
25 | cell_df_analysis_class <- c("cell_analysis", "cell_df_analysis", "list")
26 | setOldClass(cell_df_analysis_class)
27 | 


--------------------------------------------------------------------------------
/R/class-intermediate.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' @importFrom utils hasName
 3 | 
 4 | 
 5 | detect_cell_df_pattern <- function(dat) {
 6 | 
 7 |   # this is built as per the description of the return values from supported packages
 8 |   # supported packages / functions :
 9 |   # 1) tidyxl::xlsx_cells
10 |   # 2) unpivotr::as_cells
11 |   # 3) readr::melt_csv (and family)
12 | 
13 |   chk <- tibble(
14 |     type = c(
15 |       "tidyxl",
16 |       "unpivotr",
17 |       "readr"
18 |     ),
19 | 
20 |     col_names = list(
21 |       c("sheet", "address", "row", "col", "is_blank", "data_type", "error", "logical", "numeric", "date", "character"),
22 |       c("row", "col", "data_type"),
23 |       c("row", "col", "data_type", "value")
24 |     ),
25 | 
26 |     data_types = list(
27 |       c("error", "logical", "numeric", "date", "character", "blank"),
28 |       c("chr", "cplx", "cplx", "dbl", "fct", "int", "lgl", "list", "ord"),
29 |       c("integer", "character", "date")
30 |     ),
31 | 
32 |     optional_cols = list(
33 |       c("sheet", "error", "logical", "numeric", "date", "character", "blank"),
34 |       c("chr", "cplx", "cplx", "dbl", "fct", "int", "lgl", "list", "ord"),
35 |       c()
36 |     )
37 |   )
38 | 
39 |   if (!hasName(dat, "data_type")) {
40 |     # all of them has data_type
41 |     return("unknown")
42 |   }
43 | 
44 |   d_type <- chk %>%
45 |     mutate(
46 |       ccn = col_names %>% map_lgl(~ hasName(dat, .x) %>% all()),
47 |       cdt = data_types %>% map_int(~ (.x %in% dat$data_type) %>% sum()),
48 |       coc = optional_cols %>% map_int(~ hasName(dat, .x) %>% sum())
49 |     ) %>%
50 |     filter(ccn) %>%
51 |     filter(cdt == max(cdt), coc == max(coc)) %>%
52 |     pull(type)
53 | 
54 |   if (length(d_type) == 0) {
55 |     return("unknown")
56 |   }
57 | 
58 |   if (length(d_type) > 1) {
59 |     d_type <- d_type[1]
60 |   }
61 | 
62 |   d_type
63 | }
64 | 
65 | 
66 | attach_intermediate_class <- function(dat) {
67 |   class(dat) <- c(detect_cell_df_pattern(dat), class(dat))
68 | 
69 |   dat
70 | }
71 | 


--------------------------------------------------------------------------------
/R/class-read_cell_part.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' `read_cell_part` class
 3 | #'
 4 | #' @description
 5 | #' The `read_cell_part` class is based on list,
 6 | #' created in order to store (possible) intermediate [`read_cells`][read_cells()] output in detailed format.
 7 | #'
 8 | #' **Note:** This class is designed mainly for _internal use_.
 9 | #' It is recommended to manipulate objects of this class only for cases where desired output is not coming.
10 | #'
11 | #' @section Properties of `read_cell_part`:
12 | #'
13 | #' Objects of class `read_cell_part` **may** have following named nodes (the nodes may change based on stage):
14 | #' * `file_name` : file name which was given to process
15 | #' * `stage` : stage at which it was last processed.
16 | #' * `info` : a list containing further information on type and content of the file (provided the file got read)
17 | #' * `is_empty` : whether the file contains no 'known tabular information'
18 | #' * `cell_list` : list of [`cell_df`][cell_df-class]
19 | #' (possibly after `Value Attribute Classification` done if stage is higher than `make_cells`)
20 | #' * `cell_analysis_list` : list of [`cell_analysis`][cell_analysis-class]
21 | #' * `final_composition` : final composition (a `tibble`) with only main columns
22 | #' (or all column if `compose_main_cols_only = FALSE`)
23 | #' * `final_composition_main` : only appear if `compose_main_cols_only = FALSE`.
24 | #' This holds final composition (a `tibble`) with only main columns
25 | #'
26 | #' @section Applicable methods on `read_cell_part`:
27 | #' * `print`: Prints identifier of the class and the stage at which it is last processed.
28 | #'
29 | #' @name read_cell_part-class
30 | #' @keywords internal
31 | NULL
32 | 
33 | #' @exportClass read_cell_part
34 | read_cell_part_class <- c("read_cell_part", "list")
35 | setOldClass(read_cell_part_class)
36 | 


--------------------------------------------------------------------------------
/R/cli_bindings.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ok_cli <- function() {
 3 |   is_available("cli")
 4 | }
 5 | 
 6 | cli_bs <- function() {
 7 |   if (ok_cli()) {
 8 |     paste0(cli::symbol$menu, " ")
 9 |   } else {
10 |     "* "
11 |   }
12 | }
13 | cli_tick <- function() {
14 |   if (ok_cli()) {
15 |     cli::symbol$tick
16 |   } else {
17 |     "V"
18 |   }
19 | }
20 | cli_cross <- function() {
21 |   if (ok_cli()) {
22 |     cli::symbol$cross
23 |   } else {
24 |     "X"
25 |   }
26 | }
27 | 
28 | 
29 | cli_bb <- function(x) {
30 |   if (ok_cli()) {
31 |     cli::style_bold(cli::col_blue(x))
32 |   } else {
33 |     x
34 |   }
35 | }
36 | 
37 | cli_b <- function(x) {
38 |   if (ok_cli()) {
39 |     cli::col_blue(x)
40 |   } else {
41 |     x
42 |   }
43 | }
44 | 
45 | cli_r <- function(x) {
46 |   if (ok_cli()) {
47 |     cli::col_red(x)
48 |   } else {
49 |     x
50 |   }
51 | }
52 | 
53 | cli_g <- function(x) {
54 |   if (ok_cli()) {
55 |     cli::col_green(x)
56 |   } else {
57 |     x
58 |   }
59 | }
60 | 
61 | cli_br <- function(x) {
62 |   if (ok_cli()) {
63 |     cli::style_bold(cli::col_red(x))
64 |   } else {
65 |     x
66 |   }
67 | }
68 | 
69 | cli_box <- function(x, ...) {
70 |   if (ok_cli()) {
71 |     cli::cat_boxx(x, ...)
72 |   } else {
73 |     cat(x, sep = "\n")
74 |   }
75 | }
76 | 


--------------------------------------------------------------------------------
/R/collate_columns.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | #' Collate Columns Based on Content
  5 | #'
  6 | #' @description After [`compose_cells`][compose_cells()], this function rearranges and rename attribute-columns in order to
  7 | #' make columns properly aligned, based on the content of the columns.
  8 | #'
  9 | #' @param composed_data output of [`compose_cells`][compose_cells()] (preferably not processed)
 10 | #' @param combine_threshold a numerical threshold (between 0-1) for content-based collation of columns. (Default 1)
 11 | #' @param rest_cols number of rest columns (beyond `combine_threshold` joins these many numbers of columns to keep)
 12 | #' @param retain_other_cols whether to keep other intermediate (and possibly not so important) columns. (Default `FALSE`)
 13 | #' @param retain_cell_address whether to keep columns like (`row`, `col`, `data_block`).
 14 | #' This may be required for [`traceback`][cell_composition_traceback()] (Default `FALSE`)
 15 | #'
 16 | #' @return A column collated data.frame
 17 | #'
 18 | #' @details
 19 | #' * **Dependency on _stringdist_**: If you have \code{\link[stringdist:stringdist-package]{stringdist}} installed,
 20 | #' the approximate string matching will be enhanced. There may be variations in outcome if you have `stringdist`
 21 | #' vs if you don't have it.
 22 | #' * **Possibility of randomness**: If the attribute column is containing many distinct values, then a column representative sample will be drawn.
 23 | #' Hence it is always recommended to [`set.seed`][base::set.seed()] if reproducibility is a matter of concern.
 24 | #'
 25 | #' @export
 26 | #'
 27 | collate_columns <- function(composed_data,
 28 |                             combine_threshold = 1,
 29 |                             rest_cols = Inf,
 30 |                             retain_other_cols = FALSE,
 31 |                             retain_cell_address = FALSE) {
 32 |   ok <- FALSE
 33 | 
 34 |   defcols_this <- defcols
 35 |   if (is.data.frame(composed_data)) {
 36 |     if (!utils::hasName(composed_data, "table_tag")) {
 37 |       defcols_this <- setdiff(defcols_this, "table_tag")
 38 |     }
 39 |     if (all(utils::hasName(composed_data, defcols_this))) {
 40 |       ok <- TRUE
 41 | 
 42 |       if (utils::hasName(composed_data, "table_tag")) {
 43 |         dcl <- composed_data %>%
 44 |           group_by(data_block, table_tag)
 45 |       } else {
 46 |         dcl <- composed_data %>%
 47 |           group_by(data_block)
 48 |       }
 49 | 
 50 |       dcl <- dcl %>%
 51 |         group_split() %>%
 52 |         map(~ {
 53 |           .d <- ungroup(.x)
 54 |           this_cols <- colnames(.d)
 55 |           nm_cols <- this_cols[stringr::str_detect(this_cols, "row|col|corner")]
 56 |           nm_cols <- setdiff(nm_cols, defcols_this)
 57 |           if (length(nm_cols) == 0) {
 58 |             nm_cols <- setdiff(this_cols, defcols_this)
 59 |           }
 60 |           .d <- .d[c(defcols_this, nm_cols)]
 61 |           na_c <- .d %>% map_lgl(~ is.na(.x) %>% all())
 62 |           .d[!na_c]
 63 |         })
 64 |     }
 65 |   } else {
 66 |     # data.frame is a list,  first data.frame check is required
 67 |     if (is.list(composed_data)) {
 68 |       if (all(map_lgl(composed_data, is.data.frame))) {
 69 |         if (!any(map_lgl(composed_data, ~ utils::hasName(.x, "table_tag")))) {
 70 |           defcols_this <- setdiff(defcols_this, "table_tag")
 71 |         }
 72 |         if (all(map_lgl(composed_data, ~ all(utils::hasName(.x, defcols_this))))) {
 73 |           ok <- TRUE
 74 |           dcl <- composed_data
 75 |         }
 76 |       }
 77 |     }
 78 |   }
 79 | 
 80 | 
 81 | 
 82 |   if (!ok) {
 83 |     abort("The argument composed_data has to be output of compose_cells. Given composed_data has no known format.")
 84 |   }
 85 | 
 86 |   if (length(dcl) == 1) {
 87 |     out_d <- dcl[[1]]
 88 | 
 89 |     colnames(out_d) <- stringr::str_replace_all(colnames(out_d), "uncollated_", "old_uc_")
 90 |     colnames(out_d) <- stringr::str_replace_all(colnames(out_d), "collated_", "old_c_")
 91 | 
 92 |     restcols <- setdiff(colnames(out_d), defcols_this)
 93 |     if (length(restcols) > 0) {
 94 |       cn_map_0 <- tibble(cn = restcols) %>%
 95 |         mutate(is_major = stringr::str_detect(tolower(cn), "major")) %>%
 96 |         arrange(cn) %>%
 97 |         mutate(sn = seq_along(cn), sn_m = sn - is_major * (10^10)) %>%
 98 |         arrange(sn_m) %>%
 99 |         mutate(fsn = seq_along(cn), new_cn = paste0("collated_", fsn)) %>%
100 |         select(cn, new_cn)
101 | 
102 |       for (i in seq_along(cn_map_0$cn)) {
103 |         colnames(out_d)[which(colnames(out_d) == cn_map_0$cn[i])] <- cn_map_0$new_cn[i]
104 |       }
105 |     }
106 |   } else {
107 |     out_d <- dcl %>% reduce(reduce_2dfs,
108 |       combine_th = combine_threshold,
109 |       rest_cols = rest_cols,
110 |       retain_other_cols = retain_other_cols
111 |     )
112 |   }
113 | 
114 | 
115 | 
116 | 
117 | 
118 |   if (!retain_cell_address) {
119 |     out_d <- out_d[setdiff(colnames(out_d), c("row", "col", "data_block"))]
120 |   }
121 | 
122 |   out_d[sort(colnames(out_d))]
123 | }
124 | 


--------------------------------------------------------------------------------
/R/collate_columns_etc.R:
--------------------------------------------------------------------------------
 1 | 
 2 | norm_this <- function(x) {
 3 |   m <- min(x, na.rm = TRUE)
 4 |   M <- max(x, na.rm = TRUE)
 5 |   if (m == M) {
 6 |     if (m > 0.5) {
 7 |       x <- rep(1, length(x))
 8 |     } else {
 9 |       x <- rep(0, length(x))
10 |     }
11 |   } else {
12 |     x <- (x - m) / (M - m)
13 |   }
14 |   x
15 | }
16 | 
17 | 
18 | # possibly table tag if across table collation is required
19 | defcols <- c("row", "col", "value", "data_block", "table_tag")
20 | 


--------------------------------------------------------------------------------
/R/common_data_patterns.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #@Dev
 4 | # quick added may need to think through about how to add it properly.
 5 | # common data patterns
 6 | cdp_va_classify_value_rect <- function(d){
 7 |   val_range <- d %>%
 8 |     filter(type == "value") %>%
 9 |     summarise(mr = min(row), mc = min(col), Mr = max(row), Mc =max(col))
10 |   d$type[
11 |     d$row>=val_range$mr &
12 |       d$row<=val_range$Mr &
13 |       d$col>=val_range$mc &
14 |       d$col<=val_range$Mc
15 |   ] <- "value"
16 |   d
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/R/compact_gid_maps.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # diffrent split resulted in same attr_gid
 3 | # these will be compated to single group
 4 | compact_gid_maps <- function(gid_map, admap_main) {
 5 |   gid_map_raw <- gid_map$group_id_map
 6 | 
 7 |   foot_prints <- gid_map_raw %>%
 8 |     group_by(gid) %>%
 9 |     group_split() %>%
10 |     map_df(~ .x %>%
11 |       distinct(row, col, gid) %>%
12 |       arrange(row, col) %>%
13 |       summarise(gid = gid[1], fp = paste0(row, ",", col, collapse = ";")))
14 | 
15 |   ngmap <- foot_prints %>%
16 |     group_by(fp) %>%
17 |     mutate(new_gid = min(gid)) %>%
18 |     ungroup() %>%
19 |     distinct(gid, new_gid)
20 | 
21 |   gid_map_raw_new <- gid_map_raw %>% left_join(ngmap, by = "gid")
22 |   gid_map_raw_new <- gid_map_raw_new %>%
23 |     mutate(new_gid = if_else(is.na(new_gid), gid, new_gid)) %>%
24 |     select(-gid) %>%
25 |     rename(gid = new_gid) %>%
26 |     distinct()
27 | 
28 |   gid_map$group_id_map <- gid_map_raw_new
29 |   gid_map$group_id_boundary <- get_group_id_boundary(gid_map_raw_new)
30 | 
31 |   admap_main_raw_map_new <- admap_main$raw_map %>%
32 |     left_join(ngmap %>% rename(attr_gid = gid, new_attr_gid = new_gid), by = "attr_gid")
33 |   admap_main_raw_map_new <- admap_main_raw_map_new %>%
34 |     mutate(new_attr_gid = if_else(is.na(new_attr_gid), attr_gid, new_attr_gid)) %>%
35 |     select(-attr_gid) %>%
36 |     rename(attr_gid = new_attr_gid) %>%
37 |     distinct()
38 | 
39 |   admap_main$raw_map <- admap_main_raw_map_new
40 | 
41 |   admap_main$map <- admap_main$raw_map %>%
42 |     distinct(attr_gid, data_gid, direction, direction_group, dist, attr_group)
43 | 
44 |   # dimesion analysis is not kept
45 | 
46 |   list(gid_map = gid_map, admap = admap_main)
47 | }
48 | 


--------------------------------------------------------------------------------
/R/detect_LibreOffice.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # sallow level detection of LibreOffice for support of doc
 4 | # check this https://github.com/hrbrmstr/docxtractr/issues/23
 5 | detect_LibreOffice <- function(type = 1) {
 6 |   out <- FALSE
 7 |   if (is_available("docxtractr")) {
 8 |     if (!identical(type, 2)) {
 9 |       get_lo <- function() {
10 |         # an internal function of docxtractr
11 |         # this will work even if they change `lo_find`
12 |         # as then it will be operational by 2nd option below
13 |         lo_find()
14 |       }
15 |       # this is to avoid docxtractr's internal lo_find
16 |       environment(get_lo) <- environment(docxtractr::read_docx)
17 |       test <- try(get_lo(), silent = TRUE)
18 |       if (!inherits(test, "try-error")) {
19 |         if (is.character(test)) {
20 |           if (file.exists(test)) {
21 |             out <- TRUE
22 |           }
23 |         }
24 |       }
25 |     }
26 | 
27 |     if (identical(type, 2) & !out) {
28 |       # heuristic option (should work even when lo_find change)
29 |       bad_doc <- "*?@:bad.doc"
30 |       try(docxtractr::read_docx(bad_doc), silent = TRUE)
31 |       test_op <- options("path_to_libreoffice")[[1]]
32 |       if (!is.null(test_op)) {
33 |         if (is.character(test_op)) {
34 |           if (file.exists(test_op)) {
35 |             out <- TRUE
36 |           }
37 |         }
38 |       }
39 |     }
40 |   }
41 |   out
42 | }
43 | 


--------------------------------------------------------------------------------
/R/extend_data_block.R:
--------------------------------------------------------------------------------
 1 | 
 2 | extend_data_block <- function(dbry, att_map, d_att) {
 3 |   att_map %>%
 4 |     select(gid_att = attr_gid, gid = data_gid) %>%
 5 |     right_join(dbry, by = "gid") %>%
 6 |     left_join(d_att$group_id_boundary, by = c("gid_att" = "gid")) %>%
 7 |     mutate(
 8 |       r_min = pmin(r_min.x, r_min.y, na.rm = TRUE),
 9 |       c_min = pmin(c_min.x, c_min.y, na.rm = TRUE),
10 |       r_max = pmax(r_max.x, r_max.y, na.rm = TRUE),
11 |       c_max = pmax(c_max.x, c_max.y, na.rm = TRUE)
12 |     ) %>%
13 |     group_by(gid) %>%
14 |     summarise(
15 |       r_min = min(r_min, na.rm = TRUE),
16 |       c_min = min(c_min, na.rm = TRUE),
17 |       r_max = max(r_max, na.rm = TRUE),
18 |       c_max = max(c_max, na.rm = TRUE)
19 |     )
20 | }
21 | 


--------------------------------------------------------------------------------
/R/file_etc.R:
--------------------------------------------------------------------------------
 1 | this_file_ext <- function(fn) {
 2 |   x <- stringr::str_split(fn, "\\.")
 3 | 
 4 |   x %>% map_chr(~ {
 5 |     if (length(.x) == 1) {
 6 |       ""
 7 |     } else {
 8 |       rev(.x)[1]
 9 |     }
10 |   })
11 | }
12 | 
13 | is_txt_file <- function(fn) {
14 |   f <- file(fn, "rb", raw = TRUE)
15 |   bytes <- readBin(f, "int", 1000, size = 1, signed = FALSE)
16 |   close(f)
17 |   return(max(bytes) <= 128)
18 | }
19 | 
20 | 
21 | 
22 | common_file_error <- function(fn) {
23 |   if (missing(fn)) {
24 |     abort("No file name given")
25 |   }
26 |   if (length(fn) != 1) {
27 |     abort("only one file name supported")
28 |   }
29 |   if (!file.exists(fn)) {
30 |     abort("file does not exists (or possibly you do not have permission)")
31 |   }
32 | }
33 | 
34 | crude_format_from_signature <- function(fn) {
35 |   common_file_error(fn)
36 | 
37 |   f_8 <- readBin(fn, n = 8, what = "raw")
38 |   # as xls and doc both have same magic number "D0 CF 11 E0 A1 B1 1A E1"
39 |   # many other have it though
40 |   # ref : https://asecuritysite.com/forensics/magic
41 |   xls_doc_magic <- as.raw(c(0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1))
42 |   # either xlsx or docx
43 |   # as xlsx and docx both have same magic number "50 4B 03 04"
44 |   xlsx_docx_magic <- as.raw(c(0x50, 0x4b, 0x03, 0x04))
45 |   pdf_magic <- as.raw(c(0x25, 0x50, 0x44, 0x46))
46 | 
47 |   if (identical(f_8, xls_doc_magic)) {
48 |     return("xls_doc")
49 |   }
50 |   if (identical(f_8[1:4], xlsx_docx_magic)) {
51 |     return("xlsx_docx")
52 |   }
53 |   if (identical(f_8[1:4], pdf_magic)) {
54 |     return("pdf")
55 |   }
56 |   return("unknown")
57 | }
58 | 


--------------------------------------------------------------------------------
/R/fj.R:
--------------------------------------------------------------------------------
 1 | 
 2 | fj <- function(x, y,
 3 |                join_by = c("row", "col", "value", "data_block"),
 4 |                sallow_join = FALSE, sep = " :: ") {
 5 |   c1 <- colnames(x)
 6 |   c2 <- colnames(y)
 7 |   comm_cols <- c1 %>%
 8 |     intersect(c2) %>%
 9 |     setdiff(join_by)
10 |   chk <- comm_cols %>%
11 |     length()
12 |   extra <- NULL
13 |   if (chk > 0) {
14 |     if (identical(sallow_join, TRUE)) {
15 |       # try to merge the columns
16 |       # check first content
17 |       # need to update x, y so using for loop
18 | 
19 |       fmg_wc <- full_join(x, y, by = join_by, suffix = c(".fj1", ".fj2"))
20 | 
21 |       for (cn in comm_cols) {
22 |         xc <- fmg_wc[[paste0(cn, ".fj1")]] %>% stringr::str_trim()
23 |         yc <- fmg_wc[[paste0(cn, ".fj2")]] %>% stringr::str_trim()
24 | 
25 |         xc[is.na(xc)] <- ""
26 |         yc[is.na(yc)] <- ""
27 | 
28 |         fmg_wc[[paste0(cn, ".fj1")]] <- xc
29 |         fmg_wc[[paste0(cn, ".fj2")]] <- yc
30 | 
31 |         # back to NA again to remove them from comparison
32 |         xc[nchar(xc) == 0] <- NA
33 |         yc[nchar(yc) == 0] <- NA
34 | 
35 |         if (all(xc == yc, na.rm = TRUE)) {
36 |           # both the columns are actually equal
37 |           # removing from x, y
38 |           fmg_wc[[cn]] <- fmg_wc[[paste0(cn, ".fj1")]]
39 |           x[[cn]] <- NULL
40 |           y[[cn]] <- NULL
41 |         } else {
42 |           # string join in x
43 |           fmg_wc[[cn]] <- paste0(fmg_wc[[paste0(cn, ".fj1")]], sep, fmg_wc[[paste0(cn, ".fj2")]])
44 |           # removing from x, y
45 |           x[[cn]] <- NULL
46 |           y[[cn]] <- NULL
47 |         }
48 |       }
49 | 
50 |       extra <- unique(fmg_wc[c(join_by, comm_cols)])
51 |     } else {
52 |       abort(paste(
53 |         "unexpected error while joining.",
54 |         "(Please contact developer)"
55 |       ))
56 |     }
57 |   }
58 |   f0 <- full_join(x, y, by = join_by)
59 |   if (!is.null(extra)) {
60 |     if (is.data.frame(extra)) {
61 |       if (nrow(extra) > 0) {
62 |         if (length(setdiff(intersect(colnames(extra), colnames(f0)), join_by)) == 0) {
63 |           f0 <- full_join(f0, extra, by = join_by)
64 |         }
65 |       }
66 |     }
67 |   }
68 |   f0
69 | }
70 | 


--------------------------------------------------------------------------------
/R/generics-read_cell_part.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' @export
 4 | print.read_cell_part <- function(x, ...) {
 5 |   msg <- cli_bb("A partial read_cell")
 6 |   msg <- paste0(
 7 |     msg,
 8 |     "\n",
 9 |     cli_b("At stage "), x$stage
10 |   )
11 |   cat(msg)
12 | }
13 | 


--------------------------------------------------------------------------------
/R/get_all_col_representative.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | get_col_representative <- function(x, cut_th = 500L, lower_it = TRUE, silent = TRUE) {
 5 |   x <- x[!is.na(x)]
 6 |   x <- unique(x)
 7 |   x <- as.character(x)
 8 |   if (lower_it) {
 9 |     x <- tolower(x)
10 |   }
11 |   x <- stringr::str_trim(x)
12 |   x <- unique(x)
13 |   dxt <- tibble(x, nc = nchar(x))
14 |   dxt <- dxt[dxt$nc > 0, ]
15 |   x <- dxt$x
16 |   if (length(x) > cut_th) {
17 |     if (!silent) {
18 |       message(paste0(
19 |         "Representatives for column is selected based on a sample.",
20 |         "\nYou may need to set.seed() to have a reproducible outcome"
21 |       ))
22 |     }
23 |     dxt <- dxt %>% arrange(nc)
24 |     fidx <- round(seq(from = 1.5, to = nrow(dxt) - 0.5, length.out = 10))
25 |     dxtr <- dxt[-fidx, ]
26 |     x1 <- sample(dxtr$x, max(round(cut_th / 10), min(4, length(dxtr$x))))
27 |     x2 <- dxt$x[fidx]
28 |     x <- unique(x1, x2)
29 |   }
30 |   sort(unique(x))
31 | }
32 | 
33 | get_all_col_representative <- function(d, except_cols = defcols, ...) {
34 |   d <- d[setdiff(colnames(d), except_cols)]
35 |   crs <- d %>% map(get_col_representative, ...)
36 |   crsd <- tibble(nn = names(crs), tinf = crs %>% map_chr(~ paste0(sort(.x), collapse = " :: ")))
37 |   crsd <- crsd %>% mutate(tndup = !duplicated(tinf))
38 |   crs <- crs[crsd$nn[crsd$tndup]]
39 | 
40 |   crs
41 | }
42 | 


--------------------------------------------------------------------------------
/R/get_cells_from_admap.R:
--------------------------------------------------------------------------------
 1 | 
 2 | get_cells_from_admap <- function(admap, d_dat, d_att) {
 3 |   cells <- d_dat$group_id_map %>%
 4 |     select(gid, row, col) %>%
 5 |     mutate(cell_group_type = "data")
 6 | 
 7 |   cells <- admap$map %>%
 8 |     mutate(cell_group_type = paste0(attr_group, "_attr")) %>%
 9 |     select(attr_gid, data_gid, cell_group_type) %>%
10 |     left_join(d_att$group_id_map, by = c("attr_gid" = "gid")) %>%
11 |     select(gid = data_gid, row, col, cell_group_type) %>%
12 |     bind_rows(cells) %>%
13 |     distinct()
14 | 
15 |   cells
16 | }
17 | 


--------------------------------------------------------------------------------
/R/get_definiteness_checks.R:
--------------------------------------------------------------------------------
 1 | 
 2 | get_definiteness_checks <- function(definiteness_details, silent = TRUE) {
 3 |   definiteness_checks <- list(
 4 |     # each gid (attr) should map to only one data_gid (this can happen)
 5 |     gid_att = nrow(definiteness_details$gid_att) == 0,
 6 |     # all data id should have at least one NS and WE attr_gid
 7 |     # (this also can happen then data_gid joining maybe required)
 8 |     gid_data = nrow(definiteness_details$gid_data) == 0,
 9 |     all_gid_att = length(definiteness_details$all_gid_att) == 0
10 |   )
11 | 
12 |   if (!definiteness_checks$gid_att) {
13 |     attr(definiteness_checks$gid_att, "msg") <-
14 |       "At least one attribute group-id (gid) is mapped to more than one data_gid (this can happen for overlapped blocks)"
15 |   }
16 | 
17 |   if (!definiteness_checks$gid_data) {
18 |     attr(definiteness_checks$gid_data, "msg") <-
19 |       "At least one data group-id (gid) is lacking either NS or WE direction_group attirbute (this means 2D data block has only 1D attribute)"
20 |   }
21 | 
22 |   if (!definiteness_checks$all_gid_att) {
23 |     attr(definiteness_checks$all_gid_att, "msg") <-
24 |       "At least one attribute group-id (gid) is not mapped with any block"
25 |   }
26 | 
27 |   if (!all(unlist(definiteness_checks))) {
28 |     if (!silent) {
29 |       warn("Heuristic data detection may not be correct. Look at definiteness_checks.")
30 |     }
31 |   }
32 | 
33 |   definiteness_checks
34 | }
35 | 
36 | 
37 | get_definiteness_details <- function(admap_raw, all_attr_gids) {
38 |   unmapped_attr_gids <- setdiff(all_attr_gids, admap_raw$attr_gid)
39 | 
40 |   this_admap <- admap_raw %>% distinct(attr_gid, data_gid, direction_group)
41 | 
42 |   definiteness_details <- list(
43 |     # each gid (attr) should map to only one data_gid (this can happen)
44 |     gid_att = this_admap %>%
45 |       count(attr_gid) %>% filter(n > 1),
46 |     # all data id should have at least one NS and WE attr_gid
47 |     # (this also can happen then data_gid joining maybe required)
48 |     gid_data = this_admap %>%
49 |       group_by(data_gid) %>%
50 |       summarise(ch = all(c("NS", "WE") %in% direction_group)) %>%
51 |       filter(!ch),
52 |     all_gid_att = unmapped_attr_gids
53 |   )
54 | 
55 |   definiteness_details
56 | }
57 | 


--------------------------------------------------------------------------------
/R/get_direction.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' get optimal direction (in terms of `unpivotr` directions)
 4 | #'
 5 | #' @param d_part parts of `data_attr_map_raw`
 6 | #' @details Used internally
 7 | #' @keywords internal
 8 | #' @return a string value denoting direction
 9 | #'
10 | 
11 | get_direction <- function(d_part) {
12 |   directions <- get_unpivotr_direction_names()
13 | 
14 |   if (d_part$direction[1] %in% names(directions)) {
15 |     dirs <- directions[[d_part$direction[1]]]
16 | 
17 |     if (length(dirs) > 1) {
18 |       d0 <- d_part %>%
19 |         distinct(data_gid, row = row_d, col = col_d)
20 | 
21 |       d0s <- d0 %>% summarise(mnr = min(row), mxr = max(row), mnc = min(col), mxc = max(col))
22 | 
23 |       d1 <- tibble(row = seq(from = d0s$mnr, to = d0s$mxr, by = 1), col = d0s$mnc) %>%
24 |         bind_rows(
25 |           tibble(col = seq(from = d0s$mnc, to = d0s$mxc, by = 1), row = d0s$mnr)
26 |         ) %>%
27 |         mutate(data_gid = d0$data_gid[1]) %>%
28 |         unique()
29 | 
30 |       a1 <- d_part %>%
31 |         distinct(attr_gid, row = row_a, col = col_a)
32 | 
33 |       dmd <- tibble()
34 |       for (dir in dirs) {
35 |         dm_now <- get_direction_metric(d1, a1, dir)
36 |         dmd <- dmd %>% bind_rows(tibble(dm = dm_now, dir = dir))
37 |         if (dm_now == 1) break()
38 |       }
39 |       dmd$dir[which.max(dmd$dm)]
40 |     } else {
41 |       dirs[1]
42 |     }
43 |   } else {
44 |     abort("direction name not known.\n(have you tampered a cell-analysis?)")
45 |   }
46 | }
47 | 


--------------------------------------------------------------------------------
/R/get_direction_df.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Get Directional Orientation for Attributes corresponding to a given data
  4 | #' group
  5 | #'
  6 | #' @param dp single data group boundary information
  7 | #' @param datt attribute data (including attribute group id)
  8 | #' @details Used internally by get_data_block_information function
  9 | #' @keywords internal
 10 | #' @return Directional Orientation for Attributes
 11 | #'
 12 | get_direction_df <- function(dp, datt, allow_inside = FALSE) {
 13 |   directions <- list()
 14 | 
 15 |   directions$N <- datt %>%
 16 |     filter(
 17 |       row < dp$r_min,
 18 |       col >= dp$c_min,
 19 |       col <= dp$c_max
 20 |     ) %>%
 21 |     mutate(dist = (dp$r_min - row)) %>%
 22 |     mutate(
 23 |       direction = "N",
 24 |       direction_group = "NS",
 25 |       data_gid = dp$gid
 26 |     )
 27 | 
 28 |   directions$S <- datt %>%
 29 |     filter(
 30 |       row > dp$r_max,
 31 |       col >= dp$c_min,
 32 |       col <= dp$c_max
 33 |     ) %>%
 34 |     mutate(dist = (row - dp$r_max)) %>%
 35 |     mutate(
 36 |       direction = "S",
 37 |       direction_group = "NS",
 38 |       data_gid = dp$gid
 39 |     )
 40 | 
 41 |   directions$W <- datt %>%
 42 |     filter(
 43 |       col < dp$c_min,
 44 |       row >= dp$r_min,
 45 |       row <= dp$r_max
 46 |     ) %>%
 47 |     mutate(dist = (dp$c_min - col)) %>%
 48 |     mutate(
 49 |       direction = "W",
 50 |       direction_group = "WE",
 51 |       data_gid = dp$gid
 52 |     )
 53 | 
 54 |   directions$E <- datt %>%
 55 |     filter(
 56 |       col > dp$c_max,
 57 |       row >= dp$r_min,
 58 |       row <= dp$r_max
 59 |     ) %>%
 60 |     mutate(dist = (col - dp$c_max)) %>%
 61 |     mutate(
 62 |       direction = "E",
 63 |       direction_group = "WE",
 64 |       data_gid = dp$gid
 65 |     )
 66 | 
 67 |   # corner directions
 68 | 
 69 |   directions$NW <- datt %>%
 70 |     filter(
 71 |       row < dp$r_min,
 72 |       col < dp$c_min
 73 |     ) %>%
 74 |     mutate(dist = sqrt((dp$r_min - row)^2 + (dp$c_min - col)^2)) %>%
 75 |     mutate(
 76 |       direction = "NW",
 77 |       direction_group = "corner",
 78 |       data_gid = dp$gid
 79 |     )
 80 | 
 81 |   directions$NE <- datt %>%
 82 |     filter(
 83 |       row < dp$r_min,
 84 |       col > dp$c_max
 85 |     ) %>%
 86 |     mutate(dist = sqrt((dp$r_min - row)^2 + (dp$c_max - col)^2)) %>%
 87 |     mutate(
 88 |       direction = "NE",
 89 |       direction_group = "corner",
 90 |       data_gid = dp$gid
 91 |     )
 92 | 
 93 |   directions$SE <- datt %>%
 94 |     filter(
 95 |       row > dp$r_max,
 96 |       col > dp$c_max
 97 |     ) %>%
 98 |     mutate(dist = sqrt((dp$r_max - row)^2 + (dp$c_max - col)^2)) %>%
 99 |     mutate(
100 |       direction = "SE",
101 |       direction_group = "corner",
102 |       data_gid = dp$gid
103 |     )
104 | 
105 |   directions$SW <- datt %>%
106 |     filter(
107 |       row > dp$r_max,
108 |       col < dp$c_min
109 |     ) %>%
110 |     mutate(dist = sqrt((dp$r_max - row)^2 + (dp$c_min - col)^2)) %>%
111 |     mutate(
112 |       direction = "SW",
113 |       direction_group = "corner",
114 |       data_gid = dp$gid
115 |     )
116 | 
117 |   if (allow_inside) {
118 |     directions$INSIDE <- datt %>%
119 |       filter(
120 |         row >= dp$r_min,
121 |         row <= dp$r_max,
122 |         col >= dp$c_min,
123 |         col <= dp$c_max
124 |       ) %>%
125 |       mutate(dist = 0) %>%
126 |       mutate(
127 |         direction = "INSIDE",
128 |         direction_group = "inside",
129 |         data_gid = dp$gid
130 |       )
131 |   }
132 | 
133 | 
134 |   direction_df <- directions %>% bind_rows()
135 | 
136 |   direction_df
137 | }
138 | 


--------------------------------------------------------------------------------
/R/get_direction_metric.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' get direction metric
 4 | #'
 5 | #' @param d1 part of d_part with data_gid
 6 | #' @param a1 part of d_part with attr_gid
 7 | #' @param direction direction name (compatible with `unpivotr`)
 8 | #' should be one of [`get_unpivotr_direction_names`][get_unpivotr_direction_names()]
 9 | #'
10 | #' @details Used internally by [`get_direction`][get_direction()] function
11 | #' @keywords internal
12 | #' @return a scaled fraction denoting coverage (1 means full coverage) for the supplied direction.
13 | #'
14 | get_direction_metric <- function(d1, a1, direction) {
15 |   l1 <- try(get_direction_metric_part_raw(d1, a1, direction), silent = TRUE)
16 | 
17 |   if (inherits(l1, "try-error")) l1 <- 0
18 |   if (length(l1) != 1) l1 <- 0
19 |   if (is.na(l1)) l1 <- 0
20 | 
21 |   l1 / nrow(d1)
22 | }
23 | 
24 | get_direction_metric_part_raw <- function(d1, a1, direction) {
25 |   # suppressWarnings should be removed once unpivotr::enhead chages
26 |   # this is happening as "All elements of `...` must be named." warning in tidyr
27 |   # ref: https://github.com/tidyverse/tidyr/issues/714
28 |   # ref: https://github.com/nacnudus/unpivotr/issues/26
29 |   suppressWarnings({
30 |     d1 %>%
31 |       enhead(a1, direction) %>%
32 |       filter(!is.na(attr_gid)) %>%
33 |       pull(attr_gid) %>%
34 |       length()
35 |   })
36 | }
37 | 


--------------------------------------------------------------------------------
/R/get_group_id.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | #' Get Group ID for joined cells
 5 | #'
 6 | #' @param dat the (data or attribute) cells (in at least row-col format)
 7 | #'
 8 | #' @details Used internally by get_data_block_information function
 9 | #' @keywords internal
10 | #' @return Group ID attached information in a list
11 | #'
12 | get_group_id <- function(dat) {
13 |   digi_sep <- dat %>%
14 |     summarise(rm = max(row), cm = max(col)) %>%
15 |     map(~ log(.x, base = 10) %>% ceiling()) %>%
16 |     unlist() %>%
17 |     max()
18 |   digi_sep <- digi_sep + 1
19 | 
20 |   # attach r_id, c_id
21 |   dat %>%
22 |     select(row, col) %>%
23 |     arrange(row, col) %>%
24 |     group_by(row) %>%
25 |     mutate(cid = (c(1, diff(col)) != 1) %>% cumsum()) %>%
26 |     ungroup() %>%
27 |     mutate(cid = cid + row * 10^digi_sep) %>%
28 |     arrange(col, row) %>%
29 |     group_by(col) %>%
30 |     mutate(rid = (c(1, diff(row)) != 1) %>% cumsum()) %>%
31 |     ungroup() %>%
32 |     mutate(rid = rid + col * 10^digi_sep) -> drc_id
33 | 
34 |   # attach g_id
35 |   drc_id <- drc_id %>% mutate(gid = rid)
36 | 
37 |   n_gid <- drc_id %>%
38 |     summarise(n_distinct(gid)) %>%
39 |     pull(1)
40 | 
41 |   repeat ({
42 |     drc_id %>%
43 |       group_by(cid) %>%
44 |       mutate(gid = min(gid)) %>%
45 |       group_by(rid) %>%
46 |       mutate(gid = min(gid)) %>%
47 |       ungroup() -> drc_id
48 | 
49 |     if (n_gid > (drc_id %>% summarise(n_distinct(gid)) %>% pull(1))) {
50 |       n_gid <- drc_id %>%
51 |         summarise(n_distinct(gid)) %>%
52 |         pull(1)
53 |     } else {
54 |       break()
55 |     }
56 |   })
57 | 
58 |   drc_id <- drc_id %>% mutate(gid = as.character(gid))
59 | 
60 |   # boundary
61 |   drc_boundary <- get_group_id_boundary(drc_id)
62 | 
63 |   list(group_id_map = drc_id, group_id_boundary = drc_boundary)
64 | }
65 | 
66 | 
67 | get_group_id_boundary <- function(drc_id) {
68 |   drc_id %>%
69 |     group_by(gid) %>%
70 |     summarise(r_min = min(row), c_min = min(col), r_max = max(row), c_max = max(col))
71 | }
72 | 
73 | 
74 | get_group_id_join_gids <- function(old_group_id_info, gid_map) {
75 |   old_group_id_info$group_id_map <- old_group_id_info$group_id_map %>%
76 |     left_join(gid_map, by = "gid") %>%
77 |     mutate(new_gid = if_else(is.na(new_gid), gid, new_gid)) %>%
78 |     select(-gid) %>%
79 |     rename(gid = new_gid)
80 |   old_group_id_info$group_id_boundary <- get_group_id_boundary(old_group_id_info$group_id_map)
81 |   old_group_id_info
82 | }
83 | 


--------------------------------------------------------------------------------
/R/get_links_df.R:
--------------------------------------------------------------------------------
 1 | 
 2 | get_links_df <- function(gid_joins) {
 3 |   gids <- unlist(gid_joins) %>%
 4 |     unique() %>%
 5 |     sort()
 6 |   ldf <- NULL
 7 |   repeat({
 8 |     l0 <- linked_to(gids[1], gid_joins)
 9 |     ldf <- ldf %>% bind_rows(tibble(gid = l0, new_gid = min(l0)))
10 |     gids <- setdiff(gids, ldf$gid)
11 |     if (length(gids) == 0) break()
12 |   })
13 |   ldf
14 | }
15 | 
16 | linked_to_part <- function(x, gid_joins) {
17 |   gid_joins %>%
18 |     map(~ {
19 |       if (x %in% .x) {
20 |         .x
21 |       } else {
22 |         NULL
23 |       }
24 |     }) %>%
25 |     unlist() %>%
26 |     unique() %>%
27 |     sort()
28 | }
29 | 
30 | linked_to <- function(x, gid_joins) {
31 |   l0 <- linked_to_part(x, gid_joins)
32 |   repeat({
33 |     l1 <- l0 %>%
34 |       map(~ linked_to_part(.x, gid_joins)) %>%
35 |       unlist() %>%
36 |       unique() %>%
37 |       sort()
38 |     if (!identical(l1, l0)) {
39 |       l0 <- l1
40 |     } else {
41 |       break()
42 |     }
43 |   })
44 | 
45 |   l1
46 | }
47 | 


--------------------------------------------------------------------------------
/R/get_unpivotr_direction_names.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' get unpivotr direction names
 4 | #'
 5 | #' @details Kept for compatibility. Used internally by get_direction function.
 6 | #' @keywords internal
 7 | #' @return directions as used in unpivotr package with directional grouping
 8 | #'
 9 | get_unpivotr_direction_names <- function() {
10 | 
11 |   # direction order
12 |   # "N" "NNW" "NNE" "ABOVE"
13 |   # "W" "WNW" "WSW" "LEFT"
14 |   # "S" "SSW" "SSE" "BELOW"
15 |   # "E" "ENE" "ESE" "RIGHT"
16 | 
17 |   list(
18 |     N = c("N", "NNW", "NNE", "ABOVE"),
19 |     W = c("W", "WNW", "WSW", "LEFT"),
20 |     S = c("S", "SSW", "SSE", "BELOW"),
21 |     E = c("E", "ENE", "ESE", "RIGHT"),
22 |     NE = "NNE",
23 |     NW = "NNW",
24 |     SE = "SSE",
25 |     SW = "SSW"
26 |   )
27 | }
28 | 


--------------------------------------------------------------------------------
/R/global_vars.R:
--------------------------------------------------------------------------------
 1 | ## fix for R CMD check
 2 | if (getRversion() >= "2.15.1") {
 3 |   gvars <- c(
 4 | 
 5 |     ########
 6 |     # for dplyr oprations : non-standard evaluation (nse)
 7 | 
 8 |     ".", "ad", "ag",
 9 |     "aid", "attr_gid", "attr_gid_split", "attr_group",
10 |     "c_dim", "c_dim_data", "c_max", "c_max.x",
11 |     "c_max.y", "c_min", "c_min.x", "c_min.y",
12 |     "ccn", "cdt", "cell_group_type", "ch",
13 |     "cid", "cn", "cn_id", "cn_id_",
14 |     "cname", "cname_new", "cname_ord", "coc",
15 |     "col_a", "col_d", "col_names", "col_orig",
16 |     "d", "d1", "d2", "data_block",
17 |     "data_gid", "data_type", "data_types", "date_raw",
18 |     "decision", "dir_n", "direction", "direction_basic",
19 |     "direction_group", "dist", "dist_", "dty",
20 |     "dummy_order", "file_type", "full_dim", "full_dim_orig",
21 |     "g_id_a", "g_id_e", "g_id_v", "gid",
22 |     "implemented", "is_blank", "is_blank_not_num_c", "is_blank_not_num_cb",
23 |     "is_full_dim_present", "is_num", "m_dist", "md",
24 |     "n_att", "n_dirs", "natural_gid", "nc",
25 |     "new_attr_gid", "new_attr_group", "new_dist", "new_gid",
26 |     "new_type", "new_type_c", "new_type_r", "not_num_c",
27 |     "not_num_cb", "nt", "num_and_pm", "num_c",
28 |     "num_c_len", "num_try", "optional_cols", "package",
29 |     "pkg_installed", "present_num_c_b", "r", "r_dim",
30 |     "r_dim_data", "r_max", "r_max.x", "r_max.y",
31 |     "r_min", "r_min.x", "r_min.y", "raw_value",
32 |     "rc", "rc_n", "rel_dim", "rid",
33 |     "RN", "row_a", "row_d", "row_orig",
34 |     "sheet", "support_possible", "this_attr_max_rel", "txt",
35 |     "txt_orig", "txt_size_", "type", "val_type",
36 |     "value", "value_chk", "id", "block",
37 |     "cn_id_raw", "cr", "n1", "n2",
38 |     "new_name", "old_name", "tinf", "fsn",
39 |     "is_major", "new_cn", "sn", "sn_m",
40 |     "table_tag", "fp", "file_type_raw", "original_file_type",
41 | 
42 |     ########
43 |     # for shiny
44 | 
45 |     "absolutePanel", "actionButton", "br",
46 |     "brushedPoints", "brushOpts", "callModule", "checkboxInput",
47 |     "clickOpts", "conditionalPanel", "dialogViewer", "div",
48 |     "h3", "h5", "icon", "incProgress",
49 |     "is.reactive", "isolate", "nearPoints", "NS",
50 |     "observe", "observeEvent", "plotOutput", "radioButtons",
51 |     "reactive", "reactiveVal", "removeNotification", "renderPlot",
52 |     "runGadget", "selectizeInput", "shinyApp", "showNotification",
53 |     "sliderInput", "span", "stopApp", "tags",
54 |     "updateSelectizeInput", "wellPanel", "withProgress",
55 | 
56 |     ########
57 |     # for miniUI
58 | 
59 |     "gadgetTitleBar", "miniButtonBlock", "miniContentPanel",
60 |     "miniPage", "miniTabPanel", "miniTabstripPanel",
61 | 
62 |     ########
63 |     # for rJava and docxtractr internal
64 | 
65 |     "lo_find", ".jnew"
66 |   )
67 | 
68 |   utils::globalVariables(gvars)
69 |   rm(gvars)
70 | }
71 | 


--------------------------------------------------------------------------------
/R/is_attachable.R:
--------------------------------------------------------------------------------
 1 | 
 2 | is_attachable <- function(gid1, gid2, group_info, whole_data, data_attr_map) {
 3 | 
 4 |   # should have similar major sides (of attributes)
 5 |   if (!identical(
 6 |     data_attr_map %>% filter(data_gid == gid1) %>% pull(direction) %>% sort(),
 7 |     data_attr_map %>% filter(data_gid == gid2) %>% pull(direction) %>% sort()
 8 |   )) {
 9 |     return(FALSE)
10 |   }
11 | 
12 |   # if any intersecting cell present
13 |   gc1 <- group_info$group_id_map %>%
14 |     filter(gid == gid1) %>%
15 |     select(row, col)
16 |   gc2 <- group_info$group_id_map %>%
17 |     filter(gid == gid2) %>%
18 |     select(row, col)
19 |   gcj <- gc1 %>% inner_join(gc2, by = c("row", "col"))
20 |   if (nrow(gcj) > 0) {
21 |     return(TRUE)
22 |   }
23 | 
24 |   # should have no other entry within the enclosed combined boundary
25 |   this_group_info <- group_info
26 |   this_group_info$group_id_map <- this_group_info$group_id_map %>% filter(gid %in% c(gid1, gid2))
27 |   this_group_info <- get_group_id_join_gids(
28 |     this_group_info,
29 |     tibble(gid = c(gid1, gid2), new_gid = gid1)
30 |   )
31 |   combined_boundary <- this_group_info$group_id_boundary
32 |   this_region_data <- whole_data %>%
33 |     filter(
34 |       row <= combined_boundary$r_max,
35 |       row >= combined_boundary$r_min,
36 |       col <= combined_boundary$c_max,
37 |       col >= combined_boundary$c_min
38 |     )
39 |   this_region_data_rest <- this_region_data %>%
40 |     anti_join(this_group_info$group_id_map, by = c("row", "col")) %>%
41 |     filter(type %in% c("value", "attribute"))
42 | 
43 |   if (nrow(this_region_data_rest) > 0) {
44 |     return(FALSE)
45 |   }
46 | 
47 |   return(TRUE)
48 | }
49 | 


--------------------------------------------------------------------------------
/R/is_available.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # this is designed to test and simulate
 3 | # non availability of certain packages
 4 | is_available <- function(pkgname) {
 5 |   if (pkgname %in% tidycells_pkg_env$na_pkgs) {
 6 |     return(FALSE)
 7 |   }
 8 |   # kept for safe_dependency_check
 9 |   if (pkgname %in% names(tidycells_pkg_env$safe_dependency_check)) {
10 |     chk <- tidycells_pkg_env$safe_dependency_check[[pkgname]]()
11 |     if (isFALSE(chk)) {
12 |       return(FALSE)
13 |     }
14 |   }
15 |   rlang::is_installed(pkgname)
16 | }
17 | 
18 | not_available <- function(pkgs, add = TRUE) {
19 |   if (missing(pkgs)) {
20 |     tidycells_pkg_env$na_pkgs <- NULL
21 |   } else {
22 |     if (length(pkgs) >= 1) {
23 |       if (is.character(pkgs)) {
24 |         if (add) {
25 |           old <- tidycells_pkg_env$na_pkgs
26 |           pkgs <- c(old, pkgs)
27 |         }
28 |         tidycells_pkg_env$na_pkgs <- pkgs
29 |       }
30 |     }
31 |   }
32 |   return(invisible(0))
33 | }
34 | 


--------------------------------------------------------------------------------
/R/is_conforms_to_rcdf.R:
--------------------------------------------------------------------------------
  1 | # row col df : rc_df
  2 | 
  3 | is_conforms_to_rcdf <- function(d) {
  4 |   UseMethod("is_conforms_to_rcdf")
  5 | }
  6 | 
  7 | is_conforms_to_rcdf.data.frame <- function(d) {
  8 |   msg <- character(0)
  9 |   lvl <- 0
 10 | 
 11 |   decision <- FALSE
 12 | 
 13 |   if (hasName(d, "row") &
 14 |     hasName(d, "col")) {
 15 |     int_chk <- FALSE
 16 | 
 17 |     if (is.integer(d$row) & is.integer(d$col)) {
 18 |       int_chk <- TRUE
 19 |     } else {
 20 |       suppressMessages(suppressWarnings({
 21 | 
 22 |         # delete potential columns with same name
 23 |         # this is for checking only
 24 |         d$row_orig <- NULL
 25 |         d$col_orig <- NULL
 26 |         d$chk <- NULL
 27 |         # (so that no duplicate name come in following section of code)
 28 | 
 29 |         d <- d %>%
 30 |           rename(
 31 |             row_orig = row,
 32 |             col_orig = col
 33 |           ) %>%
 34 |           mutate(
 35 |             row = as.integer(row_orig),
 36 |             col = as.integer(col_orig),
 37 |             # diff with orig
 38 |             row_d = as.numeric(row_orig) - row,
 39 |             col_d = as.numeric(col_orig) - col,
 40 |             # collect overall checks
 41 |             chk = (row_d == 0) & (col_d == 0)
 42 |           )
 43 |       }))
 44 | 
 45 |       int_chk <- all(d$chk, na.rm = TRUE)
 46 | 
 47 |       if (length(int_chk) != 1) int_chk <- FALSE
 48 | 
 49 |       if (is.na(int_chk)) int_chk <- FALSE
 50 |     }
 51 | 
 52 | 
 53 |     if (int_chk) {
 54 |       rest_chk <- tibble(chks = c(
 55 |         all(d$row > 0),
 56 |         all(d$col > 0),
 57 |         !any(is.na(d$row)),
 58 |         !any(is.na(d$col)),
 59 |         (d %>% count(row, col) %>% filter(n > 1) %>% nrow()) == 0
 60 |       ), neg_msgs = c(
 61 |         "all rows are not positive",
 62 |         "all cols are not positive",
 63 |         "NA present in rows",
 64 |         "NA present in cols",
 65 |         "for each (row, col) pair only one cell should be present: which is not true"
 66 |       ))
 67 | 
 68 |       decision <- all(rest_chk$chks)
 69 |       msg <- rest_chk$neg_msgs[!rest_chk$chks]
 70 |       if (any(!rest_chk$chks)) {
 71 |         lvl <- max(which(!rest_chk$chks), na.rm = TRUE) + 2
 72 |       }
 73 |     } else {
 74 |       decision <- FALSE
 75 |       msg <- "row/col is not interger (or meaningful coercion not possible)"
 76 |       lvl <- 2
 77 |     }
 78 |   } else {
 79 |     decision <- FALSE
 80 |     msg <- "row/col column not present"
 81 |     lvl <- 1 + (hasName(d, "row") + hasName(d, "col")) / 2
 82 |   }
 83 | 
 84 |   if (length(msg)) {
 85 |     attr(decision, "msg") <- msg[!is.na(msg)]
 86 |   }
 87 | 
 88 |   if (lvl > 0) {
 89 |     attr(decision, "lvl") <- lvl
 90 |   }
 91 |   decision
 92 | }
 93 | 
 94 | is_conforms_to_rcdf.matrix <- function(d) {
 95 |   d %>%
 96 |     as.data.frame() %>%
 97 |     is_conforms_to_rcdf.data.frame()
 98 | }
 99 | 
100 | 
101 | as_rc_df <- function(d) {
102 |   d0 <- d %>% select(row, col)
103 |   if (!is_conforms_to_rcdf(d0)) {
104 |     abort("Data does not conforms to rcdf format")
105 |   }
106 |   class(d0) <- setdiff(class(d0), c("cell_df", "cells")) %>%
107 |     c("rc_df", .)
108 |   d0
109 | }
110 | 


--------------------------------------------------------------------------------
/R/is_xlsx_ok.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # function to detect if xlsx working
 4 | is_xlsx_ok <- function(no_pkg_check = F) {
 5 |   if (!no_pkg_check) {
 6 |     # this is created for safe dependency checks
 7 |     if (!is_available("xlsx")) {
 8 |       return(FALSE)
 9 |     }
10 |   }
11 | 
12 |   sample_xls <- system.file("extdata", "messy", "xls.pdf", package = "tidycells", mustWork = TRUE)
13 | 
14 |   suppressMessages(
15 |     suppressWarnings(
16 |       dxt <- try(xlsx::read.xlsx(sample_xls, sheetIndex = 1), silent = TRUE)
17 |     )
18 |   )
19 | 
20 |   if (is.data.frame(dxt)) {
21 |     return(TRUE)
22 |   } else {
23 |     return(FALSE)
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/R/make_read_cell_part.R:
--------------------------------------------------------------------------------
 1 | 
 2 | make_read_cell_part <- function(obj) {
 3 |   stage <- attr(obj, "read_cells_stage")
 4 |   if (is.null(stage)) {
 5 |     abort(paste0(
 6 |       "No 'read_cells_stage' attribute found!",
 7 |       "\nMake sure the 'obj' is outcome from read_cells with 'simplify = TRUE' and 'at_level<6'"
 8 |     ))
 9 |   }
10 | 
11 |   if (!(stage %in% read_cell_task_orders)) {
12 |     abort("Unknown stage of the object!")
13 |   }
14 | 
15 |   attr(obj, "read_cells_stage") <- NULL
16 | 
17 |   out_l <- list(stage = stage)
18 |   if (stage == read_cell_task_orders[1]) {
19 |     out_l$info <- list(content = obj)
20 |   }
21 | 
22 |   if (stage == read_cell_task_orders[2]) {
23 |     out_l$is_empty <- is.null(obj)
24 |     out_l$cell_list <- obj
25 |   }
26 | 
27 |   if (stage == read_cell_task_orders[3]) {
28 |     out_l$is_empty <- is.null(obj)
29 |     out_l$cell_list <- obj
30 |   }
31 | 
32 |   if (stage == read_cell_task_orders[4]) {
33 |     out_l$is_empty <- is.null(obj)
34 |     out_l$cell_analysis_list <- obj
35 |   }
36 | 
37 |   # kept for compatibilty maybe this is not required
38 |   if (stage == read_cell_task_orders[5]) {
39 |     out_l$is_empty <- is.null(obj)
40 |     out_l$final_composition <- obj
41 |   }
42 | 
43 |   class(out_l) <- read_cell_part_class
44 | 
45 |   out_l
46 | }
47 | 


--------------------------------------------------------------------------------
/R/mask_data.R:
--------------------------------------------------------------------------------
 1 | # for sharing data
 2 | mask_data <- function(d) {
 3 |   validate_cells(d)
 4 |   if (hasName(d, "type")) {
 5 |     d0 <- d %>% mutate(value = recode(type, attribute = "A", value = "0", empty = ""))
 6 |   } else {
 7 |     d0 <- d %>% mutate(value = recode(data_type, character = "A", numeric = "0"))
 8 |   }
 9 |   as_tibble(d0)
10 | }
11 | 


--------------------------------------------------------------------------------
/R/merge_admaps.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | merge_admaps <- function(x, y) {
 4 |   this <- list()
 5 |   this$raw_map <- x$raw_map %>%
 6 |     bind_rows(y$raw_map) %>%
 7 |     distinct()
 8 |   this$map <- x$map %>%
 9 |     bind_rows(y$map) %>%
10 |     distinct()
11 |   this$dimension_analysis <- list()
12 |   this$dimension_analysis$data_gid_dim <- x$dimension_analysis$data_gid_dim %>%
13 |     bind_rows(y$dimension_analysis$data_gid_dim) %>%
14 |     distinct()
15 |   this$dimension_analysis$attr_data_dim <- x$dimension_analysis$attr_data_dim %>%
16 |     bind_rows(y$dimension_analysis$attr_data_dim) %>%
17 |     distinct()
18 |   this
19 | }
20 | 


--------------------------------------------------------------------------------
/R/multi_names.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ## This file keeps entries for multiple names of same functions
 3 | ##
 4 | ## analyse_cells <- analyze_cells
 5 | 
 6 | 
 7 | #' Analyse Cells
 8 | #'
 9 | #' @description Note that this is an other name for `tidycells::analyze_cells`.
10 | #' After [`Value Attribute Classification`][value_attribute_classify()] done on a [`cell_df`][cell_df-class] next task to do is
11 | #' analyze it's contents for data block detection, attribute orientation identification etc. The function `analyze_cells` (and also `analyse_cells`)
12 | #' does the same for you.
13 | #'
14 | #' **Note**:
15 | #' \if{html}{If you are not sure about what package functions actually do or how they work together,
16 | #' please start with \href{../doc/tidycells-intro.html}{\code{vignette("tidycells-intro")}}.}
17 | #' \if{latex}{If you are not sure about what package functions actually do or how they work together,
18 | #' please start with \code{vignette("tidycells-intro")}.}
19 | #'
20 | #' @param d A [`cell_df`][cell_df-class] after [`Value Attribute Classification`][value_attribute_classify()] done
21 | #' @param silent logical scalar indicating whether to raise a warning if heuristic detection fails. (Default TRUE).
22 | #'
23 | #' @details it returns detailed analysis of the data structure including data block detection, attribute orientation detection etc.
24 | #' The argument `silent` is set to `TRUE` by default, as the warning will be given whenever the [`cell_analysis`][cell_analysis-class] is printed.
25 | #'
26 | #' After this step one may like to do :
27 | #' * [`compose_cells`][compose_cells()]
28 | #'
29 | #' If in an interactive session, following additional functions can be helpful for interactive visualizations:
30 | #' * [`visual_data_block_inspection`][visual_data_block_inspection()]
31 | #' * [`visual_orientation_modification`][visual_orientation_modification()]
32 | #' * [`visual_traceback`][visual_traceback()]
33 | #'
34 | #' @return Detailed analysis of the cell data structure.
35 | #' Which will be a [`cell_analysis`][cell_analysis-class] class object.
36 | #'
37 | #' @seealso [`compose_cells`][compose_cells()], [`collate_columns`][collate_columns()], [`analyze_cells`][analyze_cells()]
38 | #' @keywords internal
39 | #' @export
40 | analyse_cells <- function(d, silent = TRUE) {
41 |   analyze_cells_raw(d = d, silent = silent)
42 | }
43 | 


--------------------------------------------------------------------------------
/R/read_excel_whole.R:
--------------------------------------------------------------------------------
 1 | read_excel_whole <- function(fn) {
 2 |   if (is_available("xlsx")) {
 3 |     # preferred
 4 |     read_xls_from_xlsx(fn)
 5 |   } else {
 6 |     message("Using readxl to read xls. Manually check date and numeric cells. (for better result install xlsx package)")
 7 |     read_excel_whole_readxl(fn)
 8 |   }
 9 | }
10 | 


--------------------------------------------------------------------------------
/R/read_excel_whole_readxl.R:
--------------------------------------------------------------------------------
 1 | # readxl
 2 | # possible_date_range is somehow inspired by LibreOffice
 3 | read_excel_whole_part_readxl <- function(fn, sheet = 1L, possible_date_range = c(as.Date("1930-01-01"), Sys.Date() + 3800)) {
 4 |   if (!is_available("readxl")) {
 5 |     abort("'readxl' package is required")
 6 |   }
 7 | 
 8 |   suppressWarnings({
 9 |     d0 <- readxl::read_excel(fn,
10 |       col_names = FALSE, col_types = "text", sheet = sheet, .name_repair = "minimal"
11 |     )
12 |     d1 <- readxl::read_excel(fn,
13 |       col_names = FALSE, col_types = "date", sheet = sheet, .name_repair = "minimal"
14 |     )
15 |   })
16 | 
17 |   possible_date_range <- as.character(possible_date_range)
18 | 
19 |   d0f <- as.matrix(d0) %>% as.character()
20 |   d1f <- as.matrix(d1) %>% as.character()
21 |   d1f <- if_else(d1f <= max(possible_date_range) & d1f >= min(possible_date_range), d1f, NA_character_)
22 | 
23 |   df <- if_else(is.na(d1f), d0f, d1f)
24 | 
25 |   d <- matrix(df, nrow = nrow(d0))
26 | 
27 |   as.data.frame(d, stringsAsFactors = FALSE) %>%
28 |     tibble::as_tibble()
29 | }
30 | 
31 | read_excel_whole_readxl <- function(fn) {
32 |   if (!is_available("readxl")) {
33 |     abort("'readxl' package is required")
34 |   }
35 |   sheets <- readxl::excel_sheets(fn)
36 |   lout <- sheets %>% map(~ read_excel_whole_part_readxl(fn, sheet = .x))
37 |   names(lout) <- sheets
38 |   lout
39 | }
40 | 


--------------------------------------------------------------------------------
/R/read_pdf_from_tabulizer.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | read_pdf_from_tabulizer <- function(fn, no_pkg_check = F) {
 4 |   if (!no_pkg_check) {
 5 |     if (!is_available("tabulizer")) {
 6 |       abort("'tabulizer' package is required")
 7 |     }
 8 |   }
 9 | 
10 | 
11 |   # read in both ways [time consuming!]
12 |   # as decide may not function as expected
13 |   pl <- try(tabulizer::extract_tables(fn, method = "lattice"), silent = TRUE)
14 |   ps <- try(tabulizer::extract_tables(fn, method = "stream"), silent = TRUE)
15 | 
16 |   # final
17 |   pf <- NULL
18 | 
19 |   # if both are try-error return one of them
20 |   if (inherits(pl, "try-error") & inherits(ps, "try-error")) {
21 |     pf <- pl
22 |   } else {
23 |     if (!inherits(pl, "try-error") & is.list(pl) & inherits(ps, "try-error")) {
24 |       pf <- pl
25 |     }
26 | 
27 |     if (inherits(pl, "try-error") & !inherits(ps, "try-error") & is.list(ps)) {
28 |       pf <- ps
29 |     }
30 | 
31 |     if (!inherits(pl, "try-error") & !inherits(ps, "try-error")) {
32 |       # if length unequal return the same with more length
33 |       if (length(pl) > length(ps)) {
34 |         pf <- pl
35 |       }
36 | 
37 |       if (length(pl) < length(ps)) {
38 |         pf <- ps
39 |       }
40 | 
41 |       # if both of them have same length
42 |       if (length(pl) == length(ps)) {
43 |         # do degenrate study
44 |         degenrate_l <- pl %>%
45 |           map(dim) %>%
46 |           map_lgl(~ any(.x < 2))
47 |         degenrate_s <- ps %>%
48 |           map(dim) %>%
49 |           map_lgl(~ any(.x < 2))
50 | 
51 |         pf <- seq_along(pl) %>%
52 |           map(~ {
53 |             if (degenrate_l[.x]) {
54 |               return(ps[[.x]])
55 |             }
56 |             if (degenrate_s[.x]) {
57 |               return(pl[[.x]])
58 |             }
59 | 
60 |             # prefer low area
61 |             al <- dim(pl[[.x]]) %>% prod()
62 |             as <- dim(ps[[.x]]) %>% prod()
63 |             if (al <= as) {
64 |               return(pl[[.x]])
65 |             }
66 |             if (al > as) {
67 |               return(ps[[.x]])
68 |             }
69 |           })
70 |       }
71 |     }
72 |   }
73 | 
74 |   return(pf)
75 | }
76 | 


--------------------------------------------------------------------------------
/R/read_xls_from_xlsx.R:
--------------------------------------------------------------------------------
  1 | 
  2 | read_xls_from_xlsx <- function(fn, no_pkg_check = F) {
  3 |   if (!no_pkg_check) {
  4 |     # this is created for safe dependency checks
  5 |     if (!is_available("xlsx")) {
  6 |       abort("'xlsx' package is required")
  7 |     }
  8 |   }
  9 | 
 10 | 
 11 |   # local functions
 12 |   get_date <- function(x) {
 13 |     if (identical(x, round(x))) {
 14 |       xo <- as.Date(x - 25569, origin = "1970-01-01")
 15 |     } else {
 16 |       xo <- as.POSIXct((x - 25569) * 86400,
 17 |         tz = "GMT",
 18 |         origin = "1970-01-01"
 19 |       )
 20 |     }
 21 |     xo
 22 |   }
 23 | 
 24 |   for_a_sheet <- function(sheet) {
 25 |     rows <- xlsx::getRows(sheet)
 26 |     if (length(rows) == 0) {
 27 |       return(NULL)
 28 |     } # exit early
 29 | 
 30 |     cells <- xlsx::getCells(rows)
 31 |     res <- cells %>% map(xlsx::getCellValue)
 32 | 
 33 |     dat <- names(res) %>%
 34 |       stringr::str_split("\\.") %>%
 35 |       map(as.integer) %>%
 36 |       reduce(rbind) %>%
 37 |       as_tibble(.name_repair = "minimal")
 38 | 
 39 |     colnames(dat) <- c("row", "col")
 40 | 
 41 |     dat <- dat %>% mutate(raw_value = res)
 42 |     # credit goes directly to xlsx-R-Package creators
 43 |     dateUtil <- date_utils()
 44 | 
 45 | 
 46 |     dat <- dat %>% mutate(is_num = raw_value %>% map_lgl(is.numeric))
 47 | 
 48 |     dat_n <- dat %>%
 49 |       filter(is_num)
 50 | 
 51 |     dat_c <- dat %>%
 52 |       filter(!is_num) %>%
 53 |       mutate(data_type = "chr")
 54 | 
 55 |     if (nrow(dat_c) > 0) {
 56 |       dat_c <- dat_c %>%
 57 |         mutate(chr = raw_value %>% map_chr(1))
 58 |     }
 59 | 
 60 |     num_cells <- cells[dat$is_num]
 61 | 
 62 |     if (length(num_cells) > 0) {
 63 |       # possible dates
 64 |       is_date <- num_cells %>% map_lgl(dateUtil$isCellDateFormatted)
 65 | 
 66 |       dat_n <- dat_n %>%
 67 |         mutate(is_date = is_date)
 68 | 
 69 |       dat_n_nd <- dat_n %>%
 70 |         filter(!is_date) %>%
 71 |         mutate(data_type = "dbl")
 72 | 
 73 |       if (nrow(dat_n_nd) > 0) {
 74 |         dat_n_nd <- dat_n_nd %>%
 75 |           mutate(dbl = raw_value %>% map_dbl(1))
 76 |       }
 77 | 
 78 | 
 79 |       dat_n_d <- dat_n %>%
 80 |         filter(is_date)
 81 | 
 82 |       if (nrow(dat_n_d) > 0) {
 83 |         dat_n_d <- dat_n_d %>%
 84 |           mutate(date_raw = raw_value %>% map(get_date))
 85 | 
 86 |         dat_n_d <- dat_n_d %>%
 87 |           mutate(data_type = if_else(
 88 |             date_raw %>% map_lgl(~ inherits(.x, "POSIXct")),
 89 |             true = "dttm",
 90 |             false = "date"
 91 |           ))
 92 | 
 93 |         dat_n_d_dt <- dat_n_d %>%
 94 |           filter(data_type == "date")
 95 | 
 96 |         if (nrow(dat_n_d_dt) > 0) {
 97 |           dat_n_d_dt <- dat_n_d_dt %>%
 98 |             mutate(date = date_raw %>% map(1) %>% reduce(c))
 99 |         }
100 | 
101 |         dat_n_d_dttm <- dat_n_d %>%
102 |           filter(data_type == "dttm")
103 | 
104 |         if (nrow(dat_n_d_dttm) > 0) {
105 |           dat_n_d_dttm <- dat_n_d_dttm %>%
106 |             mutate(dttm = date_raw %>% map(1) %>% reduce(c))
107 |         }
108 | 
109 | 
110 |         dat_n_d <- dat_n_d_dt %>% bind_rows(dat_n_d_dttm)
111 |       }
112 | 
113 |       dat_n <- dat_n_d %>% bind_rows(dat_n_nd)
114 |     }
115 | 
116 |     dat <- dat_c %>% bind_rows(dat_n)
117 | 
118 |     dat <- dat %>% arrange(row, col)
119 |     dat[c("row", "col", "data_type", sort(unique(dat$data_type)))]
120 |   }
121 | 
122 |   read_xls_for_tidycells <- function(filename) {
123 |     wb <- xlsx::loadWorkbook(filename)
124 |     sheets <- xlsx::getSheets(wb)
125 | 
126 |     sheets %>% map(for_a_sheet)
127 |   }
128 | 
129 |   date_utils <- function() {
130 |     # credit goes directly to xlsx-R-Package creators
131 |     .jnew("org/apache/poi/ss/usermodel/DateUtil")
132 |   }
133 | 
134 |   # put them in xlsx environment
135 |   environment(date_utils) <- environment(xlsx::read.xlsx)
136 | 
137 |   suppressMessages(
138 |     suppressWarnings(
139 |       read_xls_for_tidycells(fn)
140 |     )
141 |   )
142 | }
143 | 


--------------------------------------------------------------------------------
/R/reduce_2dfs.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | get_connected_cols <- function(col_map_with_dist) {
  4 |   col_map_with_dist
  5 |   col_map <- tibble()
  6 |   repeat({
  7 |     if (nrow(col_map_with_dist) == 0) break()
  8 |     col_map_this <- col_map_with_dist %>% filter(dist == min(dist))
  9 |     if (nrow(col_map_this) == 0) break()
 10 |     col_map <- col_map %>% bind_rows(col_map_this)
 11 |     col_map_with_dist <- col_map_with_dist %>% filter(!(n1 %in% col_map$n1), !(n2 %in% col_map$n2))
 12 |   })
 13 |   col_map
 14 | }
 15 | 
 16 | reduce_2dfs <- function(dc1, dc2, combine_th = 0.6, rest_cols = Inf, retain_other_cols = FALSE) {
 17 |   colnames(dc1) <- stringr::str_replace_all(colnames(dc1), "uncollated_", "d1_old_uc_")
 18 |   colnames(dc1) <- stringr::str_replace_all(colnames(dc1), "collated_", "d1_old_c_")
 19 | 
 20 |   colnames(dc2) <- stringr::str_replace_all(colnames(dc2), "uncollated_", "d2_old_uc_")
 21 |   colnames(dc2) <- stringr::str_replace_all(colnames(dc2), "collated_", "d2_old_c_")
 22 | 
 23 | 
 24 |   cr1 <- get_all_col_representative(dc1)
 25 |   cr2 <- get_all_col_representative(dc2)
 26 | 
 27 |   # if either of cr1 or cr2 is empty simply rbind and return
 28 |   if (length(cr1) * length(cr2) == 0) {
 29 |     dcnew <- dc1 %>% bind_rows(dc2)
 30 |     return(dcnew)
 31 |   }
 32 | 
 33 |   all_maps <- expand.grid(n1 = names(cr1), n2 = names(cr2), stringsAsFactors = FALSE)
 34 |   m1 <- seq_len(nrow(all_maps)) %>%
 35 |     map(~ similarity_score(cr1[[all_maps$n1[.x]]], cr2[[all_maps$n2[.x]]]) %>% t()) %>%
 36 |     reduce(rbind)
 37 |   m2 <- as.data.frame(m1) %>%
 38 |     map(norm_this) %>%
 39 |     as.data.frame()
 40 |   # KFL : kept for later
 41 |   wts <- rep(1, ncol(m2))
 42 |   m3 <- m2 %>% apply(MARGIN = 1, function(x) sum(x * wts) / sum(wts))
 43 |   all_maps <- all_maps %>% mutate(dist = m3)
 44 | 
 45 |   all_maps_c <- get_connected_cols(all_maps)
 46 | 
 47 |   amap_ok <- all_maps_c %>% filter(dist <= combine_th)
 48 |   amap_not_ok <- all_maps_c %>% filter(dist > combine_th)
 49 | 
 50 |   if (nrow(amap_not_ok) > 0) {
 51 |     repeat({
 52 |       induced_cols <- amap_not_ok %>%
 53 |         mutate(n1 = paste0("n1.", n1), n2 = paste0("n2.", n2)) %>%
 54 |         select(n1, n2) %>%
 55 |         unlist() %>%
 56 |         unique()
 57 | 
 58 |       if (length(induced_cols) > rest_cols) {
 59 |         amap_not_ok <- amap_not_ok %>% arrange(dist)
 60 |         if (rest_cols <= 0) {
 61 |           amap_not_ok <- amap_not_ok %>% filter(FALSE)
 62 |         } else {
 63 |           # pass a single row to amap_ok
 64 |           amap_ok <- amap_ok %>% bind_rows(amap_not_ok[1, ])
 65 |           amap_not_ok <- amap_not_ok[-1, ]
 66 |         }
 67 |       } else {
 68 |         break()
 69 |       }
 70 |     })
 71 |   }
 72 | 
 73 |   cmap <- tibble(new_name = NA_character_, old_name = NA_character_, block = NA_real_) %>% filter(FALSE)
 74 | 
 75 |   if (nrow(amap_ok) > 0) {
 76 |     amap_ok <- amap_ok %>%
 77 |       arrange(dist) %>%
 78 |       mutate(new_name = paste0("collated_", seq_along(dist)))
 79 | 
 80 |     cmap_this <- amap_ok %>%
 81 |       select(-dist) %>%
 82 |       tidyr::gather(cr, old_name, -new_name)
 83 | 
 84 |     cmap_this <- cmap_this %>%
 85 |       mutate(block = recode(cr, n1 = dc1$data_block[1], n2 = dc2$data_block[1])) %>%
 86 |       select(-cr)
 87 | 
 88 |     cmap <- cmap %>% bind_rows(cmap_this)
 89 |   }
 90 | 
 91 |   if (nrow(amap_not_ok) > 0) {
 92 |     cmap_this <- amap_not_ok %>%
 93 |       select(-dist) %>%
 94 |       tidyr::gather(cr, old_name) %>%
 95 |       mutate(new_name = paste0("uncollated_", seq_along(old_name)))
 96 | 
 97 |     cmap_this <- cmap_this %>%
 98 |       mutate(block = recode(cr, n1 = dc1$data_block[1], n2 = dc2$data_block[1])) %>%
 99 |       select(-cr)
100 | 
101 |     cmap <- cmap %>% bind_rows(cmap_this)
102 |   }
103 | 
104 |   cmap1 <- cmap %>% filter(block == dc1$data_block[1])
105 |   cmap2 <- cmap %>% filter(block == dc2$data_block[1])
106 | 
107 |   for (i in seq_len(nrow(cmap1))) {
108 |     colnames(dc1)[which(colnames(dc1) == cmap1$old_name[[i]])] <- cmap1$new_name[[i]]
109 |   }
110 | 
111 |   for (i in seq_len(nrow(cmap2))) {
112 |     colnames(dc2)[which(colnames(dc2) == cmap2$old_name[[i]])] <- cmap2$new_name[[i]]
113 |   }
114 | 
115 |   dcnew <- dc1 %>% bind_rows(dc2)
116 | 
117 |   if (!retain_other_cols) {
118 |     nc_cols <- colnames(dcnew) %>%
119 |       stringr::str_detect("collated") %>%
120 |       colnames(dcnew)[.]
121 |     dcnew <- dcnew[c(intersect(defcols, colnames(dcnew)), nc_cols)]
122 |   }
123 | 
124 |   dcnew
125 | }
126 | 


--------------------------------------------------------------------------------
/R/refine_cols.R:
--------------------------------------------------------------------------------
 1 | 
 2 | refine_cols <- function(dcomp_part, cn_df, sep = " :: ") {
 3 |   cn_df <- cn_df %>% filter(cname %in% colnames(dcomp_part))
 4 |   out <- dcomp_part
 5 |   if (nrow(cn_df) > 0) {
 6 |     cn_df <- cn_df %>%
 7 |       group_by(ag) %>%
 8 |       mutate(cname_new = rc_n %>% as.factor() %>% as.integer() %>% paste0(ag, "_", .)) %>%
 9 |       ungroup() %>%
10 |       arrange(cname_new, cname_ord)
11 | 
12 |     cn_l <- cn_df %>%
13 |       select(cname, cname_new) %>%
14 |       split(.$cname_new)
15 | 
16 |     out_p <- cn_l %>%
17 |       map(~ {
18 |         .d0 <- dcomp_part[.x$cname]
19 |         .d0 <- .d0 %>% mutate_all(stringr::str_trim)
20 |         dcomp_part[.x$cname_new[1]] <- .d0 %>% apply(1, paste, collapse = sep)
21 |         dcomp_part[c("row", "col", .x$cname_new[1])]
22 |       }) %>%
23 |       reduce(fj, join_by = c("row", "col"))
24 | 
25 |     out <- out_p %>% fj(dcomp_part, join_by = c("row", "col"))
26 |   }
27 | 
28 |   out
29 | }
30 | 


--------------------------------------------------------------------------------
/R/safe_dependency_check.R:
--------------------------------------------------------------------------------
 1 | 
 2 | assign("safe_dependency_check", list(
 3 |   # this is for rJava deps
 4 |   xlsx = function() {
 5 |     chk <- F
 6 |     try(
 7 |       {
 8 |         if (is_xlsx_ok(no_pkg_check = T)) {
 9 |           sample_xls <- system.file("extdata", "messy", "xls.pdf", package = "tidycells", mustWork = TRUE)
10 |           x <- read_xls_from_xlsx(sample_xls, no_pkg_check = T)
11 |           if (is.data.frame(x[[1]])) {
12 |             if (nrow(x[[1]]) > 0) {
13 |               chk <- T
14 |             }
15 |           }
16 |         }
17 |       },
18 |       silent = T
19 |     )
20 |     chk
21 |   },
22 |   # for tabulizer
23 |   # mainly for https://github.com/ropensci/tabulizer/issues/106
24 |   tabulizer = function() {
25 |     chk <- F
26 |     try(
27 |       {
28 |         if (requireNamespace("tabulizer", quietly = T)) {
29 |           sample_pdf <- system.file("extdata", "messy", "pdf.docx", package = "tidycells", mustWork = TRUE)
30 |           x <- read_pdf_from_tabulizer(sample_pdf, no_pkg_check = T)
31 |           if (is.matrix(x[[1]])) {
32 |             if (nrow(x[[1]]) > 0) {
33 |               chk <- T
34 |             }
35 |           }
36 |         }
37 |       },
38 |       silent = T
39 |     )
40 |     chk
41 |   }
42 | ), envir = tidycells_pkg_env)
43 | 


--------------------------------------------------------------------------------
/R/shiny_etc.R:
--------------------------------------------------------------------------------
  1 | 
  2 | shiny_check <- function(force_load = FALSE) {
  3 |   if (!force_load) {
  4 |     if (!interactive()) {
  5 |       abort("need an interactive session for this functionality.")
  6 |     }
  7 |   }
  8 | 
  9 |   if (!is_available("shiny")) {
 10 |     abort("'shiny' package is required for this functionality.")
 11 |   } else {
 12 |     if (!is_available("miniUI")) {
 13 |       abort("'miniUI' package is required for this functionality.")
 14 |     }
 15 |   }
 16 | 
 17 |   es <- loadNamespace("shiny")
 18 |   try(attachNamespace(es), silent = TRUE)
 19 | 
 20 |   em <- loadNamespace("miniUI")
 21 |   try(attachNamespace(em), silent = TRUE)
 22 | }
 23 | 
 24 | shiny_unload <- function() {
 25 |   AutoUnloadShiny <- options("AutoUnloadShiny")[[1]]
 26 |   if (is.null(AutoUnloadShiny)) AutoUnloadShiny <- TRUE
 27 |   if (AutoUnloadShiny) {
 28 |     suppressWarnings(try(unloadNamespace("miniUI"), silent = TRUE))
 29 |     suppressWarnings(try(unloadNamespace("shiny"), silent = TRUE))
 30 |   }
 31 | }
 32 | 
 33 | global_objects <- function(cls = c("cell_df", "cell_analysis", "rc_df"), check_rc_df = FALSE) {
 34 |   cls <- match.arg(cls)
 35 | 
 36 |   objs <- ls(envir = globalenv())
 37 | 
 38 |   cobjs <- objs %>%
 39 |     map_lgl(~ inherits(x = get(.x, envir = globalenv()), cls)) %>%
 40 |     objs[.]
 41 | 
 42 |   cobjs
 43 | }
 44 | 
 45 | command_prompt_ask <- function(title, message, default = NULL) {
 46 |   cat(paste0(cli_bb(message), "\n"))
 47 |   cm <- readline(prompt = paste0(title, ":"))
 48 |   if (nchar(cm) == 0) {
 49 |     default
 50 |   } else {
 51 |     cm
 52 |   }
 53 | }
 54 | 
 55 | rstudioapi_ask <- function(title, message, default = NULL, is_question = FALSE, ...) {
 56 |   ok <- FALSE
 57 |   if (is_available("rstudioapi")) {
 58 |     if (rstudioapi::hasFun("viewer")) {
 59 |       ok <- TRUE
 60 |     }
 61 |   }
 62 | 
 63 |   if (ok) {
 64 |     if (is_question) {
 65 |       rstudioapi::showQuestion(title = title, message = message, ...)
 66 |     } else {
 67 |       rstudioapi::showPrompt(title = title, message = message, default = default)
 68 |     }
 69 |   } else {
 70 |     command_prompt_ask(title = title, message = message, default = default)
 71 |   }
 72 | }
 73 | 
 74 | global_object_picker <- function(what = c("cell_df", "cell_analysis", "rc_df")) {
 75 |   what <- match.arg(what)
 76 |   objs <- global_objects(cls = what)
 77 |   if (length(objs) == 1) {
 78 |     return(invisible(get(objs, envir = globalenv())))
 79 |   }
 80 |   if (length(objs) < 1) {
 81 |     abort(paste0("No ", what, " found in R global environment."))
 82 |   }
 83 |   if (length(objs) > 1) {
 84 |     this_title <- paste0("Select a ", what, " to proceed")
 85 |     this_msg <- paste0(
 86 |       "Pick from one of these (type the variable name)\n",
 87 |       paste0(objs, collapse = ", ")
 88 |     )
 89 |     this_msg_warned <- FALSE
 90 |     repeat({
 91 |       sel <- rstudioapi_ask(
 92 |         title = this_title,
 93 |         message = this_msg
 94 |       )
 95 |       if (length(sel) == 0) abort("User Cancelled")
 96 |       if (sel %in% objs) {
 97 |         break()
 98 |       } else {
 99 |         if (!this_msg_warned) {
100 |           this_msg_warned <- TRUE
101 |           this_msg <- paste0(this_msg, "\n(Wrong input try again)")
102 |         }
103 |       }
104 |     })
105 |     objs <- sel
106 |     return(invisible(get(objs, envir = globalenv())))
107 |   }
108 | 
109 |   abort("unknown error")
110 | }
111 | 


--------------------------------------------------------------------------------
/R/shiny_main.R:
--------------------------------------------------------------------------------
  1 | common_finish <- function(ui, server, title, viewer_pane, test_this) {
  2 |   if (test_this) {
  3 |     es <- new.env()
  4 |     assign("ui", ui, envir = es)
  5 |     assign("server", server, envir = es)
  6 |     return(es)
  7 |   }
  8 | 
  9 |   if (viewer_pane) {
 10 |     runGadget(shinyApp(ui, server),
 11 |       stopOnCancel = FALSE
 12 |     )
 13 |   } else {
 14 |     viewer <- dialogViewer(title,
 15 |       width = 900,
 16 |       height = 700
 17 |     )
 18 |     runGadget(shinyApp(ui, server),
 19 |       viewer = viewer,
 20 |       stopOnCancel = FALSE
 21 |     )
 22 |   }
 23 | }
 24 | 
 25 | shiny_app_va_classify <- function(d, viewer_pane = FALSE, test_this = FALSE) {
 26 | 
 27 | 
 28 |   ########## UI ##########
 29 |   ui <- miniPage(
 30 |     gadgetTitleBar("Value/Attribute Classifier"),
 31 |     miniTabstripPanel(
 32 |       ui_part_va_classify(),
 33 |       ui_part_crop(reset_msg = "Load initial data (With which you started this tab. To load original data reset from Classify Tab.)"),
 34 |       ui_part_plot_tune(selected_fill = "type"),
 35 |       ui_part_visualize(),
 36 |       id = "now_tab_main"
 37 |     )
 38 |   )
 39 | 
 40 |   ########## Server ##########
 41 |   server <- server_va_select(d)
 42 | 
 43 |   common_finish(ui, server,
 44 |     title = "TidyCells: Value/Attribute Classifier",
 45 |     viewer_pane = viewer_pane, test_this = test_this
 46 |   )
 47 | }
 48 | 
 49 | shiny_app_crop <- function(d, viewer_pane = FALSE, test_this = FALSE) {
 50 | 
 51 | 
 52 |   ########## UI ##########
 53 |   ui <- miniPage(
 54 |     gadgetTitleBar("Data Crop"),
 55 |     miniTabstripPanel(
 56 |       ui_part_crop(),
 57 |       ui_part_plot_tune(),
 58 |       ui_part_visualize(),
 59 |       id = "now_tab_main"
 60 |     )
 61 |   )
 62 | 
 63 |   ########## Server ##########
 64 |   server <- server_crop(d)
 65 | 
 66 |   common_finish(ui, server,
 67 |     title = "TidyCells: Data Crop",
 68 |     viewer_pane = viewer_pane, test_this = test_this
 69 |   )
 70 | }
 71 | 
 72 | shiny_app_data_block <- function(x, viewer_pane = FALSE, test_this = FALSE) {
 73 | 
 74 | 
 75 |   ########## UI ##########
 76 |   ui <- miniPage(
 77 |     gadgetTitleBar("Data Blocks Inspection"),
 78 |     miniTabstripPanel(
 79 |       ui_part_data_block(),
 80 |       ui_part_plot_tune(txt_alpha_max = 0.9, txt_alpha = 0.2, selected_fill = "type"),
 81 |       ui_part_visualize(),
 82 |       id = "now_tab_main"
 83 |     )
 84 |   )
 85 | 
 86 |   ########## Server ##########
 87 |   server <- server_data_block(x)
 88 | 
 89 |   common_finish(ui, server,
 90 |     title = "TidyCells: Data Blocks Inspection",
 91 |     viewer_pane = viewer_pane, test_this = test_this
 92 |   )
 93 | }
 94 | 
 95 | shiny_app_orientation_modification <- function(x, viewer_pane = FALSE, test_this = FALSE) {
 96 | 
 97 | 
 98 |   ########## UI ##########
 99 |   ui <- miniPage(
100 |     gadgetTitleBar("Data Blocks Orientation Modification"),
101 |     miniTabstripPanel(
102 |       ui_part_data_block(zoom_this = TRUE, plot_issues_option = FALSE),
103 |       ui_part_orientation_modification(),
104 |       ui_part_plot_tune(txt_alpha_max = 0.9, txt_alpha = 0.2, selected_fill = "type"),
105 |       ui_part_visualize(),
106 |       id = "now_tab_main"
107 |     )
108 |   )
109 | 
110 |   ########## Server ##########
111 |   server <- server_orientation_modification(x)
112 | 
113 |   common_finish(ui, server,
114 |     title = "TidyCells: Data Blocks Orientation Modification",
115 |     viewer_pane = viewer_pane, test_this = test_this
116 |   )
117 | }
118 | 
119 | shiny_app_traceback <- function(x, dcomp, viewer_pane = FALSE, test_this = FALSE) {
120 |   if (!DT_present()) {
121 |     abort(paste("The package 'DT' is required for this functionality",
122 |       "If you want static plot, please use 'cell_traceback_plot'.",
123 |       sep = "\n"
124 |     ))
125 |   }
126 | 
127 | 
128 |   ########## UI ##########
129 |   ui <- miniPage(
130 |     gadgetTitleBar("Composition Traceback"),
131 |     miniTabstripPanel(
132 |       ui_part_traceback(),
133 |       ui_part_data_block(zoom_this = TRUE, direction_text_this = FALSE, plot_issues_option = FALSE),
134 |       ui_part_plot_tune(txt_alpha_max = 0.9, selected_fill = "type"),
135 |       ui_part_visualize(),
136 |       id = "now_tab_main"
137 |     )
138 |   )
139 | 
140 |   ########## Server ##########
141 |   server <- server_traceback(x, dcomp)
142 | 
143 |   common_finish(ui, server,
144 |     title = "TidyCells: Composition Traceback (Composition Viewer)",
145 |     viewer_pane = viewer_pane, test_this = test_this
146 |   )
147 | }
148 | 


--------------------------------------------------------------------------------
/R/similarity_score.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ads_score <- function(ads) {
 3 |   ads[ads < 0] <- 0
 4 |   ads[ads > 10^10] <- 10^10
 5 |   (stats::median(ads, na.rm = TRUE) * 5 + mean(ads, na.rm = TRUE) * 4 + min(ads, na.rm = TRUE) * 1) / 10
 6 | }
 7 | 
 8 | similarity_score <- function(x, y) {
 9 |   comm <- intersect(x, y)
10 |   bscore <- 1 - length(comm) / mean(length(x), length(y))
11 | 
12 |   if (is.na(bscore)) bscore <- 0
13 |   if (bscore > 10^10) bscore <- 10^10
14 |   if (bscore < 0) bscore <- 0
15 | 
16 |   # Levenshtein distance
17 |   ads <- utils::adist(x, y)
18 | 
19 |   ads_l <- list(ads)
20 | 
21 |   if (is_available("stringdist")) {
22 |     suppressMessages(suppressWarnings({
23 |       mthds <- c("osa", "jaccard", "soundex", "jw")
24 |       ads_sdl <- mthds %>% map(~ stringdist::stringdistmatrix(x, y, method = .x, p = 0.1, q = 3))
25 |       ads_l <- c(ads_l, ads_sdl)
26 |     }))
27 |   }
28 | 
29 |   ascore <- ads_l %>% map_dbl(ads_score)
30 |   c(ascore, bscore)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/stitch_direction.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | stitch_direction <- function(d_part, dat, attr_name, trace_it = FALSE) {
 4 |   d0 <- d_part %>%
 5 |     distinct(data_gid, row = row_d, col = col_d)
 6 | 
 7 |   a0 <- d_part %>%
 8 |     distinct(attr_gid, row = row_a, col = col_a)
 9 | 
10 |   chkd <- d0$data_gid %>%
11 |     unique() %>%
12 |     length()
13 |   chka <- a0$attr_gid %>%
14 |     unique() %>%
15 |     length()
16 |   chkatsp <- d_part$attr_gid_split %>%
17 |     unique() %>%
18 |     length()
19 | 
20 |   chkdir <- d_part$direction %>%
21 |     unique() %>%
22 |     length()
23 | 
24 |   chkaty <- d_part$attr_group %>%
25 |     unique() %>%
26 |     length()
27 | 
28 |   if (chkd != 1 | chka != 1 | chkdir != 1 | chkatsp != 1 | chkaty != 1) {
29 |     abort(paste("failed to stitch.",
30 |       "(have you tampered a cell-analysis?)",
31 |       "(make sure the cell-analysis is free from error or potential issues)",
32 |       "If still the error persists, please contact the developer.",
33 |       sep = "\n"
34 |     ))
35 |   }
36 | 
37 | 
38 |   if (missing(attr_name)) {
39 |     daty <- d_part$attr_group[1]
40 | 
41 |     dgn <- d_part$direction_group[1] %>%
42 |       recode(WE = "row", NS = "col")
43 | 
44 |     dn <- d_part$direction_basic[1] %>%
45 |       recode(
46 |         N = "top", W = "left", S = "bottom", E = "right",
47 |         NW = "topLeft", NE = "topRight", SW = "bottomLeft", SE = "bottomRight"
48 |       )
49 | 
50 |     datsp <- d_part$attr_gid_split_order[1]
51 | 
52 |     dd <- d_part$dist_order[1]
53 | 
54 | 
55 |     attr_name <- paste(daty, dgn, dn, datsp, dd, sep = "_")
56 |   }
57 | 
58 | 
59 | 
60 |   direction <- d_part$direction %>% unique()
61 |   direction <- direction[1]
62 | 
63 |   d0 <- d0 %>%
64 |     select(-data_gid) %>%
65 |     left_join(dat %>% select(row, col, value), by = c("row", "col"))
66 |   a0 <- a0 %>%
67 |     select(-attr_gid) %>%
68 |     left_join(dat %>% select(row, col, attr = value), by = c("row", "col"))
69 | 
70 |   if (trace_it) {
71 |     a0 <- a0 %>% mutate(cadd = paste(row, col, sep = "_"))
72 |   }
73 | 
74 |   # suppressWarnings should be removed once unpivotr::enhead chages
75 |   # this is happening as "All elements of `...` must be named." warning in tidyr
76 |   # ref: https://github.com/tidyverse/tidyr/issues/714
77 |   # ref: https://github.com/nacnudus/unpivotr/issues/26
78 |   # directions NNW (and similar) is strict; it does not allow multi block
79 |   # possibly need separate enhead like function
80 |   suppressWarnings({
81 |     d1 <- d0 %>%
82 |       enhead(a0, direction)
83 |   })
84 | 
85 | 
86 |   colnames(d1)[which(colnames(d1) == "attr")] <- attr_name
87 | 
88 |   if (trace_it) {
89 |     colnames(d1)[which(colnames(d1) == "cadd")] <- paste0("cellAddress_", attr_name)
90 |   }
91 | 
92 |   d1 <- d1 %>% mutate(data_block = d_part$natural_gid[1])
93 | 
94 |   d1
95 | }
96 | 


--------------------------------------------------------------------------------
/R/tidycells-package.R:
--------------------------------------------------------------------------------
 1 | #' `tidycells` package
 2 | #'
 3 | #' **Read Tabular Data from Diverse Sources and Easily Make Them Tidy**
 4 | #'
 5 | #' The package provides utilities to read, cells from complex tabular data
 6 | #' and heuristic detection based structural assignment of those cells to a
 7 | #' columnar or tidy format. Read functionality has the ability to read (in
 8 | #' a unified manner) structured, partially structured or unstructured
 9 | #' tabular data (usually spreadsheets for public data dissemination and
10 | #' aimed for common human understanding) from various types of documents.
11 | #' The tabular information is read as cells. The 'structure assignment'
12 | #' functionality has both supervised and unsupervised way of assigning
13 | #' cells data to columnar/tidy format. Multiple disconnected blocks of
14 | #' tables in a single sheet are also handled appropriately. These tools are
15 | #' suitable for unattended conversation of (maybe a pile of) messy tables
16 | #' (like government data) into a consumable format(usable for further
17 | #' analysis and data wrangling).
18 | #'
19 | #' To get started check out \href{../doc/tidycells-intro.html}{\code{vignette("tidycells-intro")}}.
20 | #'
21 | #' @name tidycells-package
22 | #' @keywords internal
23 | "_PACKAGE"
24 | 


--------------------------------------------------------------------------------
/R/utils-deps.R:
--------------------------------------------------------------------------------
  1 | #### dplyr ####
  2 | 
  3 | #' @importFrom dplyr bind_rows
  4 | NULL
  5 | 
  6 | #' @importFrom dplyr distinct
  7 | NULL
  8 | 
  9 | #' @importFrom dplyr ungroup
 10 | NULL
 11 | 
 12 | #' @importFrom dplyr group_split
 13 | NULL
 14 | 
 15 | #' @importFrom dplyr pull
 16 | NULL
 17 | 
 18 | #' @importFrom dplyr if_else
 19 | NULL
 20 | 
 21 | #' @importFrom dplyr rename
 22 | NULL
 23 | 
 24 | #' @importFrom dplyr arrange
 25 | NULL
 26 | 
 27 | #' @importFrom dplyr full_join
 28 | NULL
 29 | 
 30 | #' @importFrom dplyr inner_join
 31 | NULL
 32 | 
 33 | #' @importFrom dplyr right_join
 34 | NULL
 35 | 
 36 | #' @importFrom dplyr left_join
 37 | NULL
 38 | 
 39 | #' @importFrom dplyr anti_join
 40 | NULL
 41 | 
 42 | #' @importFrom dplyr select
 43 | NULL
 44 | 
 45 | #' @importFrom dplyr filter
 46 | NULL
 47 | 
 48 | #' @importFrom dplyr group_by
 49 | NULL
 50 | 
 51 | #' @importFrom dplyr mutate
 52 | NULL
 53 | 
 54 | #' @importFrom dplyr mutate_all
 55 | NULL
 56 | 
 57 | #' @importFrom dplyr summarise
 58 | NULL
 59 | 
 60 | #' @importFrom dplyr n
 61 | NULL
 62 | 
 63 | #' @importFrom dplyr desc
 64 | NULL
 65 | 
 66 | #' @importFrom dplyr count
 67 | NULL
 68 | 
 69 | #' @importFrom dplyr case_when
 70 | NULL
 71 | 
 72 | #' @importFrom dplyr recode
 73 | NULL
 74 | 
 75 | #' @importFrom dplyr n_distinct
 76 | NULL
 77 | 
 78 | 
 79 | #### tibble ####
 80 | 
 81 | #' @importFrom tibble tibble
 82 | NULL
 83 | 
 84 | #' @importFrom tibble as_tibble
 85 | NULL
 86 | 
 87 | 
 88 | #### purrr ####
 89 | 
 90 | #' @importFrom purrr map
 91 | NULL
 92 | 
 93 | #' @importFrom purrr map_df
 94 | NULL
 95 | 
 96 | #' @importFrom purrr map_lgl
 97 | NULL
 98 | 
 99 | #' @importFrom purrr map_dbl
100 | NULL
101 | 
102 | #' @importFrom purrr map_int
103 | NULL
104 | 
105 | #' @importFrom purrr map_chr
106 | NULL
107 | 
108 | #' @importFrom purrr imap_dfr
109 | NULL
110 | 
111 | #' @importFrom purrr reduce
112 | NULL
113 | 
114 | 
115 | #### unpivotr ####
116 | 
117 | #' @importFrom unpivotr enhead
118 | NULL
119 | 
120 | #### rlang ####
121 | 
122 | #' @importFrom rlang abort
123 | NULL
124 | 
125 | #' @importFrom rlang warn
126 | NULL
127 | 


--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
 1 | #' Pipe operator
 2 | #'
 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
 4 | #'
 5 | #' @name %>%
 6 | #' @rdname pipe
 7 | #' @keywords internal
 8 | #' @export
 9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 | 


--------------------------------------------------------------------------------
/R/validate_cells.R:
--------------------------------------------------------------------------------
 1 | #' Validate cell-DF
 2 | #'
 3 | #' @param dat An R object
 4 | #'
 5 | #' @return Logical scalar. If the value is `FALSE`, it will contain attribute named `msg`.
 6 | #' @details It checks for following facts:
 7 | #' * Whether `dat` is a `data.frame`
 8 | #' * Whether `dat` conforms to `rc_df` format
 9 | #' * Whether `dat` conforms to `cell_df` format
10 | #' If all the checks are passed then it returns `TRUE` else it returns `FALSE` with `msg` attribute, indicating reason for validation failure.
11 | #'
12 | #' @export
13 | #' @keywords internal
14 | #'
15 | #' @examples
16 | #' # returns TRUE
17 | #' validate_cells(tibble::tibble(row = 1, col = 2, data_type = "numeric", value = "1"))
18 | #'
19 | #' # this is FALSE
20 | #' chk <- validate_cells(tibble::tibble(row = 1, col = 2, data_type = "numeric"))
21 | #'
22 | #' # msg
23 | #' attr(chk, "msg")
24 | validate_cells <- function(dat) {
25 |   msg <- character(0)
26 | 
27 |   if (!inherits(dat, "data.frame")) {
28 |     msg <- msg %>% c("data.frame expected")
29 |   }
30 | 
31 |   if (!is_conforms_to_rcdf(dat)) {
32 |     msg <- msg %>% c("does not conforms to rc_df format")
33 |   }
34 | 
35 |   if (!all(hasName(dat, c("row", "col", "data_type", "value")))) {
36 |     msg <- msg %>% c("does not has required columns")
37 |   }
38 | 
39 |   # optional test if the dat is VA classified
40 |   if (hasName(dat, "type")) {
41 |     dtyl <- dat$type %>%
42 |       setdiff(c("value", "attribute", "empty")) %>%
43 |       length()
44 |     if (dtyl != 0) {
45 |       msg <- msg %>% c("type column present but content differs from value, attribute, empty")
46 |     }
47 |   }
48 | 
49 |   if (hasName(dat, "data_type")) {
50 |     dtl <- dat$data_type %>%
51 |       setdiff(c("numeric", "character")) %>%
52 |       length()
53 | 
54 |     if (dtl != 0) {
55 |       msg <- msg %>% c("data_type has fields apart from numeric, character")
56 |     }
57 | 
58 |     if (!rlang::is_atomic(dat$data_type)) {
59 |       msg <- msg %>% c("data_type is not atomic")
60 |     } else {
61 |       if (!is.character(dat$data_type)) {
62 |         msg <- msg %>% c("data_type must be character")
63 |       }
64 |     }
65 |   }
66 | 
67 | 
68 |   if (hasName(dat, "value")) {
69 |     if (!rlang::is_atomic(dat$value)) {
70 |       msg <- msg %>% c("value is not atomic")
71 |     } else {
72 |       if (!is.character(dat$value)) {
73 |         msg <- msg %>% c("value must be character")
74 |       }
75 |     }
76 |   }
77 | 
78 | 
79 |   if (length(msg) == 0) {
80 |     TRUE
81 |   } else {
82 |     res <- FALSE
83 |     attr(res, "msg") <- msg
84 |     res
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/R/validate_read_cell_part_object.R:
--------------------------------------------------------------------------------
 1 | 
 2 | validate_read_cell_part_object <- function(x, level) {
 3 |   stage <- attr(x, "read_cells_stage")
 4 |   if (missing(level)) {
 5 |     level <- stage
 6 |   }
 7 | 
 8 |   if (is.null(level)) {
 9 |     abort("read_cell_part object is not valid")
10 |   }
11 | 
12 |   if (length(level) != 1) {
13 |     abort("read_cell_part object is not valid")
14 |   }
15 | 
16 |   if (is.numeric(level)) {
17 |     level <- as.integer(level)
18 |     if (level >= 1 & level <= length(read_cell_task_orders)) {
19 |       level <- read_cell_task_orders[level]
20 |     }
21 |   }
22 | 
23 |   if (is.character(level)) {
24 |     if (level %in% read_cell_task_orders) {
25 |       if (level == read_cell_task_orders[1]) {
26 |         # NULL is possible
27 |         if (is.null(x)) {
28 |           return(list(chk = TRUE, level = level))
29 |         }
30 |         # a df
31 |         if (is.data.frame(x)) {
32 |           if (nrow(x) > 0) {
33 |             return(list(chk = TRUE, level = level))
34 |           }
35 |         }
36 |         # list of dfs
37 |         if (is.list(x)) {
38 |           if (length(x) > 0) {
39 |             if (is.data.frame(x[[1]])) {
40 |               return(list(chk = TRUE, level = level))
41 |             }
42 |           }
43 |         }
44 |       }
45 |       if (level == read_cell_task_orders[2]) {
46 |         if (is.list(x)) {
47 |           if (length(x) > 0) {
48 |             if (x %>% map_lgl(is_cell_df) %>% all()) {
49 |               return(list(chk = TRUE, level = level))
50 |             }
51 |           }
52 |         }
53 |       }
54 |       if (level == read_cell_task_orders[3]) {
55 |         if (is.list(x)) {
56 |           if (length(x) > 0) {
57 |             if (x %>% map_lgl(is_cell_df) %>% all()) {
58 |               if (x %>% map_lgl(~ hasName(.x, "type")) %>% all()) {
59 |                 return(list(chk = TRUE, level = level))
60 |               }
61 |             }
62 |           }
63 |         }
64 |       }
65 |       if (level == read_cell_task_orders[4]) {
66 |         if (is.list(x)) {
67 |           if (length(x) > 0) {
68 |             if (x %>% map_lgl(~ inherits(.x, cell_df_analysis_class[1])) %>% all()) {
69 |               return(list(chk = TRUE, level = level))
70 |             }
71 |           }
72 |         }
73 |       }
74 |       if (level == read_cell_task_orders[5]) {
75 |         if (is.data.frame(x)) {
76 |           if (all(utils::hasName(x, setdiff(defcols, "table_tag")))) {
77 |             return(list(chk = TRUE, level = level))
78 |           }
79 |         }
80 |       }
81 |     } else {
82 |       abort(paste0(level, " is not a valid level."))
83 |     }
84 |   }
85 | 
86 |   return(list(chk = FALSE, level = level))
87 | }
88 | 


--------------------------------------------------------------------------------
/R/value_attribute_classify.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' Value/Attribute Classifier
 4 | #'
 5 | #' @description After [`as_cell_df`][as_cell_df()] (entry  point to `tidycells`) you may need to use this function
 6 | #' or individual _Value/Attribute Classifier_-functions as listed below in _"see also"_ - section.
 7 | #'
 8 | #' Here the idea is to classify all cells into either `value`, `attribute`, `empty` which will be
 9 | #' used by [`analyze_cells`][analyze_cells()] for further processing.
10 | #'
11 | #' @param d a Cell DF
12 | #' @param classifier a classifier
13 | #'
14 | #' @return a Cell DF with Value/Attribute Classification. The underlying tibble will contain an extra column named `type`.
15 | #' @export
16 | #' @details In order to understand the data orientation and detect data-blocks Cell DF requires additional column named `type`.
17 | #' This `type` column potentially contains either `value`, `attribute`, `empty`. The `value` are given corresponding to cells
18 | #' with observations in it. The tag, `attribute` is for the identifier of these cells. Lastly, `empty` cells are useless cells or
19 | #' cells with no meaningful information.
20 | #'
21 | #' For `classifier` following options are present:
22 | #' * `basic_classifier` : naive classifier which recode `data_type`.
23 | #' * `sample_based_classifier` : sample-based classifier.
24 | #' * `numeric_values_classifier` : considers number like cells as values.
25 | #'
26 | #' Each of the above are available as individual functions. Those can also be directly applied on a `cell-df`.
27 | #' However, it is recommended to use `value_attribute_classify` as it tests for integrity after classification.
28 | #'
29 | #' @examples
30 | #'
31 | #' iris %>%
32 | #'   as_cell_df() %>%
33 | #'   sample_based_classifier(value_sample = "setosa") %>%
34 | #'   plot()
35 | #'
36 | #' iris %>%
37 | #'   as_cell_df() %>%
38 | #'   sample_based_classifier(value_sample = "setosa") %>%
39 | #'   numeric_values_classifier() %>%
40 | #'   plot()
41 | #'
42 | #' if (rlang::is_installed("tidyxl")) {
43 | #'   cdn <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") %>%
44 | #'     tidyxl::xlsx_cells()
45 | #'   cdn <- cdn %>%
46 | #'     dplyr::filter(sheet == sheet[1]) %>%
47 | #'     as_cell_df()
48 | #'
49 | #'   # all of these are same except value_attribute_classify will perform validate_cells once again
50 | #'   cd1 <- sample_based_classifier(cdn, value_sample = "APR")
51 | #'   cd2 <- sample_based_classifier(value_sample = "APR")(cdn)
52 | #'   cd3 <- value_attribute_classify(cdn,
53 | #'     classifier = sample_based_classifier(value_sample = "APR")
54 | #'   )
55 | #'   # see it
56 | #'   plot(cd3)
57 | #' }
58 | #' @seealso
59 | #' Individual classifier functions:
60 | #' * [basic_classifier][basic_classifier()]
61 | #' * [sample_based_classifier][sample_based_classifier()]
62 | #' * [numeric_values_classifier][numeric_values_classifier()],
63 | #'
64 | #' For interactive Value/Attribute Classification check [visual_va_classify][visual_va_classify()]
65 | #'
66 | value_attribute_classify <- function(d, classifier = basic_classifier()) {
67 |   if (!is_cell_df(d)) {
68 |     abort("Cell DF Expected")
69 |   }
70 | 
71 |   classifier_try <- try(purrr::as_mapper(classifier), silent = TRUE)
72 |   if (!inherits(classifier_try, "try-error")) {
73 |     classifier <- classifier_try
74 |   }
75 | 
76 |   if (!is.function(classifier)) {
77 |     abort("classifier is not a function")
78 |   }
79 | 
80 |   cd <- classifier(d)
81 | 
82 |   chk <- validate_cells(cd)
83 | 
84 |   if (!chk) {
85 |     abort(paste0(attr(chk, "msg"), collapse = "\n"))
86 |   }
87 | 
88 |   cd
89 | }
90 | 


--------------------------------------------------------------------------------
/R/visual_functions.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Interactive operations and Visualizations
  4 | #'
  5 | #' @param d a cell_df.
  6 | #' @description These functions require \code{\link[shiny:shiny-package]{shiny}} and `miniUI`.
  7 | #' Each of these modules has option for interactive plots (using `plotly`).
  8 | #' However, for the same you need to have `plotly` installed.
  9 | #'
 10 | #' @details Each of these functions are designed for interactive sessions only.
 11 | #' Arguments to these functions are optional as it tries to pick those from global environment.
 12 | #' If there are multiple variables defined in global environment which are of same desired class then you will get option to choose
 13 | #' one of them. These are also available as \href{https://rstudio.github.io/rstudioaddins/}{RStudio Addin}
 14 | #' (if you are using RStudio IDE you'll get those addin in the RStudio addin panel).
 15 | #'
 16 | #' Following are the names used in RStudio Addin
 17 | #' * _Crop Cells_: `visual_crop`
 18 | #' * _Cell Analysis Inspection_: `visual_data_block_inspection`
 19 | #' * _Orientation Modification_: `visual_orientation_modification`
 20 | #' * _Traceback Composition_: `visual_traceback`
 21 | #' * _Classify Value/Attribute_: `visual_va_classify`
 22 | #'
 23 | #' For each of the modules it tries to load and unload `shiny` before and after the execution of the module.
 24 | #' You can disable the same by setting `options(AutoUnloadShiny = FALSE)`.
 25 | #'
 26 | #' @return `visual_crop` returns a cell_df after the interactive operations.
 27 | #' @export
 28 | #' @keywords internal
 29 | #' @rdname visual_functions
 30 | visual_crop <- function(d) {
 31 |   if (missing(d)) {
 32 |     d <- global_object_picker("cell_df")
 33 |   }
 34 | 
 35 |   shiny_check()
 36 |   dout <- shiny_app_crop(d)
 37 |   shiny_unload()
 38 | 
 39 |   return(invisible(dout))
 40 | }
 41 | 
 42 | #' @return `visual_va_classify` returns a cell_df after the interactive operations.
 43 | #' @export
 44 | #' @keywords internal
 45 | #' @rdname visual_functions
 46 | #' @seealso [`value_attribute_classify`][value_attribute_classify()]
 47 | visual_va_classify <- function(d) {
 48 |   if (missing(d)) {
 49 |     d <- global_object_picker("cell_df")
 50 |   }
 51 | 
 52 |   shiny_check()
 53 |   dout <- shiny_app_va_classify(d)
 54 |   shiny_unload()
 55 | 
 56 |   return(invisible(dout))
 57 | }
 58 | 
 59 | #' @param x a cell_analysis.
 60 | #' @export
 61 | #' @keywords internal
 62 | #' @rdname visual_functions
 63 | visual_data_block_inspection <- function(x) {
 64 |   if (missing(x)) {
 65 |     x <- global_object_picker("cell_analysis")
 66 |   }
 67 | 
 68 |   shiny_check()
 69 |   shiny_app_data_block(x)
 70 |   shiny_unload()
 71 | 
 72 |   return(invisible(NULL))
 73 | }
 74 | 
 75 | #' @return `visual_orientation_modification` returns a cell_analysis after the interactive operations.
 76 | #' @export
 77 | #' @keywords internal
 78 | #' @rdname visual_functions
 79 | visual_orientation_modification <- function(x) {
 80 |   if (missing(x)) {
 81 |     x <- global_object_picker("cell_analysis")
 82 |   }
 83 | 
 84 |   shiny_check()
 85 |   this_out <- shiny_app_orientation_modification(x)
 86 |   shiny_unload()
 87 | 
 88 |   return(invisible(this_out))
 89 | }
 90 | 
 91 | #' @param dcomp a composition (a tibble rendered by [`compose_cells`][compose_cells()]).
 92 | #' @return `visual_traceback` returns a composed tidy data (as a tibble).
 93 | #' @details `visual_traceback` requires package `DT`.
 94 | #' If `DT` is not present you may use [`cell_composition_traceback`][cell_composition_traceback()]
 95 | #'
 96 | #' @export
 97 | #' @keywords internal
 98 | #' @rdname visual_functions
 99 | #' @seealso [`cell_composition_traceback`][cell_composition_traceback()]
100 | visual_traceback <- function(x, dcomp) {
101 |   if (!DT_present()) {
102 |     abort("package `DT` is required")
103 |   }
104 | 
105 |   if (missing(x)) {
106 |     x <- global_object_picker("cell_analysis")
107 |   }
108 | 
109 |   shiny_check()
110 |   this_out <- shiny_app_traceback(x, dcomp)
111 |   shiny_unload()
112 | 
113 |   return(invisible(this_out))
114 | }
115 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | destination: docs
2 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   ps: Bootstrap
12 | 
13 | cache:
14 |   - C:\RLibrary
15 | 
16 | 
17 | environment:
18 |   NOT_CRAN: true
19 | 
20 |   global:
21 |     WARNINGS_ARE_ERRORS: 1
22 |     PKGTYPE: both
23 |     NOT_CRAN: true
24 |     USE_RTOOLS: true
25 | 
26 |   matrix:
27 |   - R_VERSION: release
28 |     R_ARCH: x64
29 | 
30 |   - R_VERSION: devel
31 | 
32 |   - R_VERSION: patched
33 | 
34 |   - R_VERSION: oldrel
35 |     RTOOLS_VERSION: 33
36 |     CRAN: http://cran.rstudio.com
37 | 
38 | 
39 | build_script:
40 |   - travis-tool.sh install_deps
41 | 
42 | test_script:
43 |   - travis-tool.sh run_tests
44 | 
45 | on_failure:
46 |   - 7z a failure.zip *.Rcheck\*
47 |   - appveyor PushArtifact failure.zip
48 | 
49 | artifacts:
50 |   - path: '*.Rcheck\**\*.log'
51 |     name: Logs
52 | 
53 |   - path: '*.Rcheck\**\*.out'
54 |     name: Logs
55 | 
56 |   - path: '*.Rcheck\**\*.fail'
57 |     name: Logs
58 | 
59 |   - path: '*.Rcheck\**\*.Rout'
60 |     name: Logs
61 | 
62 |   - path: '\*_*.tar.gz'
63 |     name: Bits
64 | 
65 |   - path: '\*_*.zip'
66 |     name: Bits
67 | 
68 | notifications:
69 |   - provider: Email
70 |     to:
71 |      - nil.gayen@gmail.com
72 |     subject: 'r-rudra tidycells build {{status}}'
73 |     message: "https://ci.appveyor.com/project/bedantaguru/tidycells"
74 |     on_build_success: false
75 |     on_build_failure: true
76 |     on_build_status_changed: true
77 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | 
 3 | ### Local
 4 | 
 5 | * Windows 10 x64 (build 17134) [x86_64-w64-mingw32/x64 (64-bit)]
 6 |   * R version 3.6.1 (2019-07-05)
 7 |   * R version 3.6.2 (2019-12-12)
 8 | 
 9 | ### WinBuilder
10 | 
11 | * Windows [x86_64-w64-mingw32 (64-bit)]
12 |   * R version 3.5.3 (2019-03-11)
13 |   * R version 3.6.2 (2019-12-12)
14 |   * R Under development (unstable) (2020-01-03 r77629)
15 | 
16 | ### Travis
17 | 
18 | * macOS High Sierra 10.13.6 [x86_64-apple-darwin15.6.0 (64-bit)]
19 |   * (R: release) R version 3.6.2 (2019-12-12)
20 | * Ubuntu 16.04.6 LTS [x86_64-pc-linux-gnu (64-bit)]
21 |   * (R: oldrel) R version 3.5.3 (2017-01-27)
22 |   * (R: release) R version 3.6.1 (2017-01-27)
23 |   * (R: devel) R Under development (unstable) (2020-01-03 r77628)
24 | 
25 | ### AppVeyor
26 | 
27 | * Windows Server 2012 R2 x64 (build 9600) [x86_64-w64-mingw32 (64-bit)]
28 |   * (release) R version 3.6.2 (2019-12-12)
29 |   * (devel) R Under development (unstable) (2020-01-03 r77629)
30 |   * (patched) R version 3.6.2 Patched (2020-01-03 r77629)
31 |   * (oldrel) R version 3.5.3 (2019-03-11)
32 | 
33 | ### RHub
34 | 
35 | #### Successful Builds
36 | 
37 | * CentOS 6 with Redhat Developer Toolset [x86_64-redhat-linux-gnu (64-bit)]
38 |   * (R from EPEL) R version 3.5.2 (2018-12-20)
39 | * Fedora Linux [x86_64-pc-linux-gnu (64-bit)]
40 |   * (R-devel GCC) R Under development (unstable) (2020-01-03 r77629)
41 |   * (R-devel clang gfortran) R Under development (unstable) (2020-01-03 r77629)
42 | * macOS 10.11 El Capitan [x86_64-apple-darwin15.6.0 (64-bit)]
43 |   * (R-release) R version 3.6.2 (2019-12-12)
44 | * Oracle Solaris 10 x86 32 bit [i386-pc-solaris2.10 (32-bit)]
45 |   * (R-patched) R version 3.6.0 (2019-04-26)
46 | * Windows Server 2008 R2 SP1 [x86_64-w64-mingw32 (64-bit)]
47 |   * (R-release 32/64 bit) R version 3.6.2 (2019-12-12)
48 |   * (R-patched 32/64 bit) R version 3.6.2 Patched (2019-12-12 r77564)
49 |   * (R-oldrel 32/64 bit) R version 3.5.3 (2019-03-11)
50 |   * (R-devel 32/64 bit) R Under development (unstable) (2019-11-08 r77393)
51 | * Windows Server 2012 [x86_64-w64-mingw32 (64-bit)]
52 |   * (R-devel Rtools4.0 32/64 bit) R version 4.0.0 Under development (Testing Rtools) (2019-09-30 r77236)
53 | 
54 | See other builds in [Dev-Notes](https://github.com/r-rudra/tidycells/blob/master/dev-notes.md#r-hub-other-builds)
55 | 
56 | ## R CMD check results
57 | 
58 | 0 errors | 0 warnings | 0 note
59 | 
60 | * **This is a update of tidycells.**
61 | * **There are _NO_ references describing the methods in this package.** 
62 | 
63 | **Note**: The methods in the package are purely based on experiences and knowledge of the author and not based on any published article. Hence there are no references to attach currently. 
64 | 
65 | Also, note that the methods used in the package are novel in this field (as per the knowledge of the author). Any potential similarity with other related work is completely unintentional.
66 | 
67 | 
68 | ## Downstream dependencies
69 | 
70 | * No downstream dependencies currently
71 | 
72 | ## (_Optional_) Other Notes 
73 | 
74 | Also Check [Dev-Notes](https://github.com/r-rudra/tidycells/blob/master/dev-notes.md)
75 | 


--------------------------------------------------------------------------------
/docs/articles/ext/compose_cells_cli1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/compose_cells_cli1.png


--------------------------------------------------------------------------------
/docs/articles/ext/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/logo.png


--------------------------------------------------------------------------------
/docs/articles/ext/marks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/marks.png


--------------------------------------------------------------------------------
/docs/articles/ext/read_cells_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/read_cells_out.png


--------------------------------------------------------------------------------
/docs/articles/ext/v12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/v12.png


--------------------------------------------------------------------------------
/docs/articles/ext/v34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/v34.png


--------------------------------------------------------------------------------
/docs/articles/ext/v56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/ext/v56.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-30-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/articles/tidycells-intro_files/figure-html/unnamed-chunk-30-2.png


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
54 | function matchedWords(hit) {
55 |   var words = [];
56 | 
57 |   var hierarchy = hit._highlightResult.hierarchy;
58 |   // loop to fetch from lvl0, lvl1, etc.
59 |   for (var idx in hierarchy) {
60 |     words = words.concat(hierarchy[idx].matchedWords);
61 |   }
62 | 
63 |   var content = hit._highlightResult.content;
64 |   if (content) {
65 |     words = words.concat(content.matchedWords);
66 |   }
67 | 
68 |   // return unique words
69 |   var words_uniq = [...new Set(words)];
70 |   return words_uniq;
71 | }
72 | 
73 | function updateHitURL(hit) {
74 | 
75 |   var words = matchedWords(hit);
76 |   var url = "";
77 | 
78 |   if (hit.anchor) {
79 |     url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 |   } else {
81 |     url = hit.url + '?q=' + escape(words.join(" "));
82 |   }
83 | 
84 |   return url;
85 | }
86 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/logo.png


--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
  1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
  2 | (function($) {
  3 |   $(function() {
  4 | 
  5 |     $('.navbar-fixed-top').headroom();
  6 | 
  7 |     $('body').css('padding-top', $('.navbar').height() + 10);
  8 |     $(window).resize(function(){
  9 |       $('body').css('padding-top', $('.navbar').height() + 10);
 10 |     });
 11 | 
 12 |     $('body').scrollspy({
 13 |       target: '#sidebar',
 14 |       offset: 60
 15 |     });
 16 | 
 17 |     $('[data-toggle="tooltip"]').tooltip();
 18 | 
 19 |     var cur_path = paths(location.pathname);
 20 |     var links = $("#navbar ul li a");
 21 |     var max_length = -1;
 22 |     var pos = -1;
 23 |     for (var i = 0; i < links.length; i++) {
 24 |       if (links[i].getAttribute("href") === "#")
 25 |         continue;
 26 |       // Ignore external links
 27 |       if (links[i].host !== location.host)
 28 |         continue;
 29 | 
 30 |       var nav_path = paths(links[i].pathname);
 31 | 
 32 |       var length = prefix_length(nav_path, cur_path);
 33 |       if (length > max_length) {
 34 |         max_length = length;
 35 |         pos = i;
 36 |       }
 37 |     }
 38 | 
 39 |     // Add class to parent <li>, and enclosing <li> if in dropdown
 40 |     if (pos >= 0) {
 41 |       var menu_anchor = $(links[pos]);
 42 |       menu_anchor.parent().addClass("active");
 43 |       menu_anchor.closest("li.dropdown").addClass("active");
 44 |     }
 45 |   });
 46 | 
 47 |   function paths(pathname) {
 48 |     var pieces = pathname.split("/");
 49 |     pieces.shift(); // always starts with /
 50 | 
 51 |     var end = pieces[pieces.length - 1];
 52 |     if (end === "index.html" || end === "")
 53 |       pieces.pop();
 54 |     return(pieces);
 55 |   }
 56 | 
 57 |   // Returns -1 if not found
 58 |   function prefix_length(needle, haystack) {
 59 |     if (needle.length > haystack.length)
 60 |       return(-1);
 61 | 
 62 |     // Special case for length-0 haystack, since for loop won't run
 63 |     if (haystack.length === 0) {
 64 |       return(needle.length === 0 ? 0 : -1);
 65 |     }
 66 | 
 67 |     for (var i = 0; i < haystack.length; i++) {
 68 |       if (needle[i] != haystack[i])
 69 |         return(i);
 70 |     }
 71 | 
 72 |     return(haystack.length);
 73 |   }
 74 | 
 75 |   /* Clipboard --------------------------*/
 76 | 
 77 |   function changeTooltipMessage(element, msg) {
 78 |     var tooltipOriginalTitle=element.getAttribute('data-original-title');
 79 |     element.setAttribute('data-original-title', msg);
 80 |     $(element).tooltip('show');
 81 |     element.setAttribute('data-original-title', tooltipOriginalTitle);
 82 |   }
 83 | 
 84 |   if(ClipboardJS.isSupported()) {
 85 |     $(document).ready(function() {
 86 |       var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type = 'submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";
 87 | 
 88 |       $(".examples, div.sourceCode").addClass("hasCopyButton");
 89 | 
 90 |       // Insert copy buttons:
 91 |       $(copyButton).prependTo(".hasCopyButton");
 92 | 
 93 |       // Initialize tooltips:
 94 |       $('.btn-copy-ex').tooltip({container: 'body'});
 95 | 
 96 |       // Initialize clipboard:
 97 |       var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
 98 |         text: function(trigger) {
 99 |           return trigger.parentNode.textContent;
100 |         }
101 |       });
102 | 
103 |       clipboardBtnCopies.on('success', function(e) {
104 |         changeTooltipMessage(e.trigger, 'Copied!');
105 |         e.clearSelection();
106 |       });
107 | 
108 |       clipboardBtnCopies.on('error', function() {
109 |         changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
110 |       });
111 |     });
112 |   }
113 | })(window.jQuery || window.$)
114 | 


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: 2.7.2
2 | pkgdown: 1.4.1
3 | pkgdown_sha: ~
4 | articles:
5 |   tidycells-intro: tidycells-intro.html
6 | 
7 | 


--------------------------------------------------------------------------------
/docs/reference/analyze_cells-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/reference/analyze_cells-1.png


--------------------------------------------------------------------------------
/docs/reference/analyze_cells-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/reference/analyze_cells-2.png


--------------------------------------------------------------------------------
/docs/reference/value_attribute_classify-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/reference/value_attribute_classify-1.png


--------------------------------------------------------------------------------
/docs/reference/value_attribute_classify-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/reference/value_attribute_classify-2.png


--------------------------------------------------------------------------------
/docs/reference/value_attribute_classify-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/docs/reference/value_attribute_classify-3.png


--------------------------------------------------------------------------------
/inst/extdata/RBI_HBS_Table_No_166.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/RBI_HBS_Table_No_166.xlsx


--------------------------------------------------------------------------------
/inst/extdata/marks.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/marks.xlsx


--------------------------------------------------------------------------------
/inst/extdata/marks_cells.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/marks_cells.rds


--------------------------------------------------------------------------------
/inst/extdata/messy/csv.docx:
--------------------------------------------------------------------------------
1 | Kid Name,Weight,Age
2 | Nakshatra,12,1.5
3 | Titas,16,6
4 | 


--------------------------------------------------------------------------------
/inst/extdata/messy/docx.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/messy/docx.html


--------------------------------------------------------------------------------
/inst/extdata/messy/html.csv:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 2 | 
 3 | <html>
 4 | <head>
 5 | 	
 6 | 	<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
 7 | 	<title></title>
 8 | 	<meta name="generator" content="LibreOffice 6.2.5.2 (Linux)"/>
 9 | 	<meta name="created" content="2015-06-05T18:17:20"/>
10 | 	<meta name="changed" content="2019-07-22T07:22:29"/>
11 | 	<meta name="AppVersion" content="15.0300"/>
12 | 	<meta name="DocSecurity" content="0"/>
13 | 	<meta name="HyperlinksChanged" content="false"/>
14 | 	<meta name="LinksUpToDate" content="false"/>
15 | 	<meta name="ScaleCrop" content="false"/>
16 | 	<meta name="ShareDoc" content="false"/>
17 | 	
18 | 	<style type="text/css">
19 | 		body,div,table,thead,tbody,tfoot,tr,th,td,p { font-family:"Calibri"; font-size:x-small }
20 | 		a.comment-indicator:hover + comment { background:#ffd; position:absolute; display:block; border:1px solid black; padding:0.5em;  } 
21 | 		a.comment-indicator { background:red; display:inline-block; border:1px solid black; width:0.5em; height:0.5em;  } 
22 | 		comment { display:none;  } 
23 | 	</style>
24 | 	
25 | </head>
26 | 
27 | <body>
28 | <table cellspacing="0" border="0">
29 | 	<colgroup width="89"></colgroup>
30 | 	<colgroup span="2" width="64"></colgroup>
31 | 	<tr>
32 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" height="20" align="left" valign=bottom bgcolor="#DEEBF7"><b><font color="#000000">Kid Name</font></b></td>
33 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="left" valign=bottom bgcolor="#DEEBF7"><b><font color="#000000">Weight</font></b></td>
34 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="left" valign=bottom bgcolor="#DEEBF7"><b><font color="#000000">Age</font></b></td>
35 | 	</tr>
36 | 	<tr>
37 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" height="20" align="left" valign=bottom><i><font color="#000000">Nakshatra</font></i></td>
38 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="right" valign=bottom sdval="12" sdnum="1033;"><font color="#000000">12</font></td>
39 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="right" valign=bottom sdval="1.5" sdnum="1033;"><font color="#000000">1.5</font></td>
40 | 	</tr>
41 | 	<tr>
42 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" height="20" align="left" valign=bottom><i><font color="#000000">Titas</font></i></td>
43 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="right" valign=bottom sdval="16" sdnum="1033;"><font color="#000000">16</font></td>
44 | 		<td style="border-top: 1px solid #000000; border-bottom: 1px solid #000000; border-left: 1px solid #000000; border-right: 1px solid #000000" align="right" valign=bottom sdval="6" sdnum="1033;"><font color="#000000">6</font></td>
45 | 	</tr>
46 | </table>
47 | <!-- ************************************************************************** -->
48 | </body>
49 | 
50 | </html>
51 | 


--------------------------------------------------------------------------------
/inst/extdata/messy/pdf.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/messy/pdf.docx


--------------------------------------------------------------------------------
/inst/extdata/messy/xls.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/messy/xls.pdf


--------------------------------------------------------------------------------
/inst/extdata/messy/xlsx.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/messy/xlsx.csv


--------------------------------------------------------------------------------
/inst/extdata/untidy.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/inst/extdata/untidy.xlsx


--------------------------------------------------------------------------------
/inst/rstudio/addins.dcf:
--------------------------------------------------------------------------------
 1 | Name: Crop Cells
 2 | Description: Interactive way to crop or delete cells
 3 | Binding: visual_crop
 4 | Interactive: true
 5 | 
 6 | Name: Cell Analysis Inspection
 7 | Description: Interactive way to inspect cell analysis
 8 | Binding: visual_data_block_inspection
 9 | Interactive: true
10 | 
11 | Name: Orientation Modification
12 | Description: Interactive way to modify attribute orientation in a cell analysis
13 | Binding: visual_orientation_modification
14 | Interactive: true
15 | 
16 | Name: Traceback Composition
17 | Description: Interactive way to see how the cell analysis is composed
18 | Binding: visual_traceback
19 | Interactive: true
20 | 
21 | Name: Classify Value/Attribute
22 | Description: Interactive way to classify cells in value/attribute
23 | Binding: visual_va_classify
24 | Interactive: true
25 | 
26 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/logo.png


--------------------------------------------------------------------------------
/man/analyse_cells.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/multi_names.R
 3 | \name{analyse_cells}
 4 | \alias{analyse_cells}
 5 | \title{Analyse Cells}
 6 | \usage{
 7 | analyse_cells(d, silent = TRUE)
 8 | }
 9 | \arguments{
10 | \item{d}{A \code{\link[=cell_df-class]{cell_df}} after \code{\link[=value_attribute_classify]{Value Attribute Classification}} done}
11 | 
12 | \item{silent}{logical scalar indicating whether to raise a warning if heuristic detection fails. (Default TRUE).}
13 | }
14 | \value{
15 | Detailed analysis of the cell data structure.
16 | Which will be a \code{\link[=cell_analysis-class]{cell_analysis}} class object.
17 | }
18 | \description{
19 | Note that this is an other name for \code{tidycells::analyze_cells}.
20 | After \code{\link[=value_attribute_classify]{Value Attribute Classification}} done on a \code{\link[=cell_df-class]{cell_df}} next task to do is
21 | analyze it's contents for data block detection, attribute orientation identification etc. The function \code{analyze_cells} (and also \code{analyse_cells})
22 | does the same for you.
23 | 
24 | \strong{Note}:
25 | \if{html}{If you are not sure about what package functions actually do or how they work together,
26 | please start with \href{../doc/tidycells-intro.html}{\code{vignette("tidycells-intro")}}.}
27 | \if{latex}{If you are not sure about what package functions actually do or how they work together,
28 | please start with \code{vignette("tidycells-intro")}.}
29 | }
30 | \details{
31 | it returns detailed analysis of the data structure including data block detection, attribute orientation detection etc.
32 | The argument \code{silent} is set to \code{TRUE} by default, as the warning will be given whenever the \code{\link[=cell_analysis-class]{cell_analysis}} is printed.
33 | 
34 | After this step one may like to do :
35 | \itemize{
36 | \item \code{\link[=compose_cells]{compose_cells}}
37 | }
38 | 
39 | If in an interactive session, following additional functions can be helpful for interactive visualizations:
40 | \itemize{
41 | \item \code{\link[=visual_data_block_inspection]{visual_data_block_inspection}}
42 | \item \code{\link[=visual_orientation_modification]{visual_orientation_modification}}
43 | \item \code{\link[=visual_traceback]{visual_traceback}}
44 | }
45 | }
46 | \seealso{
47 | \code{\link[=compose_cells]{compose_cells}}, \code{\link[=collate_columns]{collate_columns}}, \code{\link[=analyze_cells]{analyze_cells}}
48 | }
49 | \keyword{internal}
50 | 


--------------------------------------------------------------------------------
/man/analyze_cells.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/analyze_cells.R
 3 | \name{analyze_cells}
 4 | \alias{analyze_cells}
 5 | \title{Analyze Cells}
 6 | \usage{
 7 | analyze_cells(d, silent = TRUE)
 8 | }
 9 | \arguments{
10 | \item{d}{A \code{\link[=cell_df-class]{cell_df}} after \code{\link[=value_attribute_classify]{Value Attribute Classification}} done}
11 | 
12 | \item{silent}{logical scalar indicating whether to raise a warning if heuristic detection fails. (Default TRUE).}
13 | }
14 | \value{
15 | Detailed analysis of the cell data structure.
16 | Which will be a \code{\link[=cell_analysis-class]{cell_analysis}} class object.
17 | }
18 | \description{
19 | After \code{\link[=value_attribute_classify]{Value Attribute Classification}} done on a \code{\link[=cell_df-class]{cell_df}} next task to do is
20 | analyze it's contents for data block detection, attribute orientation identification etc. The function \code{analyze_cells} (and also \code{analyse_cells})
21 | does the same for you.
22 | 
23 | \strong{Note}:
24 | \if{html}{If you are not sure about what package functions actually do or how they work together,
25 | please start with \href{../doc/tidycells-intro.html}{\code{vignette("tidycells-intro")}}.}
26 | \if{latex}{If you are not sure about what package functions actually do or how they work together,
27 | please start with \code{vignette("tidycells-intro")}.}
28 | }
29 | \details{
30 | it returns detailed analysis of the data structure including data block detection, attribute orientation detection etc.
31 | The argument \code{silent} is set to \code{TRUE} by default, as the warning will be given whenever the \code{\link[=cell_analysis-class]{cell_analysis}} is printed.
32 | 
33 | After this step one may like to do :
34 | \itemize{
35 | \item \code{\link[=compose_cells]{compose_cells}}
36 | }
37 | 
38 | If in an interactive session, following additional functions can be helpful for interactive visualizations:
39 | \itemize{
40 | \item \code{\link[=visual_data_block_inspection]{visual_data_block_inspection}}
41 | \item \code{\link[=visual_orientation_modification]{visual_orientation_modification}}
42 | \item \code{\link[=visual_traceback]{visual_traceback}}
43 | }
44 | }
45 | \examples{
46 | d <- structure(c(
47 |   "block 1", "", "C", "D", "", "block 2", "", "C",
48 |   "D", "", "A", "1", "2", "", "", "A", "10", "20", "", "B", "3",
49 |   "4", "", "", "B", "30", "40"
50 | ), .Dim = c(9L, 3L))
51 | d <- as.data.frame(d)
52 | cd <- as_cell_df(d) \%>\% numeric_values_classifier()
53 | 
54 | # see it
55 | cd \%>\% plot(adaptive_txt_size = FALSE)
56 | ca <- analyze_cells(cd)
57 | 
58 | # look at the plot for detected directions
59 | plot(ca)
60 | }
61 | \seealso{
62 | \code{\link[=compose_cells]{compose_cells}}, \code{\link[=collate_columns]{collate_columns}}
63 | }
64 | 


--------------------------------------------------------------------------------
/man/as_cell_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as_cell_df.R
 3 | \name{as_cell_df}
 4 | \alias{as_cell_df}
 5 | \title{Transform data into Cell-DF Structure}
 6 | \usage{
 7 | as_cell_df(d, take_row_names = FALSE, take_col_names = FALSE)
 8 | }
 9 | \arguments{
10 | \item{d}{the data (either a matrix with column name or a data.frame)}
11 | 
12 | \item{take_row_names}{consider row names as separate cells
13 | (applicable only for data with no (row, col) information). Default is \code{FALSE}.}
14 | 
15 | \item{take_col_names}{consider column names as separate cells
16 | (applicable only for data with no (row, col) information). Default is \code{FALSE}.}
17 | }
18 | \value{
19 | An object of class \code{\link[=cell_df-class]{cell_df}}.
20 | 
21 | \strong{Note}: After this, you may like to do \code{\link[=value_attribute_classify]{Value Attribute Classification}}.
22 | }
23 | \description{
24 | Transform an R object (mostly matrix or data.frame) into a \code{\link[=cell_df-class]{cell_df}}
25 | for further processing in other \code{tidycells} functions.
26 | }
27 | \examples{
28 | 
29 | as_cell_df(iris)
30 | 
31 | # consider column name as cell
32 | as_cell_df(iris, take_col_names = TRUE)
33 | 
34 | # if the data is already in a similar format it will not further transform
35 | # which is not true for ---> unpivotr::as_cells
36 | # check ---> unpivotr::as_cells(iris) \%>\% unpivotr::as_cells()
37 | unpivotr::as_cells(iris) \%>\% as_cell_df()
38 | }
39 | \seealso{
40 | \itemize{
41 | \item \code{\link[=validate_cells]{validate_cells}} which is used to validate \code{cell_df}.
42 | \item \code{\link[unpivotr:as_cells]{as_cells}} from \code{unpivotr} package.
43 | }
44 | }
45 | 


--------------------------------------------------------------------------------
/man/basic_classifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/value_attribute_classifiers.R
 3 | \name{basic_classifier}
 4 | \alias{basic_classifier}
 5 | \alias{basic_classifier.default}
 6 | \alias{basic_classifier.cell_df}
 7 | \title{Naive \code{data_type} Based Value/Attribute Classifier}
 8 | \usage{
 9 | basic_classifier(x, ...)
10 | 
11 | \method{basic_classifier}{default}(x, ...)
12 | 
13 | \method{basic_classifier}{cell_df}(x, ...)
14 | }
15 | \arguments{
16 | \item{x}{Either a \code{\link[=cell_df-class]{cell_df}} or leave blank for function output
17 | (required by \code{\link[=value_attribute_classify]{value_attribute_classify}})}
18 | }
19 | \description{
20 | Naive \code{data_type} Based Value/Attribute Classifier
21 | }
22 | \seealso{
23 | \link[=value_attribute_classify]{value_attribute_classify}
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/cell_analysis-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/class-cell_df_analysis.R
 3 | \name{cell_analysis-class}
 4 | \alias{cell_analysis-class}
 5 | \title{\code{cell_analysis} class}
 6 | \description{
 7 | The \code{cell_analysis} class is based on list,
 8 | created in order to store analysis of cell level information (\code{\link[=cell_df-class]{cell_df}}).
 9 | }
10 | \section{Properties of \code{cell_analysis}}{
11 | 
12 | 
13 | Objects of class \code{cell_analysis} have following named nodes:
14 | \itemize{
15 | \item \code{cells} : Contains information about \code{cell_group_type} in terms of (data, minor and major attributes).(a tibble)
16 | \item \code{sections} : Contains boundaries of each data block. (a tibble)
17 | \item \code{details} : a list containing further information
18 | \item \code{cell_df} : The original \code{\link[=cell_df-class]{cell_df}} which is passed for processing
19 | }
20 | }
21 | 
22 | \section{Applicable methods on \code{cell_analysis}}{
23 | 
24 | \itemize{
25 | \item \code{print}: Prints identifier of the class and the number of blocks (and potential issues if any).
26 | \item \code{plot}: Plots (using \code{\link[ggplot2:ggplot]{ggplot2}}) the data-block information.
27 | }
28 | }
29 | 
30 | \keyword{internal}
31 | 


--------------------------------------------------------------------------------
/man/cell_composition_traceback.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cell_trace_plot.R
 3 | \name{cell_composition_traceback}
 4 | \alias{cell_composition_traceback}
 5 | \title{Display static composition traceback plot}
 6 | \usage{
 7 | cell_composition_traceback(ca, dc, trace_row = 1L)
 8 | }
 9 | \arguments{
10 | \item{ca}{A cell analysis}
11 | 
12 | \item{dc}{(optional) a composed tidy data.frame}
13 | 
14 | \item{trace_row}{the row number to trace. (Default 1)}
15 | }
16 | \description{
17 | Display static composition traceback plot
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/man/cell_df-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/class-cell_df.R
 3 | \name{cell_df-class}
 4 | \alias{cell_df-class}
 5 | \title{\code{cell_df} class}
 6 | \description{
 7 | The \code{cell_df} class is a subclass of \code{\link[tibble:tibble]{tbl_df}} and \code{\link[base:data.frame]{data.frame}},
 8 | created in order to store cell level information.
 9 | }
10 | \section{Properties of \code{cell_df}}{
11 | 
12 | 
13 | Objects of class \code{cell_df} have:
14 | \itemize{
15 | \item A \code{class} attribute of \code{c("cell_df", "cells", "rc_df", "tbl_df", "tbl", "data.frame")}.
16 | \item Two column of \code{integer} type named \code{row} and \code{col}. All entries must be positive and not \code{NA}.
17 | This represents a cell address.
18 | \item A column of \code{character} type named \code{data_type}. Which can contain only \code{numeric} or \code{character} as entries.
19 | This represents the data type of the cell (classified to only two categories).
20 | \item A column of \code{character} type named \code{value}. This stores value of corresponding cells.
21 | \item Apart from these columns it can contain other columns.
22 | However, (\code{row}, \code{col}) together should identify the row of the \code{cell_df} uniquely.
23 | }
24 | }
25 | 
26 | \section{Applicable methods on \code{cell_df}}{
27 | 
28 | \itemize{
29 | \item \code{print}: Prints identifier of the class and the content of the underlying tibble.
30 | \item \code{summary}: Calculates basic stats like number of rows and columns, number of characters and numeric fields, density etc.
31 | \item \code{plot}: Plots (using \code{\link[ggplot2:ggplot]{ggplot2}}) the cell information as ordinary table.
32 | \item \code{as.matrix}: This will transform the data back into tabular form and create a character matrix.
33 | \item \code{as.data.frame}: Similar to \code{as.matrix} this will also transform the data back into tabular form.
34 | }
35 | }
36 | 
37 | \seealso{
38 | The function \code{\link[=validate_cells]{validate_cells}} which is used to validate \code{cell_df}.
39 | 
40 | The \code{as.matrix} and \code{as.data.frame} method is similar to \code{\link[unpivotr:rectify]{unpivotr::rectify}}.
41 | 
42 | Object of this class is also compatible to most of the functions from \code{\link[unpivotr:unpivotr-package]{unpivotr}} package.
43 | }
44 | \keyword{internal}
45 | 


--------------------------------------------------------------------------------
/man/collate_columns.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/collate_columns.R
 3 | \name{collate_columns}
 4 | \alias{collate_columns}
 5 | \title{Collate Columns Based on Content}
 6 | \usage{
 7 | collate_columns(
 8 |   composed_data,
 9 |   combine_threshold = 1,
10 |   rest_cols = Inf,
11 |   retain_other_cols = FALSE,
12 |   retain_cell_address = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{composed_data}{output of \code{\link[=compose_cells]{compose_cells}} (preferably not processed)}
17 | 
18 | \item{combine_threshold}{a numerical threshold (between 0-1) for content-based collation of columns. (Default 1)}
19 | 
20 | \item{rest_cols}{number of rest columns (beyond \code{combine_threshold} joins these many numbers of columns to keep)}
21 | 
22 | \item{retain_other_cols}{whether to keep other intermediate (and possibly not so important) columns. (Default \code{FALSE})}
23 | 
24 | \item{retain_cell_address}{whether to keep columns like (\code{row}, \code{col}, \code{data_block}).
25 | This may be required for \code{\link[=cell_composition_traceback]{traceback}} (Default \code{FALSE})}
26 | }
27 | \value{
28 | A column collated data.frame
29 | }
30 | \description{
31 | After \code{\link[=compose_cells]{compose_cells}}, this function rearranges and rename attribute-columns in order to
32 | make columns properly aligned, based on the content of the columns.
33 | }
34 | \details{
35 | \itemize{
36 | \item \strong{Dependency on \emph{stringdist}}: If you have \code{\link[stringdist:stringdist-package]{stringdist}} installed,
37 | the approximate string matching will be enhanced. There may be variations in outcome if you have \code{stringdist}
38 | vs if you don't have it.
39 | \item \strong{Possibility of randomness}: If the attribute column is containing many distinct values, then a column representative sample will be drawn.
40 | Hence it is always recommended to \code{\link[base:set.seed]{set.seed}} if reproducibility is a matter of concern.
41 | }
42 | }
43 | 


--------------------------------------------------------------------------------
/man/compose_cells.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compose_cells.R
 3 | \name{compose_cells}
 4 | \alias{compose_cells}
 5 | \title{Compose a Cell Analysis to a tidy form}
 6 | \usage{
 7 | compose_cells(
 8 |   ca,
 9 |   post_process = TRUE,
10 |   attr_sep = " :: ",
11 |   discard_raw_cols = FALSE,
12 |   print_attribute_overview = FALSE,
13 |   silent = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{ca}{a cell_analysis to process}
18 | 
19 | \item{post_process}{logical scalar. If disabled a list will be returned without performing post-processing. (Default \code{TRUE})}
20 | 
21 | \item{attr_sep}{a character string to separate the attributes. (Default is \verb{<space>::<space>})}
22 | 
23 | \item{discard_raw_cols}{logical scalar. If enabled only main processed columns will be returned. (Default \code{FALSE})}
24 | 
25 | \item{print_attribute_overview}{print the overview of the attributes (4 distinct values from each attribute of each block)}
26 | 
27 | \item{silent}{whether to suppress warning message on compose failure (Default \code{FALSE})}
28 | }
29 | \value{
30 | a data.frame (as tibble) in tidy form.
31 | }
32 | \description{
33 | After \code{\link[=analyze_cells]{analyze_cells}} carried out, you may like to use this function in order to
34 | stitch the cells together as per the analyzed results, to form a meaningful structural representation (like tidy format).
35 | }
36 | \examples{
37 | cd <- 1:(9) \%>\%
38 |   matrix(nrow = 3) \%>\%
39 |   as_cell_df()
40 | cd <- sample_based_classifier(cd, attribute_sample = "1")
41 | cd <- cd \%>\% dplyr::filter(value != "1")
42 | ca <- analyze_cells(cd)
43 | 
44 | compose_cells(ca)
45 | }
46 | 


--------------------------------------------------------------------------------
/man/get_direction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_direction.R
 3 | \name{get_direction}
 4 | \alias{get_direction}
 5 | \title{get optimal direction (in terms of \code{unpivotr} directions)}
 6 | \usage{
 7 | get_direction(d_part)
 8 | }
 9 | \arguments{
10 | \item{d_part}{parts of \code{data_attr_map_raw}}
11 | }
12 | \value{
13 | a string value denoting direction
14 | }
15 | \description{
16 | get optimal direction (in terms of \code{unpivotr} directions)
17 | }
18 | \details{
19 | Used internally
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/get_direction_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_direction_df.R
 3 | \name{get_direction_df}
 4 | \alias{get_direction_df}
 5 | \title{Get Directional Orientation for Attributes corresponding to a given data
 6 | group}
 7 | \usage{
 8 | get_direction_df(dp, datt, allow_inside = FALSE)
 9 | }
10 | \arguments{
11 | \item{dp}{single data group boundary information}
12 | 
13 | \item{datt}{attribute data (including attribute group id)}
14 | }
15 | \value{
16 | Directional Orientation for Attributes
17 | }
18 | \description{
19 | Get Directional Orientation for Attributes corresponding to a given data
20 | group
21 | }
22 | \details{
23 | Used internally by get_data_block_information function
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/get_direction_metric.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_direction_metric.R
 3 | \name{get_direction_metric}
 4 | \alias{get_direction_metric}
 5 | \title{get direction metric}
 6 | \usage{
 7 | get_direction_metric(d1, a1, direction)
 8 | }
 9 | \arguments{
10 | \item{d1}{part of d_part with data_gid}
11 | 
12 | \item{a1}{part of d_part with attr_gid}
13 | 
14 | \item{direction}{direction name (compatible with \code{unpivotr})
15 | should be one of \code{\link[=get_unpivotr_direction_names]{get_unpivotr_direction_names}}}
16 | }
17 | \value{
18 | a scaled fraction denoting coverage (1 means full coverage) for the supplied direction.
19 | }
20 | \description{
21 | get direction metric
22 | }
23 | \details{
24 | Used internally by \code{\link[=get_direction]{get_direction}} function
25 | }
26 | \keyword{internal}
27 | 


--------------------------------------------------------------------------------
/man/get_group_id.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_group_id.R
 3 | \name{get_group_id}
 4 | \alias{get_group_id}
 5 | \title{Get Group ID for joined cells}
 6 | \usage{
 7 | get_group_id(dat)
 8 | }
 9 | \arguments{
10 | \item{dat}{the (data or attribute) cells (in at least row-col format)}
11 | }
12 | \value{
13 | Group ID attached information in a list
14 | }
15 | \description{
16 | Get Group ID for joined cells
17 | }
18 | \details{
19 | Used internally by get_data_block_information function
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/get_unpivotr_direction_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_unpivotr_direction_names.R
 3 | \name{get_unpivotr_direction_names}
 4 | \alias{get_unpivotr_direction_names}
 5 | \title{get unpivotr direction names}
 6 | \usage{
 7 | get_unpivotr_direction_names()
 8 | }
 9 | \value{
10 | directions as used in unpivotr package with directional grouping
11 | }
12 | \description{
13 | get unpivotr direction names
14 | }
15 | \details{
16 | Kept for compatibility. Used internally by get_direction function.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/numeric_values_classifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/value_attribute_classifiers.R
 3 | \name{numeric_values_classifier}
 4 | \alias{numeric_values_classifier}
 5 | \alias{numeric_values_classifier.default}
 6 | \alias{numeric_values_classifier.cell_df}
 7 | \title{Value/Attribute Classifier which considers number like cells as values}
 8 | \usage{
 9 | numeric_values_classifier(x, ..., allow_chars, verbose = FALSE)
10 | 
11 | \method{numeric_values_classifier}{default}(x, ..., allow_chars = NULL, verbose = FALSE)
12 | 
13 | \method{numeric_values_classifier}{cell_df}(x, ..., allow_chars, verbose = FALSE)
14 | }
15 | \arguments{
16 | \item{x}{Either a \code{\link[=cell_df-class]{cell_df}} or
17 | leave blank for function output (required by \code{\link[=value_attribute_classify]{value_attribute_classify}})}
18 | 
19 | \item{allow_chars}{(Optional) Allow characters together with numbers. (a character vector)}
20 | 
21 | \item{verbose}{Logical scalar for printing discovered cells (which gets new type by this rule)}
22 | }
23 | \description{
24 | Value/Attribute Classifier which considers number like cells as values
25 | }
26 | \seealso{
27 | \link[=value_attribute_classify]{value_attribute_classify}
28 | }
29 | \keyword{internal}
30 | 


--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils-pipe.R
 3 | \name{\%>\%}
 4 | \alias{\%>\%}
 5 | \title{Pipe operator}
 6 | \usage{
 7 | lhs \%>\% rhs
 8 | }
 9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 | 


--------------------------------------------------------------------------------
/man/read_cell_part-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/class-read_cell_part.R
 3 | \name{read_cell_part-class}
 4 | \alias{read_cell_part-class}
 5 | \title{\code{read_cell_part} class}
 6 | \description{
 7 | The \code{read_cell_part} class is based on list,
 8 | created in order to store (possible) intermediate \code{\link[=read_cells]{read_cells}} output in detailed format.
 9 | 
10 | \strong{Note:} This class is designed mainly for \emph{internal use}.
11 | It is recommended to manipulate objects of this class only for cases where desired output is not coming.
12 | }
13 | \section{Properties of \code{read_cell_part}}{
14 | 
15 | 
16 | Objects of class \code{read_cell_part} \strong{may} have following named nodes (the nodes may change based on stage):
17 | \itemize{
18 | \item \code{file_name} : file name which was given to process
19 | \item \code{stage} : stage at which it was last processed.
20 | \item \code{info} : a list containing further information on type and content of the file (provided the file got read)
21 | \item \code{is_empty} : whether the file contains no 'known tabular information'
22 | \item \code{cell_list} : list of \code{\link[=cell_df-class]{cell_df}}
23 | (possibly after \verb{Value Attribute Classification} done if stage is higher than \code{make_cells})
24 | \item \code{cell_analysis_list} : list of \code{\link[=cell_analysis-class]{cell_analysis}}
25 | \item \code{final_composition} : final composition (a \code{tibble}) with only main columns
26 | (or all column if \code{compose_main_cols_only = FALSE})
27 | \item \code{final_composition_main} : only appear if \code{compose_main_cols_only = FALSE}.
28 | This holds final composition (a \code{tibble}) with only main columns
29 | }
30 | }
31 | 
32 | \section{Applicable methods on \code{read_cell_part}}{
33 | 
34 | \itemize{
35 | \item \code{print}: Prints identifier of the class and the stage at which it is last processed.
36 | }
37 | }
38 | 
39 | \keyword{internal}
40 | 


--------------------------------------------------------------------------------
/man/read_cells.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read_cells.R
 3 | \name{read_cells}
 4 | \alias{read_cells}
 5 | \title{Read Cells from file}
 6 | \usage{
 7 | read_cells(
 8 |   x,
 9 |   at_level = c("collate", "detect_and_read", "make_cells", "va_classify", "analyze",
10 |     "compose"),
11 |   omit = NULL,
12 |   simplify = TRUE,
13 |   compose_main_cols_only = TRUE,
14 |   from_level,
15 |   silent = TRUE,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{x}{either a valid file path or a \code{\link[=read_cell_part-class]{read_cell_part}}}
21 | 
22 | \item{at_level}{till which level to process.
23 | Should be one of \code{detect_and_read}, \code{make_cells}, \code{va_classify}, \code{analyze}, \code{compose}, \code{collate}.
24 | Or simply a number (like 1 means \code{detect_and_read}, 5 means \code{compose}).}
25 | 
26 | \item{omit}{(Optional) the file-types to omit. A character vector.}
27 | 
28 | \item{simplify}{whether to simplify the output. (Default \code{TRUE}). If set to \code{FALSE} a \code{\link[=read_cell_part-class]{read_cell_part}}
29 | will be returned.}
30 | 
31 | \item{compose_main_cols_only}{whether to compose main columns only. (Default \code{TRUE}).}
32 | 
33 | \item{from_level}{(Optional) override start level. (\code{read_cells} will process after \code{from_level})}
34 | 
35 | \item{silent}{if \code{TRUE} no message will be displayed.(Default \code{TRUE})}
36 | 
37 | \item{...}{further arguments}
38 | }
39 | \value{
40 | If \code{simplify=TRUE} then different kind of object is returned in different levels (depends on \code{at_level}).
41 | If \code{at_level="compose"} then only final tibble is returned otherwise if the output is not \code{NULL} an attribute will be present
42 | named \code{"read_cells_stage"}.
43 | 
44 | If \code{simplify=FALSE} then it will return a \code{\link[=read_cell_part-class]{read_cell_part}} which you can process manually
45 | and continue again with \code{read_cells} (perhaps then \code{from_level} may be useful).
46 | }
47 | \description{
48 | This function is designed to read cell level information
49 | (and the finally \link[=analyze_cells]{analyze}, \link[=compose_cells]{compose} and \link[=collate_columns]{collate_columns})
50 | from many file types like xls, pdf, doc etc.
51 | This is a wrapper function to functions from multiple packages. The support for a specific file is dependent on
52 | the installed packages. To see the list of supported files and potentially required packages (if any) just
53 | run \code{read_cells()} in the console. This function supports the file format based on content and not based on just the file
54 | extension. That means if a file is saved as pdf and then the extension is removed (or extension modified to say \code{.xlsx})
55 | then also the \code{read_cells} will detect it as pdf and read its content.
56 | 
57 | \strong{Note} :
58 | \itemize{
59 | \item \code{read_cells} is supposed to work for any kind of data. However, if it fails in intermediate stage it will raise
60 | a warning and give results till successfully processed stage.
61 | \item The heuristic-algorithm are not well-optimized (yet) so may be slow on large files.
62 | \item If the target table has numerical values as data and text as their attribute (identifier of the data elements),
63 | straight forward method is sufficient in the majority of situations. Otherwise, you may need to utilize other functions.
64 | }
65 | 
66 | \strong{A Word of Warning} :
67 | 
68 | \emph{The functions used inside \code{read_cells} are heuristic-algorithm based. Thus, outcomes may be unexpected.
69 | It is recommend to try \code{read_cells} on the target file. If the outcome is expected., it is fine.
70 | If not try again with \code{read_cells(file_name, at_level = "compose")}. If after that also the output is not as expected
71 | then other functions are required to be used. At that time start again with \code{read_cells(file_name, at_level = "make_cells")}
72 | and proceed to further functions.}
73 | }
74 | \details{
75 | It performs following set of actions if called with default \code{at_level}.
76 | \itemize{
77 | \item \strong{detect_and_read}: Detect file type based on content and attempt to read the same in a format suitable to convert as \code{\link[=cell_df-class]{cell_df}}.
78 | \item \strong{make_cells}: Convert the file content to \code{\link[=cell_df-class]{cell_df}} using \code{\link[=as_cell_df]{as_cell_df}}.
79 | \item \strong{va_classify}: Run \link[=value_attribute_classify]{Value Attribute Classification} using \code{\link[=numeric_values_classifier]{numeric_values_classifier}}.
80 | \item \strong{analyze}: Analyze the cells using \code{\link[=analyze_cells]{analyze_cells}}.
81 | \item \strong{compose}: Compose the cell-analysis to a tidy form using \code{\link[=compose_cells]{compose_cells}}.
82 | \item \strong{collate}: Finally, collate columns based on content using \code{\link[=collate_columns]{collate_columns}}.
83 | }
84 | 
85 | \if{html}{
86 | 
87 | Here is the flowchart of the same:
88 | 
89 | \figure{read_cells.svg}{options: width=400}
90 | 
91 | }
92 | }
93 | 


--------------------------------------------------------------------------------
/man/read_cells_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read_cells.R
 3 | \name{read_cells.read_cell_part}
 4 | \alias{read_cells.read_cell_part}
 5 | \alias{read_cells.character}
 6 | \alias{read_cells.default}
 7 | \alias{read_cells.NULL}
 8 | \title{Read Cells from file}
 9 | \usage{
10 | \method{read_cells}{read_cell_part}(
11 |   x,
12 |   at_level = c("collate", "detect_and_read", "make_cells", "va_classify", "analyze",
13 |     "compose"),
14 |   omit = NULL,
15 |   simplify = TRUE,
16 |   compose_main_cols_only = TRUE,
17 |   from_level,
18 |   silent = TRUE,
19 |   ...
20 | )
21 | 
22 | \method{read_cells}{character}(
23 |   x,
24 |   at_level = c("collate", "detect_and_read", "make_cells", "va_classify", "analyze",
25 |     "compose"),
26 |   omit = NULL,
27 |   simplify = TRUE,
28 |   compose_main_cols_only = TRUE,
29 |   ...
30 | )
31 | 
32 | \method{read_cells}{default}(
33 |   x,
34 |   at_level = c("collate", "detect_and_read", "make_cells", "va_classify", "analyze",
35 |     "compose"),
36 |   omit = NULL,
37 |   simplify = TRUE,
38 |   compose_main_cols_only = TRUE,
39 |   from_level,
40 |   ...
41 | )
42 | 
43 | \method{read_cells}{`NULL`}(x, ...)
44 | }
45 | \arguments{
46 | \item{x}{either a valid file path or a \code{\link[=read_cell_part-class]{read_cell_part}}}
47 | 
48 | \item{at_level}{till which level to process.
49 | Should be one of \code{detect_and_read}, \code{make_cells}, \code{va_classify}, \code{analyze}, \code{compose}. Or simply a number.}
50 | 
51 | \item{omit}{(Optional) the file-types to omit. A character vector.}
52 | 
53 | \item{simplify}{whether to simplify the output. (Default \code{TRUE}). If set to \code{FALSE} a \code{\link[=read_cell_part-class]{read_cell_part}}
54 | will be returned.}
55 | 
56 | \item{compose_main_cols_only}{whether to compose main columns only. (Default \code{TRUE}).}
57 | 
58 | \item{from_level}{(Optional) override start level. (\code{read_cells} will process after \code{from_level})}
59 | 
60 | \item{silent}{if \code{TRUE} no message will be displayed.(Default \code{TRUE})}
61 | 
62 | \item{...}{further arguments}
63 | }
64 | \description{
65 | Read Cells from file
66 | }
67 | \seealso{
68 | \code{\link[=read_cells]{read_cells}}
69 | }
70 | \keyword{internal}
71 | 


--------------------------------------------------------------------------------
/man/sample_based_classifier.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/value_attribute_classifiers.R
 3 | \name{sample_based_classifier}
 4 | \alias{sample_based_classifier}
 5 | \alias{sample_based_classifier.default}
 6 | \alias{sample_based_classifier.cell_df}
 7 | \title{Sample Based Value/Attribute Classifier}
 8 | \usage{
 9 | sample_based_classifier(
10 |   x,
11 |   ...,
12 |   value_sample,
13 |   attribute_sample,
14 |   empty_sample,
15 |   partial_match = FALSE,
16 |   case_insensitive = FALSE,
17 |   verbose = FALSE
18 | )
19 | 
20 | \method{sample_based_classifier}{default}(
21 |   x,
22 |   ...,
23 |   value_sample = NULL,
24 |   attribute_sample = NULL,
25 |   empty_sample = NULL,
26 |   partial_match = FALSE,
27 |   case_insensitive = FALSE,
28 |   verbose = FALSE
29 | )
30 | 
31 | \method{sample_based_classifier}{cell_df}(
32 |   x,
33 |   ...,
34 |   value_sample,
35 |   attribute_sample,
36 |   empty_sample,
37 |   partial_match = FALSE,
38 |   case_insensitive = FALSE,
39 |   verbose = FALSE
40 | )
41 | }
42 | \arguments{
43 | \item{x}{Either a \code{\link[=cell_df-class]{cell_df}} or leave blank for function output
44 | (required by \code{\link[=value_attribute_classify]{value_attribute_classify}})}
45 | 
46 | \item{value_sample}{Sample of values (an optional \code{character} vector)}
47 | 
48 | \item{attribute_sample}{Sample of attribute (an optional \code{character} vector)}
49 | 
50 | \item{empty_sample}{Sample of empty cells (an optional \code{character} vector)}
51 | 
52 | \item{partial_match}{Logical scalar indicating whether partial match is allowed. (Default \code{FALSE})}
53 | 
54 | \item{case_insensitive}{Logical scalar indicating whether match will be case insensitive. (Default \code{FALSE})}
55 | 
56 | \item{verbose}{Logical scalar for printing discovered cells (which gets new type by this rule)}
57 | }
58 | \description{
59 | Sample Based Value/Attribute Classifier
60 | }
61 | \seealso{
62 | \link[=value_attribute_classify]{value_attribute_classify}
63 | }
64 | \keyword{internal}
65 | 


--------------------------------------------------------------------------------
/man/tidycells-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tidycells-package.R
 3 | \docType{package}
 4 | \name{tidycells-package}
 5 | \alias{tidycells}
 6 | \alias{tidycells-package}
 7 | \title{\code{tidycells} package}
 8 | \description{
 9 | \strong{Read Tabular Data from Diverse Sources and Easily Make Them Tidy}
10 | }
11 | \details{
12 | The package provides utilities to read, cells from complex tabular data
13 | and heuristic detection based structural assignment of those cells to a
14 | columnar or tidy format. Read functionality has the ability to read (in
15 | a unified manner) structured, partially structured or unstructured
16 | tabular data (usually spreadsheets for public data dissemination and
17 | aimed for common human understanding) from various types of documents.
18 | The tabular information is read as cells. The 'structure assignment'
19 | functionality has both supervised and unsupervised way of assigning
20 | cells data to columnar/tidy format. Multiple disconnected blocks of
21 | tables in a single sheet are also handled appropriately. These tools are
22 | suitable for unattended conversation of (maybe a pile of) messy tables
23 | (like government data) into a consumable format(usable for further
24 | analysis and data wrangling).
25 | 
26 | To get started check out \href{../doc/tidycells-intro.html}{\code{vignette("tidycells-intro")}}.
27 | }
28 | \seealso{
29 | Useful links:
30 | \itemize{
31 |   \item \url{https://r-rudra.github.io/tidycells/}
32 |   \item \url{https://github.com/r-rudra/tidycells}
33 |   \item Report bugs at \url{https://github.com/r-rudra/tidycells/issues}
34 | }
35 | 
36 | }
37 | \author{
38 | \strong{Maintainer}: Indranil Gayen \email{nil.gayen@gmail.com} (\href{https://orcid.org/0000-0003-0197-1944}{ORCID})
39 | 
40 | }
41 | \keyword{internal}
42 | 


--------------------------------------------------------------------------------
/man/validate_cells.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_cells.R
 3 | \name{validate_cells}
 4 | \alias{validate_cells}
 5 | \title{Validate cell-DF}
 6 | \usage{
 7 | validate_cells(dat)
 8 | }
 9 | \arguments{
10 | \item{dat}{An R object}
11 | }
12 | \value{
13 | Logical scalar. If the value is \code{FALSE}, it will contain attribute named \code{msg}.
14 | }
15 | \description{
16 | Validate cell-DF
17 | }
18 | \details{
19 | It checks for following facts:
20 | \itemize{
21 | \item Whether \code{dat} is a \code{data.frame}
22 | \item Whether \code{dat} conforms to \code{rc_df} format
23 | \item Whether \code{dat} conforms to \code{cell_df} format
24 | If all the checks are passed then it returns \code{TRUE} else it returns \code{FALSE} with \code{msg} attribute, indicating reason for validation failure.
25 | }
26 | }
27 | \examples{
28 | # returns TRUE
29 | validate_cells(tibble::tibble(row = 1, col = 2, data_type = "numeric", value = "1"))
30 | 
31 | # this is FALSE
32 | chk <- validate_cells(tibble::tibble(row = 1, col = 2, data_type = "numeric"))
33 | 
34 | # msg
35 | attr(chk, "msg")
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/man/value_attribute_classify.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/value_attribute_classify.R
 3 | \name{value_attribute_classify}
 4 | \alias{value_attribute_classify}
 5 | \title{Value/Attribute Classifier}
 6 | \usage{
 7 | value_attribute_classify(d, classifier = basic_classifier())
 8 | }
 9 | \arguments{
10 | \item{d}{a Cell DF}
11 | 
12 | \item{classifier}{a classifier}
13 | }
14 | \value{
15 | a Cell DF with Value/Attribute Classification. The underlying tibble will contain an extra column named \code{type}.
16 | }
17 | \description{
18 | After \code{\link[=as_cell_df]{as_cell_df}} (entry  point to \code{tidycells}) you may need to use this function
19 | or individual \emph{Value/Attribute Classifier}-functions as listed below in \emph{"see also"} - section.
20 | 
21 | Here the idea is to classify all cells into either \code{value}, \code{attribute}, \code{empty} which will be
22 | used by \code{\link[=analyze_cells]{analyze_cells}} for further processing.
23 | }
24 | \details{
25 | In order to understand the data orientation and detect data-blocks Cell DF requires additional column named \code{type}.
26 | This \code{type} column potentially contains either \code{value}, \code{attribute}, \code{empty}. The \code{value} are given corresponding to cells
27 | with observations in it. The tag, \code{attribute} is for the identifier of these cells. Lastly, \code{empty} cells are useless cells or
28 | cells with no meaningful information.
29 | 
30 | For \code{classifier} following options are present:
31 | \itemize{
32 | \item \code{basic_classifier} : naive classifier which recode \code{data_type}.
33 | \item \code{sample_based_classifier} : sample-based classifier.
34 | \item \code{numeric_values_classifier} : considers number like cells as values.
35 | }
36 | 
37 | Each of the above are available as individual functions. Those can also be directly applied on a \code{cell-df}.
38 | However, it is recommended to use \code{value_attribute_classify} as it tests for integrity after classification.
39 | }
40 | \examples{
41 | 
42 | iris \%>\%
43 |   as_cell_df() \%>\%
44 |   sample_based_classifier(value_sample = "setosa") \%>\%
45 |   plot()
46 | 
47 | iris \%>\%
48 |   as_cell_df() \%>\%
49 |   sample_based_classifier(value_sample = "setosa") \%>\%
50 |   numeric_values_classifier() \%>\%
51 |   plot()
52 | 
53 | if (rlang::is_installed("tidyxl")) {
54 |   cdn <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") \%>\%
55 |     tidyxl::xlsx_cells()
56 |   cdn <- cdn \%>\%
57 |     dplyr::filter(sheet == sheet[1]) \%>\%
58 |     as_cell_df()
59 | 
60 |   # all of these are same except value_attribute_classify will perform validate_cells once again
61 |   cd1 <- sample_based_classifier(cdn, value_sample = "APR")
62 |   cd2 <- sample_based_classifier(value_sample = "APR")(cdn)
63 |   cd3 <- value_attribute_classify(cdn,
64 |     classifier = sample_based_classifier(value_sample = "APR")
65 |   )
66 |   # see it
67 |   plot(cd3)
68 | }
69 | }
70 | \seealso{
71 | Individual classifier functions:
72 | \itemize{
73 | \item \link[=basic_classifier]{basic_classifier}
74 | \item \link[=sample_based_classifier]{sample_based_classifier}
75 | \item \link[=numeric_values_classifier]{numeric_values_classifier},
76 | }
77 | 
78 | For interactive Value/Attribute Classification check \link[=visual_va_classify]{visual_va_classify}
79 | }
80 | 


--------------------------------------------------------------------------------
/man/visual_functions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/visual_functions.R
 3 | \name{visual_crop}
 4 | \alias{visual_crop}
 5 | \alias{visual_va_classify}
 6 | \alias{visual_data_block_inspection}
 7 | \alias{visual_orientation_modification}
 8 | \alias{visual_traceback}
 9 | \title{Interactive operations and Visualizations}
10 | \usage{
11 | visual_crop(d)
12 | 
13 | visual_va_classify(d)
14 | 
15 | visual_data_block_inspection(x)
16 | 
17 | visual_orientation_modification(x)
18 | 
19 | visual_traceback(x, dcomp)
20 | }
21 | \arguments{
22 | \item{d}{a cell_df.}
23 | 
24 | \item{x}{a cell_analysis.}
25 | 
26 | \item{dcomp}{a composition (a tibble rendered by \code{\link[=compose_cells]{compose_cells}}).}
27 | }
28 | \value{
29 | \code{visual_crop} returns a cell_df after the interactive operations.
30 | 
31 | \code{visual_va_classify} returns a cell_df after the interactive operations.
32 | 
33 | \code{visual_orientation_modification} returns a cell_analysis after the interactive operations.
34 | 
35 | \code{visual_traceback} returns a composed tidy data (as a tibble).
36 | }
37 | \description{
38 | These functions require \code{\link[shiny:shiny-package]{shiny}} and \code{miniUI}.
39 | Each of these modules has option for interactive plots (using \code{plotly}).
40 | However, for the same you need to have \code{plotly} installed.
41 | }
42 | \details{
43 | Each of these functions are designed for interactive sessions only.
44 | Arguments to these functions are optional as it tries to pick those from global environment.
45 | If there are multiple variables defined in global environment which are of same desired class then you will get option to choose
46 | one of them. These are also available as \href{https://rstudio.github.io/rstudioaddins/}{RStudio Addin}
47 | (if you are using RStudio IDE you'll get those addin in the RStudio addin panel).
48 | 
49 | Following are the names used in RStudio Addin
50 | \itemize{
51 | \item \emph{Crop Cells}: \code{visual_crop}
52 | \item \emph{Cell Analysis Inspection}: \code{visual_data_block_inspection}
53 | \item \emph{Orientation Modification}: \code{visual_orientation_modification}
54 | \item \emph{Traceback Composition}: \code{visual_traceback}
55 | \item \emph{Classify Value/Attribute}: \code{visual_va_classify}
56 | }
57 | 
58 | For each of the modules it tries to load and unload \code{shiny} before and after the execution of the module.
59 | You can disable the same by setting \code{options(AutoUnloadShiny = FALSE)}.
60 | 
61 | \code{visual_traceback} requires package \code{DT}.
62 | If \code{DT} is not present you may use \code{\link[=cell_composition_traceback]{cell_composition_traceback}}
63 | }
64 | \seealso{
65 | \code{\link[=value_attribute_classify]{value_attribute_classify}}
66 | 
67 | \code{\link[=cell_composition_traceback]{cell_composition_traceback}}
68 | }
69 | \keyword{internal}
70 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tidycells)
3 | 
4 | 
5 | test_check("tidycells")
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test-VA_classifier.R:
--------------------------------------------------------------------------------
 1 | test_that("numeric_values_classifier works", {
 2 |   skip_if_not_installed("tidyxl")
 3 |   cd <- system.file("extdata", "untidy.xlsx", package = "tidycells") %>%
 4 |     tidyxl::xlsx_cells()
 5 |   cd <- as_cell_df(cd)
 6 | 
 7 |   cdn <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") %>%
 8 |     tidyxl::xlsx_cells()
 9 |   cdn <- cdn %>%
10 |     dplyr::filter(sheet == sheet[1]) %>%
11 |     as_cell_df()
12 | 
13 |   cd1 <- numeric_values_classifier(cd)
14 |   cd2 <- numeric_values_classifier()(cd)
15 |   cd3 <- value_attribute_classify(cd, classifier = numeric_values_classifier())
16 | 
17 |   cdn1 <- numeric_values_classifier(cdn)
18 | 
19 |   expect_output(
20 |     numeric_values_classifier(cd, verbose = TRUE),
21 |     "New cells detected as different type\n# A tibble: 579 x 6"
22 |   )
23 |   expect_output(
24 |     numeric_values_classifier(cdn, verbose = TRUE),
25 |     "New cells detected as different type\n# A tibble: 50 x 6"
26 |   )
27 |   expect_output(
28 |     summary(cdn1),
29 |     "Types: 838 values, 181 attributes and 50 empty cells"
30 |   )
31 |   expect_output(
32 |     print(cdn1),
33 |     "With Value/Attribute Classification"
34 |   )
35 |   expect_identical(cd1, cd2)
36 |   expect_identical(cd1, cd3)
37 |   expect_equal(c(table(cd1$type)), c("attribute" = 244, "value" = 579))
38 | })
39 | 
40 | 
41 | test_that("sample_based_classifier works", {
42 |   skip_if_not_installed("tidyxl")
43 |   cdn <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") %>%
44 |     tidyxl::xlsx_cells()
45 |   cdn <- cdn %>%
46 |     dplyr::filter(sheet == sheet[1]) %>%
47 |     as_cell_df()
48 | 
49 |   cd1 <- sample_based_classifier(cdn, value_sample = "APR")
50 |   cd2 <- sample_based_classifier(value_sample = "APR")(cdn)
51 |   cd3 <- value_attribute_classify(cdn, classifier = sample_based_classifier(value_sample = "APR"))
52 | 
53 | 
54 |   cdn0 <- cdn %>% numeric_values_classifier()
55 |   cdn2 <- cdn0 %>% sample_based_classifier(value_sample = "APR")
56 |   cdn3 <- cdn0 %>% sample_based_classifier(value_sample = "aug")
57 | 
58 |   cdn4 <- cdn0 %>%
59 |     sample_based_classifier(attribute_sample = "APR") %>%
60 |     # revese mapping
61 |     sample_based_classifier(value_sample = "APR")
62 | 
63 |   expect_output(summary(cdn2), "929 values, 90 attributes and 50 empty cells")
64 |   expect_output(summary(cdn3), "838 values, 181 attributes and 50 empty cells")
65 |   expect_identical(summary(cdn3, no_print = TRUE), summary(cdn0, no_print = TRUE))
66 |   expect_output(summary(cdn4), "929 values, 90 attributes and 50 empty cells")
67 |   expect_identical(cd1, cd2)
68 |   expect_identical(cd1, cd3)
69 |   expect_equal(c(table(cd1$type)), c("attribute" = 137, "value" = 932))
70 | })
71 | 


--------------------------------------------------------------------------------
/tests/testthat/test-analyze_cells.R:
--------------------------------------------------------------------------------
 1 | test_that("analyze_cell works: base", {
 2 |   d <- structure(c(
 3 |     "block 1", "", "C", "D", "", "block 2", "", "C",
 4 |     "D", "", "A", "1", "2", "", "", "A", "10", "20", "", "B", "3",
 5 |     "4", "", "", "B", "30", "40"
 6 |   ), .Dim = c(9L, 3L))
 7 |   d <- as.data.frame(d)
 8 |   cd <- as_cell_df(d) %>% numeric_values_classifier()
 9 |   ca <- analyze_cells(cd)
10 | 
11 |   expect_output(print(ca), "Total blocks: 2")
12 |   expect_equal(ca$sections %>%
13 |     dplyr::select(-gid) %>%
14 |     unlist() %>%
15 |     unique() %>%
16 |     sort(), c(1, 2, 3, 4, 6, 9))
17 |   expect_identical(ca$cell_df, cd)
18 |   expect_equal(names(ca$details), c(
19 |     "attr_details", "data_details",
20 |     "data_attr_map_raw", "definiteness_checks"
21 |   ))
22 | })
23 | 
24 | 
25 | test_that("analyze_cell works: tidyxl", {
26 |   skip_if_not_installed("tidyxl")
27 | 
28 |   cd <- system.file("extdata", "untidy.xlsx", package = "tidycells", mustWork = TRUE) %>%
29 |     tidyxl::xlsx_cells() %>%
30 |     as_cell_df()
31 | 
32 |   cd <- numeric_values_classifier(cd)
33 | 
34 |   ca <- analyze_cells(cd)
35 | 
36 |   dc <- compose_cells(ca, discard_raw_cols = TRUE)
37 | 
38 |   dc_sel <- dc %>%
39 |     dplyr::select(-row, -col, -data_block, -value) %>%
40 |     unique()
41 | 
42 |   expect_equal(ca$details$data_attr_map_raw$data_gid %>% unique() %>% length(), 9)
43 |   expect_equal(
44 |     dc_sel$minor_1 %>% unique() %>% sort(),
45 |     c(
46 |       "ABOVE LEFT", "ABOVE LEFT border", "BELOW RIGHT", "BELOW RIGHT border",
47 |       "NNE WSW", "NNW WNW :: NNE WSW", "SSE ESE", "SSE ESE Disconnected",
48 |       "SSW ENE"
49 |     )
50 |   )
51 | })
52 | 


--------------------------------------------------------------------------------
/tests/testthat/test-as_cell_df.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("as_cell_df() works on tidyxl::xlsx_cells on single sheet", {
 3 |   skip_if_not_installed("tidyxl")
 4 |   cd <- system.file("extdata", "untidy.xlsx", package = "tidycells") %>%
 5 |     tidyxl::xlsx_cells()
 6 |   d0 <- as_cell_df(cd)
 7 | 
 8 |   cd <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") %>%
 9 |     tidyxl::xlsx_cells()
10 | 
11 |   d1 <- cd %>%
12 |     dplyr::filter(sheet == sheet[1]) %>%
13 |     as_cell_df()
14 | 
15 |   expect_identical(d0, as_cell_df(d0))
16 |   expect_true(inherits(d0, "cell_df"))
17 |   expect_output(summary(d0), "88 x 25")
18 |   expect_true(inherits(d1, "cell_df"))
19 |   expect_output(summary(d1), "231 characters and 838 numbers")
20 | })
21 | 
22 | test_that("as_cell_df() does not work on tidyxl::xlsx_cells on multiple sheets", {
23 |   skip_if_not_installed("tidyxl")
24 |   cd <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells") %>%
25 |     tidyxl::xlsx_cells()
26 | 
27 |   expect_error(as_cell_df(cd))
28 | })
29 | 
30 | test_that("as_cell_df() works on readr::melt_csv", {
31 |   skip_if_not_installed("readr")
32 |   skip_if_not_installed("datasets")
33 |   tf <- tempfile()
34 |   write.csv(head(iris, n = 7), tf, row.names = FALSE)
35 |   d0 <- readr::melt_csv(tf) %>% as_cell_df()
36 |   unlink(tf)
37 | 
38 |   expect_identical(d0, as_cell_df(d0))
39 |   expect_true(inherits(d0, "cell_df"))
40 |   expect_output(summary(d0), "8 x 5")
41 | })
42 | 
43 | test_that("as_cell_df() works on unpivotr::as_cells", {
44 |   d0 <- unpivotr::as_cells(data.frame(x = 1:26, letters)) %>% as_cell_df()
45 | 
46 |   expect_identical(d0, as_cell_df(d0))
47 |   expect_true(inherits(d0, "cell_df"))
48 |   expect_output(summary(d0), "26 characters and 26 numbers")
49 | })
50 | 
51 | test_that("as_cell_df() works on data.frame", {
52 |   d0 <- as_cell_df(data.frame(x = 1:26, y = letters))
53 | 
54 |   expect_identical(d0, as_cell_df(d0))
55 |   expect_true(inherits(d0, "cell_df"))
56 |   expect_output(summary(d0), "26 characters and 26 numbers")
57 | 
58 |   skip_if_not_installed("datasets")
59 |   d1 <- as_cell_df(datasets::iris)
60 |   expect_true(inherits(d1, "cell_df"))
61 | })
62 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cells_to_df.R:
--------------------------------------------------------------------------------
 1 | test_that("cell_df to DF (and vice versa) works", {
 2 |   d0 <- data.frame(x = 1:26, letters, stringsAsFactors = FALSE)
 3 |   d1 <- as_cell_df(d0)
 4 | 
 5 |   d2 <- as.data.frame(d1)
 6 |   d3 <- as_cell_df(d2)
 7 |   d4 <- as.data.frame(d3)
 8 | 
 9 |   expect_equal(dim(d1), c(52, 4))
10 |   expect_equal(dim(d3), c(52, 4))
11 |   expect_equal(dim(d2), dim(d0))
12 |   expect_equal(dim(d4), dim(d0))
13 |   expect_identical(d2, d4)
14 |   expect_identical(d3, as_cell_df(d4))
15 | })
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test-collate_columns.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | test_that("collate_columns works", {
 5 |   d <- system.file("extdata", "marks_cells.rds", package = "tidycells", mustWork = TRUE) %>%
 6 |     readRDS()
 7 |   d <- numeric_values_classifier(d)
 8 |   da <- analyze_cells(d)
 9 | 
10 |   expect_output(
11 |     dc1 <- compose_cells(da, print_attribute_overview = TRUE),
12 |     "Nakshatra Gayen, Titas Gupta, Ujjaini Gayen, Utsyo Roy"
13 |   )
14 |   dc2 <- compose_cells(da, post_process = FALSE)
15 |   cc1 <- collate_columns(dc1)
16 |   cc2 <- collate_columns(dc2)
17 | 
18 |   expect_equal(cc1, cc2)
19 |   expect_identical(
20 |     cc1 %>% unlist() %>% unique() %>% sort(),
21 |     cc2 %>% unlist() %>% unique() %>% sort()
22 |   )
23 | 
24 |   #  all School should appear in single column
25 |   expect_equal(
26 |     cc1 %>% purrr::map_lgl(~ any(stringr::str_detect(.x, "School"))) %>% sum(),
27 |     1
28 |   )
29 |   # all Male/Female should appear in single column
30 |   expect_equal(
31 |     cc1 %>% purrr::map_lgl(~ any(stringr::str_detect(.x, "Male"))) %>% sum(),
32 |     1
33 |   )
34 |   # strict test (may be dropped in future)
35 |   expect_equal(
36 |     cc1 %>% map(~ .x %>%
37 |       unique() %>%
38 |       length()) %>% unlist() %>% sort() %>% as.numeric(),
39 |     c(1, 2, 3, 3, 12, 12)
40 |   )
41 | })
42 | 
43 | 
44 | 
45 | 
46 | test_that("collate_columns colname convention works", {
47 |   expect_error(collate_columns(NULL), "Given composed_data has no known format")
48 |   expect_error(collate_columns(tibble()), "Given composed_data has no known format")
49 |   expect_error(collate_columns(tibble(col = 1, value = 1, data_block = 1, table_tag = 1, major_1 = 1)), "Given composed_data has no known format")
50 |   expect_error(collate_columns(tibble(row = 1, value = 1, data_block = 1, table_tag = 1, major_1 = 1)), "Given composed_data has no known format")
51 |   expect_error(collate_columns(tibble(row = 1, col = 1, data_block = 1, table_tag = 1, major_1 = 1)), "Given composed_data has no known format")
52 |   expect_error(collate_columns(tibble(row = 1, col = 1, value = 1, table_tag = 1, major_1 = 1)), "Given composed_data has no known format")
53 | })
54 | 


--------------------------------------------------------------------------------
/tests/testthat/test-optional_package.R:
--------------------------------------------------------------------------------
 1 | test_that("optional_package dependency test", {
 2 |   fold <- system.file("extdata", "messy", package = "tidycells", mustWork = TRUE)
 3 |   dm <- tibble::tibble(fn = list.files(fold, full.names = TRUE))
 4 | 
 5 |   dm <- dm %>%
 6 |     dplyr::mutate(original = dm$fn %>%
 7 |       purrr::map_chr(~ basename(.x) %>%
 8 |         stringr::str_split("\\.") %>%
 9 |         purrr::map_chr(1)))
10 | 
11 |   # both option for csv must result in same output (on test data at least)
12 |   if (is_available("readr")) {
13 |     d1 <- read_cells(dm$fn[dm$original == "csv"])
14 |     not_available("readr")
15 |     d2 <- read_cells(dm$fn[dm$original == "csv"])
16 |     not_available()
17 |     expect_equal(d1, d2)
18 |   }
19 | 
20 |   # both option for xls must result in same output (on test data at least)
21 |   if (is_available("xlsx") & is_available("readxl")) {
22 |     d1 <- read_cells(dm$fn[dm$original == "xls"])
23 |     not_available("xlsx")
24 |     expect_message(d2 <- read_cells(dm$fn[dm$original == "xls"]), "readxl")
25 |     expect_equal(d1, d2)
26 |     not_available(c("readxl", "docxtractr"))
27 |     not_available("cli")
28 |     expect_output(possible_to_support(), "readxl")
29 |     expect_output(possible_to_support(), "docxtractr")
30 |     expect_null(read_cells(dm$fn[dm$original == "xls"]))
31 |     not_available()
32 |   }
33 | })
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-print_cell_analysis.R:
--------------------------------------------------------------------------------
 1 | test_that("print cell-analysis works works", {
 2 |   x <- head(iris)
 3 |   cd <- x %>%
 4 |     as_cell_df(take_col_names = TRUE) %>%
 5 |     numeric_values_classifier()
 6 |   ca <- analyze_cells(cd)
 7 | 
 8 |   # following should run without error or warning
 9 |   wnow <- options("warn")[[1]]
10 |   options(warn = 2)
11 | 
12 |   plot(ca)
13 |   plot(ca, adaptive_txt_size = FALSE, no_txt = FALSE)
14 |   plot(ca,
15 |     prior_plot = plot(cd, no_plot = TRUE, adaptive_txt_size = FALSE)
16 |   )
17 |   plot(ca,
18 |     prior_plot = plot(cd, no_plot = TRUE, adaptive_txt_size = FALSE),
19 |     gids = 0
20 |   )
21 |   plot(ca,
22 |     zoom_selected_gids = TRUE,
23 |     dat_att_boundary = TRUE, block_boundary = FALSE
24 |   )
25 |   plot(ca, plot_issues = TRUE)
26 | 
27 |   # also the traceback plot works
28 |   cell_composition_traceback(ca, trace_row = 1)
29 |   cell_composition_traceback(ca, trace_row = 10)
30 | 
31 |   options(warn = wnow)
32 | 
33 |   expect_true(inherits(plot(ca, no_plot = TRUE), "ggplot"))
34 | })
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-print_cell_df.R:
--------------------------------------------------------------------------------
 1 | test_that("print cell-df works", {
 2 |   x <- head(iris)
 3 |   cd <- x %>% as_cell_df()
 4 |   cdv <- cd %>% basic_classifier()
 5 |   g1 <- plot(cd, no_plot = TRUE)
 6 |   g2 <- plot(cdv, no_plot = TRUE)
 7 |   # following should run without error or warning
 8 |   wnow <- options("warn")[[1]]
 9 |   options(warn = 2)
10 | 
11 |   plot(cdv, no_fill = TRUE)
12 |   plot(cdv, adaptive_txt_size = FALSE)
13 |   plot(cdv, no_txt = TRUE)
14 | 
15 |   options(warn = wnow)
16 | 
17 |   expect_equal(rlang::as_name(g1$mapping$fill), "data_type")
18 |   expect_equal(rlang::as_name(g2$mapping$fill), "type")
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-read_cells_real.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("read_cells on real data works I", {
 3 |   skip_if_not_installed("tidyxl")
 4 | 
 5 |   dcpi <- system.file("extdata", "RBI_HBS_Table_No_166.xlsx", package = "tidycells", mustWork = TRUE) %>%
 6 |     read_cells(at_level = "compose")
 7 | 
 8 |   chk1 <- dcpi$major_1 %>%
 9 |     unique() %>%
10 |     stringr::str_detect("[a-zA-Z]")
11 |   chk1 <- all(!chk1)
12 |   expect_equal(unique(dcpi$minor_2), "Year/Month")
13 |   expect_true(chk1)
14 |   expect_equal(
15 |     dcpi$major_2 %>% unique() %>% tolower() %>% sort(),
16 |     month.abb %>% tolower() %>% sort()
17 |   )
18 | })
19 | 
20 | 
21 | test_that("sample analysis on real data works II", {
22 |   dwss <- readRDS("testdata/WSS.rds")
23 | 
24 |   da <- analyze_cells(dwss)
25 |   dcmp <- compose_cells(da, discard_raw_cols = TRUE)
26 | 
27 |   chk1 <- dcmp$major_1 %>%
28 |     unique() %>%
29 |     stringr::str_detect("[a-zA-Z]")
30 |   chk1 <- all(!chk1)
31 | 
32 |   expect_true(chk1)
33 |   expect_equal(
34 |     dcmp$major_2 %>% unique() %>% sort() %>% .[[15]],
35 |     "3 Rupee Securities (including Treasury Bills)"
36 |   )
37 | })
38 | 
39 | 
40 | test_that("sample analysis on real data works III", {
41 |   cd <- readRDS("testdata/enron_from_unpivotr_processed.rds")
42 | 
43 |   ca <- analyze_cells(cd)
44 |   expect_warning(dcmp <- compose_cells_raw(ca, discard_raw_cols = TRUE, ask_user = FALSE), "failed to compose")
45 | 
46 | 
47 |   expect_equal(
48 |     dcmp$data_block %>% unique() %>% length(),
49 |     7
50 |   )
51 |   expect_equal(
52 |     dcmp$major_1 %>% unique() %>% sort() %>% .[[6]],
53 |     "Cash :: to :: 2002-03-01"
54 |   )
55 | })
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-use_cases.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # I'll add few important masked data and expected outcome to make the logic robust
 3 | # ex1 added just after CRAN release. Thanks to the user for this contribution
 4 | 
 5 | test_that("use cases from users works - I", {
 6 |   cd <- readRDS("testusecases/ex1.rds")
 7 |   ca <- analyze_cells(cd)
 8 |   d0 <- ca$details$data_attr_map_raw %>% distinct(attr_gid, data_gid, direction, direction_group, attr_group)
 9 |   chk_str <- d0 %>%
10 |     filter(attr_group == "major") %>%
11 |     distinct(direction, direction_group) %>%
12 |     arrange(direction, direction_group) %>%
13 |     summarise(paste0(direction, ",", direction_group, collapse = ";")) %>%
14 |     pull()
15 |   expect_output(print(ca), "Total blocks: 1")
16 |   expect_equal(nrow(ca$details$data_details$group_id_boundary), 1)
17 |   expect_equal(chk_str, "E,WE;N,NS;W,WE")
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/testdata/WSS.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testdata/WSS.rds


--------------------------------------------------------------------------------
/tests/testthat/testdata/badcells.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testdata/badcells.rds


--------------------------------------------------------------------------------
/tests/testthat/testdata/doc.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testdata/doc.doc


--------------------------------------------------------------------------------
/tests/testthat/testdata/enron_from_unpivotr_processed.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testdata/enron_from_unpivotr_processed.rds


--------------------------------------------------------------------------------
/tests/testthat/testdata/test.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testdata/test.xls


--------------------------------------------------------------------------------
/tests/testthat/testusecases/ex1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/tests/testthat/testusecases/ex1.rds


--------------------------------------------------------------------------------
/tidycells.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: No
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | PackageRoxygenize: rd,collate,namespace
23 | 
24 | QuitChildProcessesOnExit: Yes
25 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/ext/compose_cells_cli1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/compose_cells_cli1.png


--------------------------------------------------------------------------------
/vignettes/ext/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/logo.png


--------------------------------------------------------------------------------
/vignettes/ext/marks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/marks.png


--------------------------------------------------------------------------------
/vignettes/ext/read_cells_out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/read_cells_out.png


--------------------------------------------------------------------------------
/vignettes/ext/v12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/v12.png


--------------------------------------------------------------------------------
/vignettes/ext/v34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/v34.png


--------------------------------------------------------------------------------
/vignettes/ext/v56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-rudra/tidycells/fdf37ecea15ef6513a935845c8d9a408d6fb30c1/vignettes/ext/v56.png


--------------------------------------------------------------------------------