├── .DS_Store
├── .coveragerc
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report_cleaning.md
│ ├── bug_report_connector.md
│ ├── bug_report_eda.md
│ ├── feature_request.md
│ └── task-template.md
├── pull_request_template.md
└── workflows
│ ├── benchmark.yml
│ ├── ci.yml
│ ├── clean_gui.yml
│ └── release.yml
├── .gitignore
├── .gitmodules
├── .pylintrc
├── Justfile
├── LICENSE
├── README.md
├── assets
├── clean_example_1.jpg
├── clean_example_2.jpg
├── clean_example_3.jpg
├── clean_video_cover.png
├── connector.png
├── connector_concurrency.gif
├── connector_main.gif
├── connector_pagination.gif
├── eda_demo.gif
├── eda_video_cover.png
├── icon_cropped.jpg
├── logo.png
├── logo_cropped.jpg
├── plot(df).gif
├── plot(df,x)_cat.gif
├── plot(df,x)_num.gif
├── plot_correlation(df).gif
├── plot_missing(df).gif
└── plot_missing(df, x).gif
├── codecov.yaml
├── dataprep
├── .DS_Store
├── __init__.py
├── assets
│ ├── ellipse.npz
│ └── english_stopwords.py
├── clean
│ ├── .DS_Store
│ ├── __init__.py
│ ├── address_utils.py
│ ├── clean_ad_nrt.py
│ ├── clean_address.py
│ ├── clean_al_nipt.py
│ ├── clean_ar_cbu.py
│ ├── clean_ar_cuit.py
│ ├── clean_ar_dni.py
│ ├── clean_at_uid.py
│ ├── clean_at_vnr.py
│ ├── clean_au_abn.py
│ ├── clean_au_acn.py
│ ├── clean_au_tfn.py
│ ├── clean_be_iban.py
│ ├── clean_be_vat.py
│ ├── clean_bg_egn.py
│ ├── clean_bg_pnf.py
│ ├── clean_bg_vat.py
│ ├── clean_bic.py
│ ├── clean_bitcoin.py
│ ├── clean_br_cnpj.py
│ ├── clean_br_cpf.py
│ ├── clean_by_unp.py
│ ├── clean_ca_bn.py
│ ├── clean_ca_sin.py
│ ├── clean_casrn.py
│ ├── clean_ch_esr.py
│ ├── clean_ch_ssn.py
│ ├── clean_ch_uid.py
│ ├── clean_ch_vat.py
│ ├── clean_cl_rut.py
│ ├── clean_cn_ric.py
│ ├── clean_cn_uscc.py
│ ├── clean_co_nit.py
│ ├── clean_country.py
│ ├── clean_cr_cpf.py
│ ├── clean_cr_cpj.py
│ ├── clean_cr_cr.py
│ ├── clean_cu_ni.py
│ ├── clean_currency.py
│ ├── clean_cusip.py
│ ├── clean_cy_vat.py
│ ├── clean_cz_dic.py
│ ├── clean_cz_rc.py
│ ├── clean_date.py
│ ├── clean_date_utils.py
│ ├── clean_de_handelsregisternummer.py
│ ├── clean_de_idnr.py
│ ├── clean_de_stnr.py
│ ├── clean_de_vat.py
│ ├── clean_de_wkn.py
│ ├── clean_df.py
│ ├── clean_df_gui.py
│ ├── clean_dk_cpr.py
│ ├── clean_dk_cvr.py
│ ├── clean_do_cedula.py
│ ├── clean_do_ncf.py
│ ├── clean_do_rnc.py
│ ├── clean_duplication.py
│ ├── clean_duplication_utils.py
│ ├── clean_ean.py
│ ├── clean_ec_ci.py
│ ├── clean_ec_ruc.py
│ ├── clean_ee_ik.py
│ ├── clean_ee_kmkr.py
│ ├── clean_ee_registrikood.py
│ ├── clean_email.py
│ ├── clean_es_ccc.py
│ ├── clean_es_cif.py
│ ├── clean_es_cups.py
│ ├── clean_es_dni.py
│ ├── clean_es_iban.py
│ ├── clean_es_nie.py
│ ├── clean_es_nif.py
│ ├── clean_es_referenciacatastral.py
│ ├── clean_eu_at_02.py
│ ├── clean_eu_banknote.py
│ ├── clean_eu_eic.py
│ ├── clean_eu_nace.py
│ ├── clean_eu_vat.py
│ ├── clean_fi_alv.py
│ ├── clean_fi_associationid.py
│ ├── clean_fi_hetu.py
│ ├── clean_fi_veronumero.py
│ ├── clean_fi_ytunnus.py
│ ├── clean_figi.py
│ ├── clean_fr_nif.py
│ ├── clean_fr_nir.py
│ ├── clean_fr_siren.py
│ ├── clean_fr_siret.py
│ ├── clean_fr_tva.py
│ ├── clean_gb_nhs.py
│ ├── clean_gb_sedol.py
│ ├── clean_gb_upn.py
│ ├── clean_gb_utr.py
│ ├── clean_gb_vat.py
│ ├── clean_gr_amka.py
│ ├── clean_gr_vat.py
│ ├── clean_grid.py
│ ├── clean_gt_nit.py
│ ├── clean_headers.py
│ ├── clean_hr_oib.py
│ ├── clean_hu_anum.py
│ ├── clean_iban.py
│ ├── clean_id_npwp.py
│ ├── clean_ie_pps.py
│ ├── clean_ie_vat.py
│ ├── clean_il_hp.py
│ ├── clean_il_idnr.py
│ ├── clean_imei.py
│ ├── clean_imo.py
│ ├── clean_imsi.py
│ ├── clean_in_aadhaar.py
│ ├── clean_in_pan.py
│ ├── clean_ip.py
│ ├── clean_is_kennitala.py
│ ├── clean_is_vsk.py
│ ├── clean_isan.py
│ ├── clean_isbn.py
│ ├── clean_isil.py
│ ├── clean_isin.py
│ ├── clean_ismn.py
│ ├── clean_issn.py
│ ├── clean_it_aic.py
│ ├── clean_it_codicefiscale.py
│ ├── clean_it_iva.py
│ ├── clean_jp_cn.py
│ ├── clean_json.py
│ ├── clean_kr_brn.py
│ ├── clean_kr_rrn.py
│ ├── clean_lat_long.py
│ ├── clean_lei.py
│ ├── clean_li_peid.py
│ ├── clean_lt_asmens.py
│ ├── clean_lt_pvm.py
│ ├── clean_lu_tva.py
│ ├── clean_lv_pvn.py
│ ├── clean_mc_tva.py
│ ├── clean_md_idno.py
│ ├── clean_me_iban.py
│ ├── clean_meid.py
│ ├── clean_ml.py
│ ├── clean_mt_vat.py
│ ├── clean_mu_nid.py
│ ├── clean_mx_curp.py
│ ├── clean_mx_rfc.py
│ ├── clean_my_nric.py
│ ├── clean_nl_brin.py
│ ├── clean_nl_bsn.py
│ ├── clean_nl_btw.py
│ ├── clean_nl_onderwijsnummer.py
│ ├── clean_nl_postcode.py
│ ├── clean_no_fodselsnummer.py
│ ├── clean_no_iban.py
│ ├── clean_no_kontonr.py
│ ├── clean_no_mva.py
│ ├── clean_no_orgnr.py
│ ├── clean_nz_bankaccount.py
│ ├── clean_nz_ird.py
│ ├── clean_pe_cui.py
│ ├── clean_pe_ruc.py
│ ├── clean_phone.py
│ ├── clean_pl_nip.py
│ ├── clean_pl_pesel.py
│ ├── clean_pl_regon.py
│ ├── clean_pt_nif.py
│ ├── clean_py_ruc.py
│ ├── clean_ro_cf.py
│ ├── clean_ro_cnp.py
│ ├── clean_ro_cui.py
│ ├── clean_ro_onrc.py
│ ├── clean_text.py
│ ├── clean_url.py
│ ├── clean_vatin.py
│ ├── components
│ │ ├── .DS_Store
│ │ ├── __init__.py
│ │ ├── cat_encoder.py
│ │ ├── cat_encoding
│ │ │ ├── __init__.py
│ │ │ └── one_hot_encoding.py
│ │ ├── cat_imputation
│ │ │ ├── .DS_Store
│ │ │ ├── __init__.py
│ │ │ ├── constant_imputer.py
│ │ │ ├── drop_imputer.py
│ │ │ └── most_frequent_imputer.py
│ │ ├── cat_imputer.py
│ │ ├── num_imputation
│ │ │ ├── __init__.py
│ │ │ ├── drop_imputer.py
│ │ │ ├── mean_imputer.py
│ │ │ ├── median_imputer.py
│ │ │ └── most_frequent_imputer.py
│ │ ├── num_imputer.py
│ │ ├── num_scaler.py
│ │ ├── num_scaling
│ │ │ ├── __init__.py
│ │ │ ├── maxabs_scaler.py
│ │ │ ├── minmax_scaler.py
│ │ │ └── standard_scaler.py
│ │ ├── variance_threshold
│ │ │ ├── __init__.py
│ │ │ └── variance_threshold.py
│ │ └── variance_thresholder.py
│ ├── country_data.tsv
│ ├── cryptocurrency.json
│ ├── currencies.json
│ ├── gui
│ │ ├── .DS_Store
│ │ ├── clean_frontend
│ │ │ ├── .babelrc
│ │ │ ├── .editorconfig
│ │ │ ├── .eslintrc.js
│ │ │ ├── .gitignore
│ │ │ ├── .postcssrc.js
│ │ │ ├── README.md
│ │ │ ├── build_frontend
│ │ │ │ ├── build.js
│ │ │ │ ├── check-versions.js
│ │ │ │ ├── logo.png
│ │ │ │ ├── utils.js
│ │ │ │ ├── vue-loader.conf.js
│ │ │ │ ├── webpack.base.conf.js
│ │ │ │ ├── webpack.dev.conf.js
│ │ │ │ └── webpack.prod.conf.js
│ │ │ ├── config
│ │ │ │ ├── dev.env.js
│ │ │ │ ├── index.js
│ │ │ │ └── prod.env.js
│ │ │ ├── index.html
│ │ │ ├── package-lock.json
│ │ │ ├── package.json
│ │ │ ├── src
│ │ │ │ ├── App.vue
│ │ │ │ ├── assets
│ │ │ │ │ ├── cleanning.png
│ │ │ │ │ ├── columns.png
│ │ │ │ │ ├── footerlog.png
│ │ │ │ │ ├── logo.png
│ │ │ │ │ └── para.png
│ │ │ │ ├── components
│ │ │ │ │ ├── CleanFunctions.vue
│ │ │ │ │ ├── CleanWholeDF.vue
│ │ │ │ │ ├── Cleanning.vue
│ │ │ │ │ ├── FooterLog.vue
│ │ │ │ │ ├── Header.vue
│ │ │ │ │ ├── HeaderMenu.vue
│ │ │ │ │ ├── Loading.vue
│ │ │ │ │ ├── MainTable.vue
│ │ │ │ │ ├── SampleDF.vue
│ │ │ │ │ └── Table.vue
│ │ │ │ ├── main.js
│ │ │ │ ├── router
│ │ │ │ │ └── index.js
│ │ │ │ └── util
│ │ │ │ │ └── endpoint.js
│ │ │ └── static
│ │ │ │ └── .gitkeep
│ │ ├── clean_gui.py
│ │ └── frontend_dist
│ │ │ ├── index.html
│ │ │ └── static
│ │ │ ├── css
│ │ │ ├── app.0cfb91da89a9917aa6ef7f322f2bf520.css
│ │ │ └── app.0cfb91da89a9917aa6ef7f322f2bf520.css.map
│ │ │ ├── fonts
│ │ │ ├── element-icons.535877f.woff
│ │ │ └── element-icons.732389d.ttf
│ │ │ └── js
│ │ │ ├── app.27d28f3a558ed4ec0a89.js
│ │ │ ├── app.27d28f3a558ed4ec0a89.js.map
│ │ │ ├── manifest.2ae2e69a05c33dfc65f8.js
│ │ │ ├── manifest.2ae2e69a05c33dfc65f8.js.map
│ │ │ ├── vendor.6a87832c0707ffde74f1.js
│ │ │ └── vendor.6a87832c0707ffde74f1.js.map
│ ├── pipeline.py
│ ├── usaddr.crfsuite
│ └── utils.py
├── connector
│ ├── __init__.py
│ ├── assets
│ │ └── info.html
│ ├── config_manager.py
│ ├── connector.py
│ ├── errors.py
│ ├── generator
│ │ ├── __init__.py
│ │ ├── generator.py
│ │ ├── state.py
│ │ ├── table.py
│ │ └── ui.py
│ ├── implicit_database.py
│ ├── info.py
│ ├── info_ui.py
│ ├── ref.py
│ ├── schema
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── defs.py
│ │ └── oauth2.html
│ ├── sql.py
│ ├── throttler.py
│ └── utils.py
├── datasets
│ ├── __init__.py
│ ├── _base.py
│ ├── data
│ │ ├── adult.csv
│ │ ├── countries.csv
│ │ ├── covid19.csv
│ │ ├── house_prices_test.csv
│ │ ├── house_prices_train.csv
│ │ ├── iris.csv
│ │ ├── patient_info.csv
│ │ ├── titanic.csv
│ │ ├── waste_hauler.csv
│ │ └── wine-quality-red.csv
│ └── database
│ │ └── sakila.db
├── eda
│ ├── __init__.py
│ ├── configs.py
│ ├── container.py
│ ├── correlation
│ │ ├── __init__.py
│ │ ├── compute
│ │ │ ├── __init__.py
│ │ │ ├── bivariate.py
│ │ │ ├── common.py
│ │ │ ├── overview.py
│ │ │ └── univariate.py
│ │ └── render.py
│ ├── create_db_report
│ │ ├── __init__.py
│ │ ├── db_models
│ │ │ ├── __init__.py
│ │ │ ├── constraint.py
│ │ │ ├── database.py
│ │ │ ├── db_meta.py
│ │ │ ├── table.py
│ │ │ ├── table_column.py
│ │ │ ├── table_index.py
│ │ │ └── view.py
│ │ ├── diagram_factory.py
│ │ ├── header
│ │ │ ├── __init__.py
│ │ │ └── sql_metadata.py
│ │ ├── layout
│ │ │ ├── anomalies.html
│ │ │ ├── anomalies.js
│ │ │ ├── bower
│ │ │ │ ├── admin-lte
│ │ │ │ │ ├── bootstrap
│ │ │ │ │ │ ├── css
│ │ │ │ │ │ │ ├── bootstrap-theme.css
│ │ │ │ │ │ │ ├── bootstrap-theme.css.map
│ │ │ │ │ │ │ ├── bootstrap-theme.min.css
│ │ │ │ │ │ │ ├── bootstrap-theme.min.css.map
│ │ │ │ │ │ │ ├── bootstrap.css
│ │ │ │ │ │ │ ├── bootstrap.css.map
│ │ │ │ │ │ │ ├── bootstrap.min.css
│ │ │ │ │ │ │ └── bootstrap.min.css.map
│ │ │ │ │ │ ├── fonts
│ │ │ │ │ │ │ ├── glyphicons-halflings-regular.eot
│ │ │ │ │ │ │ ├── glyphicons-halflings-regular.svg
│ │ │ │ │ │ │ ├── glyphicons-halflings-regular.ttf
│ │ │ │ │ │ │ ├── glyphicons-halflings-regular.woff
│ │ │ │ │ │ │ └── glyphicons-halflings-regular.woff2
│ │ │ │ │ │ └── js
│ │ │ │ │ │ │ ├── bootstrap.js
│ │ │ │ │ │ │ ├── bootstrap.min.js
│ │ │ │ │ │ │ └── npm.js
│ │ │ │ │ ├── dist
│ │ │ │ │ │ ├── css
│ │ │ │ │ │ │ ├── AdminLTE.css
│ │ │ │ │ │ │ ├── AdminLTE.min.css
│ │ │ │ │ │ │ ├── alt
│ │ │ │ │ │ │ │ ├── AdminLTE-bootstrap-social.css
│ │ │ │ │ │ │ │ ├── AdminLTE-bootstrap-social.min.css
│ │ │ │ │ │ │ │ ├── AdminLTE-fullcalendar.css
│ │ │ │ │ │ │ │ ├── AdminLTE-fullcalendar.min.css
│ │ │ │ │ │ │ │ ├── AdminLTE-select2.css
│ │ │ │ │ │ │ │ ├── AdminLTE-select2.min.css
│ │ │ │ │ │ │ │ ├── AdminLTE-without-plugins.css
│ │ │ │ │ │ │ │ └── AdminLTE-without-plugins.min.css
│ │ │ │ │ │ │ └── skins
│ │ │ │ │ │ │ │ ├── _all-skins.css
│ │ │ │ │ │ │ │ ├── _all-skins.min.css
│ │ │ │ │ │ │ │ ├── skin-black-light.css
│ │ │ │ │ │ │ │ ├── skin-black-light.min.css
│ │ │ │ │ │ │ │ ├── skin-black.css
│ │ │ │ │ │ │ │ ├── skin-black.min.css
│ │ │ │ │ │ │ │ ├── skin-blue-light.css
│ │ │ │ │ │ │ │ ├── skin-blue-light.min.css
│ │ │ │ │ │ │ │ ├── skin-blue.css
│ │ │ │ │ │ │ │ ├── skin-blue.min.css
│ │ │ │ │ │ │ │ ├── skin-green-light.css
│ │ │ │ │ │ │ │ ├── skin-green-light.min.css
│ │ │ │ │ │ │ │ ├── skin-green.css
│ │ │ │ │ │ │ │ ├── skin-green.min.css
│ │ │ │ │ │ │ │ ├── skin-purple-light.css
│ │ │ │ │ │ │ │ ├── skin-purple-light.min.css
│ │ │ │ │ │ │ │ ├── skin-purple.css
│ │ │ │ │ │ │ │ ├── skin-purple.min.css
│ │ │ │ │ │ │ │ ├── skin-red-light.css
│ │ │ │ │ │ │ │ ├── skin-red-light.min.css
│ │ │ │ │ │ │ │ ├── skin-red.css
│ │ │ │ │ │ │ │ ├── skin-red.min.css
│ │ │ │ │ │ │ │ ├── skin-yellow-light.css
│ │ │ │ │ │ │ │ ├── skin-yellow-light.min.css
│ │ │ │ │ │ │ │ ├── skin-yellow.css
│ │ │ │ │ │ │ │ └── skin-yellow.min.css
│ │ │ │ │ │ └── js
│ │ │ │ │ │ │ ├── app.js
│ │ │ │ │ │ │ ├── app.min.js
│ │ │ │ │ │ │ ├── demo.js
│ │ │ │ │ │ │ └── pages
│ │ │ │ │ │ │ ├── dashboard.js
│ │ │ │ │ │ │ └── dashboard2.js
│ │ │ │ │ └── plugins
│ │ │ │ │ │ ├── fastclick
│ │ │ │ │ │ ├── fastclick.js
│ │ │ │ │ │ └── fastclick.min.js
│ │ │ │ │ │ ├── jQuery
│ │ │ │ │ │ └── jquery-2.2.3.min.js
│ │ │ │ │ │ ├── jQueryUI
│ │ │ │ │ │ ├── jquery-ui.js
│ │ │ │ │ │ └── jquery-ui.min.js
│ │ │ │ │ │ └── slimScroll
│ │ │ │ │ │ ├── jquery.slimscroll.js
│ │ │ │ │ │ └── jquery.slimscroll.min.js
│ │ │ │ ├── anchor-js
│ │ │ │ │ └── anchor.min.js
│ │ │ │ ├── codemirror
│ │ │ │ │ ├── codemirror.css
│ │ │ │ │ ├── codemirror.js
│ │ │ │ │ └── sql.js
│ │ │ │ ├── datatables.net-bs
│ │ │ │ │ ├── css
│ │ │ │ │ │ ├── dataTables.bootstrap.css
│ │ │ │ │ │ └── dataTables.bootstrap.min.css
│ │ │ │ │ └── js
│ │ │ │ │ │ ├── dataTables.bootstrap.js
│ │ │ │ │ │ └── dataTables.bootstrap.min.js
│ │ │ │ ├── datatables.net-buttons-bs
│ │ │ │ │ ├── css
│ │ │ │ │ │ ├── buttons.bootstrap.css
│ │ │ │ │ │ └── buttons.bootstrap.min.css
│ │ │ │ │ └── js
│ │ │ │ │ │ ├── buttons.bootstrap.js
│ │ │ │ │ │ └── buttons.bootstrap.min.js
│ │ │ │ ├── datatables.net-buttons
│ │ │ │ │ ├── buttons.colVis.js
│ │ │ │ │ ├── buttons.colVis.min.js
│ │ │ │ │ ├── buttons.flash.js
│ │ │ │ │ ├── buttons.flash.min.js
│ │ │ │ │ ├── buttons.html5.js
│ │ │ │ │ ├── buttons.html5.min.js
│ │ │ │ │ ├── buttons.print.js
│ │ │ │ │ ├── buttons.print.min.js
│ │ │ │ │ ├── dataTables.buttons.js
│ │ │ │ │ └── dataTables.buttons.min.js
│ │ │ │ ├── datatables.net
│ │ │ │ │ └── jquery.dataTables.min.js
│ │ │ │ ├── font-awesome
│ │ │ │ │ ├── css
│ │ │ │ │ │ ├── font-awesome.css
│ │ │ │ │ │ ├── font-awesome.css.map
│ │ │ │ │ │ └── font-awesome.min.css
│ │ │ │ │ └── fonts
│ │ │ │ │ │ ├── FontAwesome.otf
│ │ │ │ │ │ ├── fontawesome-webfont.eot
│ │ │ │ │ │ ├── fontawesome-webfont.svg
│ │ │ │ │ │ ├── fontawesome-webfont.ttf
│ │ │ │ │ │ ├── fontawesome-webfont.woff
│ │ │ │ │ │ └── fontawesome-webfont.woff2
│ │ │ │ ├── gojs
│ │ │ │ │ ├── Figures.js
│ │ │ │ │ ├── ZoomSlider.css
│ │ │ │ │ ├── ZoomSlider.js
│ │ │ │ │ └── go.js
│ │ │ │ ├── html5shiv
│ │ │ │ │ └── html5shiv.min.js
│ │ │ │ ├── ionicons
│ │ │ │ │ ├── css
│ │ │ │ │ │ ├── ionicons.css
│ │ │ │ │ │ └── ionicons.min.css
│ │ │ │ │ └── fonts
│ │ │ │ │ │ ├── ionicons.eot
│ │ │ │ │ │ ├── ionicons.svg
│ │ │ │ │ │ ├── ionicons.ttf
│ │ │ │ │ │ └── ionicons.woff
│ │ │ │ ├── jquery
│ │ │ │ │ └── jquery.js
│ │ │ │ ├── js-xlsx
│ │ │ │ │ └── xlsx.full.min.js
│ │ │ │ ├── pdfmake
│ │ │ │ │ ├── pdfmake.min.js
│ │ │ │ │ └── vfs_fonts.js
│ │ │ │ ├── respond
│ │ │ │ │ └── respond.min.js
│ │ │ │ └── salvattore
│ │ │ │ │ ├── salvattore.css
│ │ │ │ │ └── salvattore.min.js
│ │ │ ├── column.html
│ │ │ ├── column.js
│ │ │ ├── constraint.html
│ │ │ ├── constraint.js
│ │ │ ├── container.html
│ │ │ ├── diagram.html
│ │ │ ├── favicon.png
│ │ │ ├── fonts
│ │ │ │ ├── indieflower
│ │ │ │ │ ├── indie-flower-v8-latin-regular.eot
│ │ │ │ │ ├── indie-flower-v8-latin-regular.svg
│ │ │ │ │ ├── indie-flower-v8-latin-regular.ttf
│ │ │ │ │ ├── indie-flower-v8-latin-regular.woff
│ │ │ │ │ ├── indie-flower-v8-latin-regular.woff2
│ │ │ │ │ └── indie-flower.css
│ │ │ │ └── source-sans-pro
│ │ │ │ │ ├── source-sans-pro-v10-latin-300.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-300.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-300.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-300.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-300.woff2
│ │ │ │ │ ├── source-sans-pro-v10-latin-300italic.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-300italic.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-300italic.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-300italic.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-300italic.woff2
│ │ │ │ │ ├── source-sans-pro-v10-latin-600.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-600.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-600.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-600.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-600.woff2
│ │ │ │ │ ├── source-sans-pro-v10-latin-600italic.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-600italic.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-600italic.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-600italic.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-600italic.woff2
│ │ │ │ │ ├── source-sans-pro-v10-latin-700.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-700.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-700.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-700.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-700.woff2
│ │ │ │ │ ├── source-sans-pro-v10-latin-regular.eot
│ │ │ │ │ ├── source-sans-pro-v10-latin-regular.svg
│ │ │ │ │ ├── source-sans-pro-v10-latin-regular.ttf
│ │ │ │ │ ├── source-sans-pro-v10-latin-regular.woff
│ │ │ │ │ ├── source-sans-pro-v10-latin-regular.woff2
│ │ │ │ │ └── source-sans-pro.css
│ │ │ ├── images
│ │ │ │ ├── dataprep-logo.png
│ │ │ │ ├── foreignKey.png
│ │ │ │ ├── foreignKeys.png
│ │ │ │ ├── primaryKey.png
│ │ │ │ └── primaryKeys.png
│ │ │ ├── main.html
│ │ │ ├── main.js
│ │ │ ├── multi.html
│ │ │ ├── orphan.html
│ │ │ ├── relationship.html
│ │ │ ├── relationship.js
│ │ │ ├── routines.html
│ │ │ ├── routines.js
│ │ │ ├── routines
│ │ │ │ ├── routine.html
│ │ │ │ └── routine.js
│ │ │ ├── schemaSpy.css
│ │ │ ├── schemaSpy.js
│ │ │ └── tables
│ │ │ │ ├── table.html
│ │ │ │ └── table.js
│ │ ├── page_models
│ │ │ ├── __init__.py
│ │ │ ├── page_data.py
│ │ │ └── page_template.py
│ │ ├── report.py
│ │ ├── run_function.py
│ │ ├── template_models
│ │ │ ├── __init__.py
│ │ │ ├── constraint.py
│ │ │ ├── table.py
│ │ │ ├── table_column.py
│ │ │ └── table_index.py
│ │ └── views
│ │ │ ├── __init__.py
│ │ │ ├── column.py
│ │ │ ├── constraint.py
│ │ │ ├── main.py
│ │ │ ├── orphan.py
│ │ │ ├── relationship.py
│ │ │ └── table.py
│ ├── create_diff_report
│ │ ├── __init__.py
│ │ ├── diff_formatter.py
│ │ └── templates
│ │ │ ├── base.html
│ │ │ ├── overview.html
│ │ │ ├── scripts.html
│ │ │ ├── styles.html
│ │ │ └── variables.html
│ ├── create_report
│ │ ├── __init__.py
│ │ ├── formatter.py
│ │ ├── report.py
│ │ └── templates
│ │ │ ├── base.html
│ │ │ ├── correlation.html
│ │ │ ├── error.html
│ │ │ ├── interactions.html
│ │ │ ├── missing.html
│ │ │ ├── overview.html
│ │ │ ├── scripts.html
│ │ │ ├── styles.html
│ │ │ └── variables.html
│ ├── diff
│ │ ├── __init__.py
│ │ ├── compute
│ │ │ ├── __init__.py
│ │ │ ├── multiple_column.py
│ │ │ └── multiple_df.py
│ │ └── render.py
│ ├── distribution
│ │ ├── __init__.py
│ │ ├── compute
│ │ │ ├── __init__.py
│ │ │ ├── bivariate.py
│ │ │ ├── common.py
│ │ │ ├── overview.py
│ │ │ ├── trivariate.py
│ │ │ └── univariate.py
│ │ ├── country.json
│ │ ├── name_dict.json
│ │ └── render.py
│ ├── dtypes.py
│ ├── dtypes_v2.py
│ ├── eda_frame.py
│ ├── intermediate.py
│ ├── missing
│ │ ├── __init__.py
│ │ ├── compute
│ │ │ ├── __init__.py
│ │ │ ├── bivariate.py
│ │ │ ├── common.py
│ │ │ ├── nullivariate.py
│ │ │ └── univariate.py
│ │ └── render.py
│ ├── outlier
│ │ ├── __init__.py
│ │ └── computation.py
│ ├── palette.py
│ ├── staged.py
│ ├── templates
│ │ ├── grid_base.html
│ │ ├── grid_scripts.html
│ │ ├── grid_styles.html
│ │ ├── jquery.html
│ │ ├── pagination.html
│ │ ├── tab_base.html
│ │ ├── tab_base_corr.html
│ │ ├── tab_scripts.html
│ │ └── tab_styles.html
│ └── utils.py
├── errors.py
├── lineage
│ ├── __init__.py
│ └── lx.py
├── progress_bar.py
├── tests
│ ├── __init__.py
│ ├── benchmarks
│ │ ├── __init__.py
│ │ └── eda.py
│ ├── clean
│ │ ├── __init__.py
│ │ ├── test_clean_address.py
│ │ ├── test_clean_country.py
│ │ ├── test_clean_date.py
│ │ ├── test_clean_duplication.py
│ │ ├── test_clean_email.py
│ │ ├── test_clean_headers.py
│ │ ├── test_clean_ip.py
│ │ ├── test_clean_lat_long.py
│ │ ├── test_clean_phone.py
│ │ ├── test_clean_text.py
│ │ └── test_clean_url.py
│ ├── connector
│ │ ├── __init__.py
│ │ ├── test_integration.py
│ │ └── test_read_sql.py
│ ├── datasets
│ │ ├── __init__.py
│ │ └── test_datasets.py
│ ├── eda
│ │ ├── __init__.py
│ │ ├── random_data_generator.py
│ │ ├── test.py
│ │ ├── test_config.py
│ │ ├── test_create_db_report.py
│ │ ├── test_create_diff_report.py
│ │ ├── test_create_report.py
│ │ ├── test_plot.py
│ │ ├── test_plot_correlation.py
│ │ ├── test_plot_diff.py
│ │ ├── test_plot_missing.py
│ │ └── test_show.py
│ └── lineage
│ │ ├── __init__.py
│ │ ├── dependency_example
│ │ ├── a_table.sql
│ │ ├── aa_table.sql
│ │ ├── basic_patient_info.sql
│ │ ├── from_aa_table.sql
│ │ └── no_dob.sql
│ │ └── test_lineagex.py
└── utils.py
├── docs
├── Makefile
├── make.bat
└── source
│ ├── _static
│ └── images
│ │ ├── connector
│ │ ├── connector_auto_pagination_off.png
│ │ ├── connector_auto_pagination_on.png
│ │ ├── connector_pagination_offset_offset_limit.png
│ │ ├── connector_pagination_offset_page_perpage.png
│ │ ├── connector_yelp_query.png
│ │ ├── connector_yelp_query_2.png
│ │ ├── connector_yelp_show_schema.png
│ │ ├── connector_youtube_query.png
│ │ ├── connector_youtube_query_2.png
│ │ ├── info.png
│ │ ├── query.png
│ │ └── show_schema.png
│ │ ├── create_report
│ │ └── titanic_dp.html
│ │ └── tutorial
│ │ ├── .DS_Store
│ │ ├── App.js_config.png
│ │ ├── App_find.png
│ │ ├── Config_destination.png
│ │ ├── ID_and_secret.png
│ │ ├── Node_js.png
│ │ ├── SFU_Spotify.png
│ │ ├── Spotify_authentication.png
│ │ ├── Spotify_dashboard.png
│ │ ├── Spotify_git_page.png
│ │ ├── Spotify_server.png
│ │ ├── URI_.png
│ │ ├── Yelp_API_Key.png
│ │ ├── Yelp_authentication.png
│ │ ├── connector_yelp_query.png
│ │ ├── connector_yelp_query_2.png
│ │ ├── connector_yelp_show_schema.png
│ │ ├── connector_youtube_query.png
│ │ ├── connector_youtube_query_2.png
│ │ ├── dc_dblp_author.png
│ │ ├── dc_dblp_info.png
│ │ ├── dc_dblp_pagination.png
│ │ ├── dc_dblp_query.png
│ │ ├── dc_git.png
│ │ ├── dc_git_clone.png
│ │ ├── dc_query.png
│ │ ├── dc_spotify_info.png
│ │ ├── dc_spotify_query.png
│ │ ├── dc_spotify_query_pag.png
│ │ ├── dc_yelp_query.png
│ │ └── dc_yelp_query_pag.png
│ ├── acknowledgement.rst
│ ├── api_reference
│ ├── dataprep.clean.rst
│ ├── dataprep.connector.rst
│ ├── dataprep.eda.correlation.rst
│ ├── dataprep.eda.create_report.rst
│ ├── dataprep.eda.diff.rst
│ ├── dataprep.eda.distribution.rst
│ ├── dataprep.eda.missing.rst
│ ├── dataprep.eda.rst
│ └── dataprep.rst
│ ├── bokeh
│ ├── bokeh_license.txt
│ ├── docs-navbar.html
│ ├── docs-sidebar.html
│ ├── docs-toc.html
│ ├── favicon.html
│ ├── fonts.html
│ ├── footer.html
│ ├── ga.html
│ ├── layout.html
│ ├── meta.html
│ ├── scripts.html
│ ├── static
│ │ ├── custom.css
│ │ └── sphinx-bootstrap.css_t
│ ├── styles.html
│ ├── theme.conf
│ └── theme.py
│ ├── conf.py
│ ├── contribute.rst
│ ├── index.rst
│ ├── installation.rst
│ └── user_guide
│ ├── clean
│ ├── assets
│ │ ├── after_origin.png
│ │ ├── after_redo.png
│ │ ├── after_undo.png
│ │ ├── click_csv.png
│ │ ├── click_df.png
│ │ ├── click_log.png
│ │ ├── click_origin.png
│ │ ├── click_redo.png
│ │ ├── click_undo.png
│ │ ├── exported_df.png
│ │ ├── exported_log.png
│ │ ├── init_view.png
│ │ ├── log_after_origin.png
│ │ ├── log_after_redo.png
│ │ ├── single_col.png
│ │ └── whole_df.png
│ ├── clean_address.ipynb
│ ├── clean_au_abn.ipynb
│ ├── clean_au_acn.ipynb
│ ├── clean_au_tfn.ipynb
│ ├── clean_be_iban.ipynb
│ ├── clean_be_vat.ipynb
│ ├── clean_bg_egn.ipynb
│ ├── clean_bg_pnf.ipynb
│ ├── clean_bg_vat.ipynb
│ ├── clean_br_cnpj.ipynb
│ ├── clean_br_cpf.ipynb
│ ├── clean_by_unp.ipynb
│ ├── clean_ca_bn.ipynb
│ ├── clean_ca_sin.ipynb
│ ├── clean_ch_esr.ipynb
│ ├── clean_ch_ssn.ipynb
│ ├── clean_ch_uid.ipynb
│ ├── clean_ch_vat.ipynb
│ ├── clean_cl_rut.ipynb
│ ├── clean_cn_ric.ipynb
│ ├── clean_cn_uscc.ipynb
│ ├── clean_co_nit.ipynb
│ ├── clean_country.ipynb
│ ├── clean_cr_cpf.ipynb
│ ├── clean_cr_cpj.ipynb
│ ├── clean_cr_cr.ipynb
│ ├── clean_cu_ni.ipynb
│ ├── clean_cy_vat.ipynb
│ ├── clean_cz_dic.ipynb
│ ├── clean_cz_rc.ipynb
│ ├── clean_date.ipynb
│ ├── clean_de_handelsregisternummer.ipynb
│ ├── clean_de_idnr.ipynb
│ ├── clean_de_stnr.ipynb
│ ├── clean_de_vat.ipynb
│ ├── clean_de_wkn.ipynb
│ ├── clean_df.ipynb
│ ├── clean_df_gui.ipynb
│ ├── clean_dk_cpr.ipynb
│ ├── clean_dk_cvr.ipynb
│ ├── clean_do_cedula.ipynb
│ ├── clean_do_ncf.ipynb
│ ├── clean_do_rnc.ipynb
│ ├── clean_duplication.ipynb
│ ├── clean_ec_ci.ipynb
│ ├── clean_ec_ruc.ipynb
│ ├── clean_ee_ik.ipynb
│ ├── clean_ee_kmkr.ipynb
│ ├── clean_ee_registrikood.ipynb
│ ├── clean_email.ipynb
│ ├── clean_es_ccc.ipynb
│ ├── clean_es_cif.ipynb
│ ├── clean_es_cups.ipynb
│ ├── clean_es_dni.ipynb
│ ├── clean_es_iban.ipynb
│ ├── clean_es_nie.ipynb
│ ├── clean_es_nif.ipynb
│ ├── clean_es_referenciacatastral.ipynb
│ ├── clean_eu_at_02.ipynb
│ ├── clean_eu_banknote.ipynb
│ ├── clean_eu_eic.ipynb
│ ├── clean_eu_nace.ipynb
│ ├── clean_eu_vat.ipynb
│ ├── clean_fi_alv.ipynb
│ ├── clean_fi_associationid.ipynb
│ ├── clean_fi_hetu.ipynb
│ ├── clean_fi_veronumero.ipynb
│ ├── clean_fi_ytunnus.ipynb
│ ├── clean_fr_nif.ipynb
│ ├── clean_fr_nir.ipynb
│ ├── clean_fr_siren.ipynb
│ ├── clean_fr_siret.ipynb
│ ├── clean_fr_tva.ipynb
│ ├── clean_gb_nhs.ipynb
│ ├── clean_gb_sedol.ipynb
│ ├── clean_gb_upn.ipynb
│ ├── clean_gb_utr.ipynb
│ ├── clean_gb_vat.ipynb
│ ├── clean_gr_amka.ipynb
│ ├── clean_gr_vat.ipynb
│ ├── clean_gt_nit.ipynb
│ ├── clean_headers.ipynb
│ ├── clean_hr_oib.ipynb
│ ├── clean_hu_anum.ipynb
│ ├── clean_id_npwp.ipynb
│ ├── clean_ie_pps.ipynb
│ ├── clean_ie_vat.ipynb
│ ├── clean_il_hp.ipynb
│ ├── clean_il_idnr.ipynb
│ ├── clean_in_aadhaar.ipynb
│ ├── clean_in_pan.ipynb
│ ├── clean_ip.ipynb
│ ├── clean_is_kennitala.ipynb
│ ├── clean_is_vsk.ipynb
│ ├── clean_isbn.ipynb
│ ├── clean_it_aic.ipynb
│ ├── clean_it_codicefiscale.ipynb
│ ├── clean_it_iva.ipynb
│ ├── clean_jp_cn.ipynb
│ ├── clean_kr_brn.ipynb
│ ├── clean_kr_rrn.ipynb
│ ├── clean_lat_long.ipynb
│ ├── clean_li_peid.ipynb
│ ├── clean_lt_asmens.ipynb
│ ├── clean_lt_pvm.ipynb
│ ├── clean_lu_tva.ipynb
│ ├── clean_lv_pvn.ipynb
│ ├── clean_mc_tva.ipynb
│ ├── clean_md_idno.ipynb
│ ├── clean_me_iban.ipynb
│ ├── clean_ml.ipynb
│ ├── clean_mt_vat.ipynb
│ ├── clean_mu_nid.ipynb
│ ├── clean_mx_curp.ipynb
│ ├── clean_mx_rfc.ipynb
│ ├── clean_my_nric.ipynb
│ ├── clean_nl_brin.ipynb
│ ├── clean_nl_bsn.ipynb
│ ├── clean_nl_btw.ipynb
│ ├── clean_nl_onderwijsnummer.ipynb
│ ├── clean_nl_postcode.ipynb
│ ├── clean_no_fodselsnummer.ipynb
│ ├── clean_no_iban.ipynb
│ ├── clean_no_kontonr.ipynb
│ ├── clean_no_mva.ipynb
│ ├── clean_no_orgnr.ipynb
│ ├── clean_nz_bankaccount.ipynb
│ ├── clean_nz_ird.ipynb
│ ├── clean_pe_cui.ipynb
│ ├── clean_pe_ruc.ipynb
│ ├── clean_phone.ipynb
│ ├── clean_pl_nip.ipynb
│ ├── clean_pl_pesel.ipynb
│ ├── clean_pl_regon.ipynb
│ ├── clean_pt_nif.ipynb
│ ├── clean_py_ruc.ipynb
│ ├── clean_ro_cf.ipynb
│ ├── clean_ro_cnp.ipynb
│ ├── clean_ro_cui.ipynb
│ ├── clean_ro_onrc.ipynb
│ ├── clean_text.ipynb
│ ├── clean_url.ipynb
│ ├── index_df.csv
│ └── introduction.ipynb
│ ├── connector
│ ├── assets
│ │ ├── yelp-1.png
│ │ └── yelp-2.png
│ ├── authorization.ipynb
│ ├── config.ipynb
│ ├── connect.ipynb
│ ├── dblp.ipynb
│ ├── info.ipynb
│ ├── introduction.ipynb
│ ├── pagination.ipynb
│ ├── query.ipynb
│ └── sql.ipynb
│ ├── datasets
│ └── introduction.ipynb
│ ├── eda
│ ├── create_db_report.ipynb
│ ├── create_diff_report.ipynb
│ ├── create_report.ipynb
│ ├── get_intermediates.ipynb
│ ├── house_price.ipynb
│ ├── how_to_guide.ipynb
│ ├── insights.ipynb
│ ├── introduction.ipynb
│ ├── parameter_configurations.ipynb
│ ├── plot.ipynb
│ ├── plot_correlation.ipynb
│ ├── plot_diff.ipynb
│ ├── plot_missing.ipynb
│ ├── titanic.ipynb
│ └── use_case.ipynb
│ └── user_guide.rst
├── examples
├── DataConnector_DBLP.ipynb
├── DataConnector_Finnhub.ipynb
├── DataConnector_Twitter.ipynb
├── DataConnector_Yelp.ipynb
├── DataConnector_Youtube.ipynb
├── EDA.ipynb
├── EDA_COVID19_SouthKorea.ipynb
├── fetch_and_analyze_COVID-19_tweets_using_DataPrep.ipynb
├── house_price.ipynb
├── images
│ ├── youtube_credentials.png
│ └── youtube_enabled.png
└── titanic.ipynb
├── mypy.ini
├── poetry.lock
├── pyproject.toml
├── pyrightconfig.json
├── pytype.cfg
└── scripts
├── gen-setup.py
└── release-note.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/.DS_Store
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source=dataprep
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report_cleaning.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report for the Cleaning module
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: 'type: bug'
6 | assignees: qidanrui
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | Or:
21 |
22 | ```python
23 | paste your code here
24 | ```
25 |
26 | **Expected behavior**
27 | A clear and concise description of what you expected to happen.
28 |
29 | **Screenshots**
30 | If applicable, add screenshots to help explain your problem.
31 |
32 | **Desktop (please complete the following information):**
33 | - OS: [e.g. Windows]
34 | - Browser [e.g. chrome, safari]
35 | - Platform [Jupyter Notebook, Jupyter Lab, Google Colab, VSCode, Python script]
36 | - Platform Version [e.g. 1.0]
37 | - Python Version [e.g. 3.7.2]
38 | - Dataprep Version [e.g. 0.2.2]
39 |
40 | **Additional context**
41 | Add any other context about the problem here.
42 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report_connector.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report for the Connector module
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: 'type: bug'
6 | assignees: wangxiaoying
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | Or:
21 |
22 | ```python
23 | paste your code here
24 | ```
25 |
26 | **Expected behavior**
27 | A clear and concise description of what you expected to happen.
28 |
29 | **Screenshots**
30 | If applicable, add screenshots to help explain your problem.
31 |
32 | **Desktop (please complete the following information):**
33 | - OS: [e.g. Windows]
34 | - Browser [e.g. chrome, safari]
35 | - Platform [Jupyter Notebook, Jupyter Lab, Google Colab, VSCode, Python script]
36 | - Platform Version [e.g. 1.0]
37 | - Python Version [e.g. 3.7.2]
38 | - Dataprep Version [e.g. 0.2.2]
39 |
40 | **Additional context**
41 | Add any other context about the problem here.
42 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report_eda.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report for the EDA module
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: 'type: bug'
6 | assignees: jinglinpeng
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | Or:
21 |
22 | ```python
23 | paste your code here
24 | ```
25 |
26 | **Expected behavior**
27 | A clear and concise description of what you expected to happen.
28 |
29 | **Screenshots**
30 | If applicable, add screenshots to help explain your problem.
31 |
32 | **Desktop (please complete the following information):**
33 | - OS: [e.g. Windows]
34 | - Browser [e.g. chrome, safari]
35 | - Platform [Jupyter Notebook, Jupyter Lab, Google Colab, VSCode, Python script]
36 | - Platform Version [e.g. 1.0]
37 | - Python Version [e.g. 3.7.2]
38 | - Dataprep Version [e.g. 0.2.2]
39 |
40 | **Additional context**
41 | Add any other context about the problem here.
42 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: 'type: enhancement'
6 | assignees: dovahcrow
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/task-template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Task Template
3 | about: Task template (for team use)
4 | title: 'Feature Proposal: xxxyyyzzz'
5 | labels: 'type: enhancement'
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
12 |
13 | ## Summary
14 |
15 |
16 | ## Design-level Explanation Actions
17 |
18 |
19 | ## Design-level Explanation
20 |
21 |
22 | ## Implementation-level Explanation
23 |
24 |
25 | ## Rationale and Alternatives
26 |
31 |
32 | ## Prior Art
33 |
34 |
35 | ## Future Possibilities
36 |
37 |
38 | ## Implementation-level Actions
39 |
40 |
41 | ## Additional Tasks
42 |
43 |
44 | - [ ] This task is put into a correct pipeline (Development Backlog or In Progress).
45 | - [ ] The label of this task is set correctly.
46 | - [ ] The issue is assigned to the correct person.
47 | - [ ] The issue is linked to related Epic.
48 |
49 |
50 | - [ ] The documentation is changed accordingly.
51 | - [ ] Tests are added accordingly.
52 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Description
2 |
3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
4 |
5 | # How Has This Been Tested?
6 |
7 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.
8 |
9 | # Snapshots:
10 |
11 | Include snapshots for easier review.
12 |
13 | # Checklist:
14 |
15 | - [ ] My code follows the style guidelines of this project
16 | - [ ] I have already squashed the commits and made the commit message conform to the project standard.
17 | - [ ] I have already marked the commit with "BREAKING CHANGE" or "Fixes #" if needed.
18 | - [ ] I have performed a self-review of my own code
19 | - [ ] I have commented my code, particularly in hard-to-understand areas
20 | - [ ] I have made corresponding changes to the documentation
21 | - [ ] My changes generate no new warnings
22 | - [ ] I have added tests that prove my fix is effective or that my feature works
23 | - [ ] New and existing unit tests pass locally with my changes
24 | - [ ] Any dependent changes have been merged and published in downstream modules
25 |
--------------------------------------------------------------------------------
/.github/workflows/benchmark.yml:
--------------------------------------------------------------------------------
1 | name: Performance Benchmarks
2 |
3 | on:
4 | push:
5 | branches:
6 | - develop
7 | pull_request:
8 | branches:
9 | - develop
10 |
11 | jobs:
12 | benchmark:
13 | name: ${{ matrix.os }} x ${{ matrix.python }}
14 | runs-on: ${{ matrix.os }}
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | os: [ubuntu-latest]
19 | python: ["3.8"]
20 | steps:
21 | - uses: actions/checkout@v2
22 | with:
23 | fetch-depth: 0
24 |
25 | - uses: actions/setup-python@v1
26 | with:
27 | python-version: ${{ matrix.python }}
28 |
29 | - name: Install dependencies
30 | run: |
31 | pip install poetry
32 | poetry config virtualenvs.in-project true
33 | poetry install
34 | poetry run pip install pytest-benchmark
35 |
36 | - name: Run benchmark
37 | run: poetry run pytest dataprep/tests/benchmarks/eda.py --benchmark-json benchmark.json
38 |
39 | - name: Show benchmark result for pull request
40 | if: ${{ github.event_name == 'pull_request'}}
41 | uses: rhysd/github-action-benchmark@v1
42 | with:
43 | name: DataPrep.EDA Benchmarks
44 | tool: "pytest"
45 | output-file-path: benchmark.json
46 | github-token: ${{ secrets.GITHUB_TOKEN }}
47 | auto-push: false
48 | save-data-file: false
49 | fail-threshold: "200%"
50 | comment-always: true
51 | fail-on-alert: true
52 |
53 | - name: Store benchmark result for push operator
54 | if: ${{ github.event_name == 'push'}}
55 | uses: rhysd/github-action-benchmark@v1
56 | with:
57 | name: DataPrep.EDA Benchmarks
58 | tool: "pytest"
59 | output-file-path: benchmark.json
60 | github-token: ${{ secrets.GITHUB_TOKEN }}
61 | auto-push: true
62 | fail-threshold: "200%"
63 | comment-always: true
64 | fail-on-alert: true
65 |
--------------------------------------------------------------------------------
/.github/workflows/clean_gui.yml:
--------------------------------------------------------------------------------
1 | name: Clean GUI CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - develop
7 | - release
8 | pull_request:
9 | branches:
10 | - develop
11 |
12 | jobs:
13 | build:
14 | runs-on: ubuntu-latest
15 |
16 | strategy:
17 | matrix:
18 | node-version: [14.x]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Use Node.js ${{ matrix.node-version }}
23 | uses: actions/setup-node@v1
24 | with:
25 | node-version: ${{ matrix.node-version }}
26 | - name: Install Dependencies & Build Frontend
27 | run: |
28 | cd dataprep/clean/gui/clean_frontend
29 | rm -rf node_modules package-lock.json
30 | npm install
31 | npm run build
32 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Justfile:
--------------------------------------------------------------------------------
1 | set dotenv-load := true
2 |
3 | #### Documentations ####
4 | doc-clean-notebooks:
5 | fd ".*\.ipynb" -t f docs/source/ -x jupyter nbconvert --clear-output
6 |
7 | build-docs:
8 | poetry run sphinx-build -M html docs/source docs/build
9 |
10 | publish-docs: build-docs
11 | touch docs/build/html/.nojekyll
12 | gh-pages --dotfiles --message "[skip ci] Updates" --dist docs/build/html
13 |
14 | gen-apidocs:
15 | poetry run sphinx-apidoc --ext-doctest --ext-autodoc --ext-mathjax -f -o docs/source dataprep
16 |
17 | #### CI ####
18 | ci: black pyright test pylint
19 |
20 | black:
21 | poetry run black dataprep
22 |
23 | test +ARGS="":
24 | poetry run pytest dataprep/tests {{ARGS}}
25 |
26 | pylint:
27 | poetry run pylint dataprep
28 |
29 | pyright:
30 | poetry run pyright dataprep
31 |
32 | build:
33 | poetry build
34 |
35 | @release-note hash="":
36 | echo ================ Release Note ================
37 | poetry run python scripts/release-note.py {{hash}}
38 | echo ================ Release Note ================
39 |
40 | setuppy:
41 | python scripts/gen-setup.py
42 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 sfu-db
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/assets/clean_example_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/clean_example_1.jpg
--------------------------------------------------------------------------------
/assets/clean_example_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/clean_example_2.jpg
--------------------------------------------------------------------------------
/assets/clean_example_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/clean_example_3.jpg
--------------------------------------------------------------------------------
/assets/clean_video_cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/clean_video_cover.png
--------------------------------------------------------------------------------
/assets/connector.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/connector.png
--------------------------------------------------------------------------------
/assets/connector_concurrency.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/connector_concurrency.gif
--------------------------------------------------------------------------------
/assets/connector_main.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/connector_main.gif
--------------------------------------------------------------------------------
/assets/connector_pagination.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/connector_pagination.gif
--------------------------------------------------------------------------------
/assets/eda_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/eda_demo.gif
--------------------------------------------------------------------------------
/assets/eda_video_cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/eda_video_cover.png
--------------------------------------------------------------------------------
/assets/icon_cropped.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/icon_cropped.jpg
--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/logo.png
--------------------------------------------------------------------------------
/assets/logo_cropped.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/logo_cropped.jpg
--------------------------------------------------------------------------------
/assets/plot(df).gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot(df).gif
--------------------------------------------------------------------------------
/assets/plot(df,x)_cat.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot(df,x)_cat.gif
--------------------------------------------------------------------------------
/assets/plot(df,x)_num.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot(df,x)_num.gif
--------------------------------------------------------------------------------
/assets/plot_correlation(df).gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot_correlation(df).gif
--------------------------------------------------------------------------------
/assets/plot_missing(df).gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot_missing(df).gif
--------------------------------------------------------------------------------
/assets/plot_missing(df, x).gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/assets/plot_missing(df, x).gif
--------------------------------------------------------------------------------
/codecov.yaml:
--------------------------------------------------------------------------------
1 | codecov:
2 | require_ci_to_pass: yes
3 |
4 | coverage:
5 | precision: 2
6 | round: down
7 | range: "70...100"
8 |
9 | parsers:
10 | gcov:
11 | branch_detection:
12 | conditional: yes
13 | loop: yes
14 | method: no
15 | macro: no
16 |
17 | comment:
18 | layout: "reach,diff,flags,tree"
19 | behavior: default
20 | require_changes: no
21 |
22 | ignore:
23 | - "**/*.ipynb"
24 |
--------------------------------------------------------------------------------
/dataprep/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/.DS_Store
--------------------------------------------------------------------------------
/dataprep/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | dataprep
3 | ========
4 |
5 | Dataprep lets you prepare your data using a single library with a few lines of code.
6 | """
7 |
8 | import logging
9 |
10 | DEFAULT_PARTITIONS = 1
11 |
12 | logging.basicConfig(level=logging.INFO, format="%(message)s")
13 |
14 | __version__ = "0.4.6"
15 |
--------------------------------------------------------------------------------
/dataprep/assets/ellipse.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/assets/ellipse.npz
--------------------------------------------------------------------------------
/dataprep/clean/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/.DS_Store
--------------------------------------------------------------------------------
/dataprep/clean/clean_df_gui.py:
--------------------------------------------------------------------------------
1 | """
2 | Conduct a set of operations that would be useful for
3 | cleaning and standardizing a full Pandas DataFrame.
4 | """
5 |
6 | # pylint: disable-msg=relative-beyond-top-level
7 | # pylint: disable-msg=cyclic-import
8 | # type: ignore
9 |
10 | from typing import Any
11 |
12 | import pandas as pd
13 |
14 | from IPython.display import IFrame, display
15 |
16 | from dataprep.clean.gui.clean_gui import launch
17 |
18 |
def clean_df_gui(
    df: pd.DataFrame,
) -> Any:
    """
    Show the GUI of the clean module for a DataFrame.

    Parameters
    ----------
    df
        A Pandas DataFrame containing the data to be cleaned.

    Returns
    -------
    Whatever ``UserInterface.display`` returns for the wrapped DataFrame.
    """
    interface = UserInterface(df)
    return interface.display()
36 |
37 |
class UserInterface:
    """
    A user interface used by clean module.

    Attributes
    ----------
    df
        The DataFrame handed to the GUI backend when the interface is displayed.
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(self, df: pd.DataFrame):
        self.df = df

    def display(self) -> None:
        """Launch the GUI backend for ``self.df`` and embed it in an IFrame."""
        launch(self.df)

        # The frame points at the fixed local address used by this module.
        frame = IFrame("http://localhost:7680", width=900, height=500)
        display(frame)
53 |
--------------------------------------------------------------------------------
/dataprep/clean/components/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/components/.DS_Store
--------------------------------------------------------------------------------
/dataprep/clean/components/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize component dictionary.
3 | """
4 |
5 | from .cat_encoder import CatEncoder
6 | from .cat_imputer import CatImputer
7 | from .num_imputer import NumImputer
8 | from .num_scaler import NumScaler
9 | from .variance_thresholder import VarianceThresholder
10 |
11 | component_dic = {
12 | "cat_encoding": CatEncoder,
13 | "cat_imputation": CatImputer,
14 | "num_imputation": NumImputer,
15 | "num_scaling": NumScaler,
16 | "variance_threshold": VarianceThresholder,
17 | }
18 |
--------------------------------------------------------------------------------
/dataprep/clean/components/cat_encoding/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize dictionary of categorical encoders.
3 | """
4 |
5 | from .one_hot_encoding import OneHotEncoder
6 |
7 | operator_dic = {
8 | "one_hot": OneHotEncoder,
9 | }
10 |
--------------------------------------------------------------------------------
/dataprep/clean/components/cat_imputation/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/components/cat_imputation/.DS_Store
--------------------------------------------------------------------------------
/dataprep/clean/components/cat_imputation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize dictionary of categorical imputers.
3 | """
4 |
5 | from .constant_imputer import ConstantImputer
6 | from .most_frequent_imputer import MostFrequentImputer
7 | from .drop_imputer import DropImputer
8 |
9 | operator_dic = {
10 | "constant": ConstantImputer,
11 | "most_frequent": MostFrequentImputer,
12 | "drop": DropImputer,
13 | }
14 |
--------------------------------------------------------------------------------
/dataprep/clean/components/num_imputation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize dictionary of numerical imputers.
3 | """
4 |
5 | from .mean_imputer import MeanImputer
6 | from .median_imputer import MedianImputer
7 | from .most_frequent_imputer import MostFrequentImputer
8 | from .drop_imputer import DropImputer
9 |
10 | operator_dic = {
11 | "mean": MeanImputer,
12 | "median": MedianImputer,
13 | "most_frequent": MostFrequentImputer,
14 | "drop": DropImputer,
15 | }
16 |
--------------------------------------------------------------------------------
/dataprep/clean/components/num_scaling/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize dictionary of numerical scalers.
3 | """
4 |
5 | from .standard_scaler import StandardScaler
6 | from .minmax_scaler import MinmaxScaler
7 | from .maxabs_scaler import MaxAbsScaler
8 |
9 | operator_dic = {
10 | "standardize": StandardScaler,
11 | "minmax": MinmaxScaler,
12 | "maxabs": MaxAbsScaler,
13 | }
14 |
--------------------------------------------------------------------------------
/dataprep/clean/components/num_scaling/maxabs_scaler.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement numerical maxabs scaler.
3 | """
4 |
5 | from typing import Any, Union
6 | import dask.dataframe as dd
7 |
8 |
class MaxAbsScaler:
    """Max Absolute Value Scaler for scaling numerical values.

    Attributes
    ----------
    name
        Name of scaler.
    maxabs
        Max absolute value of the fitted data column (0 until ``fit`` is called).
    """

    def __init__(self) -> None:
        """
        Initialize an unfitted numerical scaler.
        """

        self.name = "maxabsScaler"
        self.maxabs = 0

    def fit(self, col_df: dd.Series) -> Any:
        """
        Extract max absolute value for MaxAbs Scaler according to the provided column.

        Parameters
        ----------
        col_df
            Provided data column.

        Returns
        -------
        The fitted scaler itself, so calls can be chained.
        """

        # Take the absolute value element-wise on the column. The builtin
        # abs() cannot be applied to a Python list and raises TypeError.
        max_abs = col_df.abs().max()
        # A lazily evaluated reduction (e.g. from a dask column) exposes
        # compute(); materialize it so compute_val divides by a plain number.
        if hasattr(max_abs, "compute"):
            max_abs = max_abs.compute()
        self.maxabs = max_abs
        return self

    def transform(self, col_df: dd.Series) -> dd.Series:
        """
        Transform the provided data column with the extracted max absolute value.

        Parameters
        ----------
        col_df
            Provided data column.

        Returns
        -------
        The column with every value divided by the fitted max absolute value.
        """

        return col_df.map(self.compute_val)

    def fit_transform(self, col_df: dd.Series) -> dd.Series:
        """
        Extract max absolute value for MaxAbs Scaler according to the provided column.
        Transform the provided data column with the extracted max absolute value.

        Parameters
        ----------
        col_df
            Data column.
        """

        return self.fit(col_df).transform(col_df)

    def compute_val(self, val: Union[int, float]) -> Union[int, float]:
        """
        Compute scaling value of provided value with fitted max absolute value.

        Parameters
        ----------
        val
            Value that should be scaled.

        Notes
        -----
        No guard is applied for a fitted max absolute value of 0 (an unfitted
        scaler or an all-zero column); the division is performed as-is.
        """

        return val / self.maxabs
76 |
--------------------------------------------------------------------------------
/dataprep/clean/components/num_scaling/minmax_scaler.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement numerical minmax scaler.
3 | """
4 |
5 | from typing import Any, Union
6 | import dask.dataframe as dd
7 |
8 |
class MinmaxScaler:
    """Min Value and Max Value Scaler for scaling numerical values.

    Attributes
    ----------
    name
        Name of scaler.
    min
        Min value of the fitted data column (0 until ``fit`` is called).
    max
        Max value of the fitted data column (0 until ``fit`` is called).
    """

    def __init__(self) -> None:
        """
        Initialize an unfitted numerical scaler.
        """
        self.name = "minmaxScaler"
        self.min = 0
        self.max = 0

    def fit(self, col_df: dd.Series) -> Any:
        """
        Record the min value and max value of the provided column.

        Parameters
        ----------
        col_df
            Provided data column.

        Returns
        -------
        The scaler itself, so calls can be chained.
        """

        self.min, self.max = col_df.min(), col_df.max()
        return self

    def transform(self, col_df: dd.Series) -> dd.Series:
        """
        Map every value of the provided column through ``compute_val``.

        Parameters
        ----------
        col_df
            Provided data column.
        """

        return col_df.map(self.compute_val)

    def fit_transform(self, col_df: dd.Series) -> dd.Series:
        """
        Fit the scaler on the provided column, then transform that same column.

        Parameters
        ----------
        col_df
            Data column.
        """

        fitted = self.fit(col_df)
        return fitted.transform(col_df)

    def compute_val(self, val: Union[int, float]) -> Union[int, float]:
        """
        Scale one value with the fitted min value and max value.

        Parameters
        ----------
        val
            Value that should be scaled.
        """

        offset = val - self.min
        span = self.max - self.min
        return offset / span
79 |
--------------------------------------------------------------------------------
/dataprep/clean/components/num_scaling/standard_scaler.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement numerical standard scaler.
3 | """
4 |
5 | from typing import Any, Union
6 | import dask.dataframe as dd
7 |
8 |
class StandardScaler:
    """Standard Scaler for scaling numerical values.

    Attributes
    ----------
    name
        Name of scaler.
    mean
        Mean value of the fitted data column (0 until ``fit`` is called).
    std
        Std value of the fitted data column (0 until ``fit`` is called).
    """

    def __init__(self) -> None:
        """
        Initialize an unfitted numerical scaler.
        """

        self.name = "standardScaler"
        self.mean = 0
        self.std = 0

    def fit(self, col_df: dd.Series) -> Any:
        """
        Record the mean value and std value of the provided column.

        Parameters
        ----------
        col_df
            Provided data column.

        Returns
        -------
        The scaler itself, so calls can be chained.
        """

        self.mean, self.std = col_df.mean(), col_df.std()
        return self

    def transform(self, col_df: dd.Series) -> dd.Series:
        """
        Map every value of the provided column through ``compute_val``.

        Parameters
        ----------
        col_df
            Provided data column.
        """

        return col_df.map(self.compute_val)

    def fit_transform(self, col_df: dd.Series) -> dd.Series:
        """
        Fit the scaler on the provided column, then transform that same column.

        Parameters
        ----------
        col_df
            Data column.
        """

        fitted = self.fit(col_df)
        return fitted.transform(col_df)

    def compute_val(self, val: Union[int, float]) -> Union[int, float]:
        """
        Scale one value with the fitted mean value and std value.

        Parameters
        ----------
        val
            Value that should be scaled.
        """

        centered = val - self.mean
        return centered / self.std
80 |
--------------------------------------------------------------------------------
/dataprep/clean/components/variance_threshold/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Initialize dictionary of numerical variance thresholders.
3 | """
4 |
5 | from .variance_threshold import VarThreholder
6 |
7 | operator_dic = {"variance_threshold": VarThreholder}
8 |
--------------------------------------------------------------------------------
/dataprep/clean/components/variance_threshold/variance_threshold.py:
--------------------------------------------------------------------------------
1 | """
2 | Implement numerical variance thresholder.
3 | """
4 |
5 | from typing import Any, Union
6 | from dask import dataframe as dd
7 | from dask.dataframe import from_pandas
8 | import pandas as pd
9 |
10 |
class VarThreholder:
    """Drop a column unless its variance is strictly larger than a threshold.

    Attributes
    ----------
    variance_thresh
        Specified variance threshold.
    variance
        Variance of the fitted data column (0 until ``fit`` is called).
    """

    def __init__(self, variance: Union[int, float] = 0) -> None:
        """
        Initialize the variance thresholder.

        Parameters
        ----------
        variance
            Variance threshold provided by user. The default value is 0.
        """
        self.variance_thresh = variance
        self.variance = 0

    def fit(self, col_df: dd.Series) -> Any:
        """
        Extract the variance of the provided column.

        Parameters
        ----------
        col_df
            Provided data column.

        Returns
        -------
        The fitted thresholder itself, so calls can be chained.
        """

        variance = col_df.var()
        # A lazily evaluated reduction (e.g. from a dask column) exposes
        # compute(); materialize it so transform() can compare it against the
        # threshold inside a plain ``if`` statement.
        if hasattr(variance, "compute"):
            variance = variance.compute()
        self.variance = variance
        return self

    def transform(self, col_df: dd.Series) -> dd.Series:
        """
        Check if variance of provided column is larger than threshold.
        If yes, then keep the provided column.
        If no, drop it by returning an empty two-partition series instead.

        Parameters
        ----------
        col_df
            Provided data column.
        """

        if self.variance > self.variance_thresh:
            return col_df
        return from_pandas(pd.Series([]), npartitions=2)

    def fit_transform(self, col_df: dd.Series) -> dd.Series:
        """
        Extract the variance of the provided column.
        Check if variance of provided column is larger than threshold.
        If yes, then keep the provided column.
        If no, drop it.

        Parameters
        ----------
        col_df
            Provided data column.
        """
        return self.fit(col_df).transform(col_df)
74 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/.DS_Store
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | ["env", {
4 | "modules": false,
5 | "targets": {
6 | "browsers": ["> 1%", "last 2 versions", "not ie <= 8"]
7 | }
8 | }],
9 | "stage-2"
10 | ],
11 | "plugins": ["transform-vue-jsx", "transform-runtime"]
12 | }
13 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | indent_style = space
6 | indent_size = 2
7 | end_of_line = lf
8 | insert_final_newline = true
9 | trim_trailing_whitespace = true
10 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | env: {
3 | browser: true,
4 | es2021: true,
5 | },
6 | extends: [
7 | 'plugin:vue/essential',
8 | 'airbnb-base',
9 | ],
10 | parserOptions: {
11 | ecmaVersion: 13,
12 | sourceType: 'module',
13 | },
14 | plugins: [
15 | 'vue',
16 | ],
17 | rules: {
18 | },
19 | };
20 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | package-lock.json
3 | node_modules/
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 |
8 | # Editor directories and files
9 | .idea
10 | .vscode
11 | *.suo
12 | *.ntvs*
13 | *.njsproj
14 | *.sln
15 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/.postcssrc.js:
--------------------------------------------------------------------------------
1 | // https://github.com/michael-ciniawsky/postcss-load-config
2 |
3 | module.exports = {
4 | "plugins": {
5 | "postcss-import": {},
6 | "postcss-url": {},
7 | // to edit target browsers: use "browserslist" field in package.json
8 | "autoprefixer": {}
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/README.md:
--------------------------------------------------------------------------------
1 | # clean_frontend
2 |
3 | > A Vue.js project
4 |
5 | ## Build Setup
6 |
7 | ``` bash
8 | # install dependencies
9 | npm install
10 |
11 | # serve with hot reload at localhost:8080
12 | npm run dev
13 |
14 | # build for production with minification
15 | npm run build
16 |
17 | # convenience one-liner for development: build, then serve with hot reload
18 | npm run build && npm run dev
19 |
20 | # build for production and view the bundle analyzer report
21 | npm run build --report
22 | ```
23 |
24 | For a detailed explanation on how things work, check out the [guide](http://vuejs-templates.github.io/webpack/) and [docs for vue-loader](http://vuejs.github.io/vue-loader).
25 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/build_frontend/build.js:
--------------------------------------------------------------------------------
// Production build script: verifies tool versions, removes the previous
// build's asset directory, then runs webpack with the production config.
require('./check-versions')();

// Set before the config modules below are required.
process.env.NODE_ENV = 'production';

const ora = require('ora');
const rm = require('rimraf');
const path = require('path');
const chalk = require('chalk');
const webpack = require('webpack');
const config = require('../config');
const webpackConfig = require('./webpack.prod.conf');

const spinner = ora('building for production...');
spinner.start();

// Delete the old asset directory first; webpack only starts once removal succeeded.
rm(path.join(config.build.assetsRoot, config.build.assetsSubDirectory), (rmErr) => {
  if (rmErr) throw rmErr;
  webpack(webpackConfig, (buildErr, stats) => {
    spinner.stop();
    if (buildErr) throw buildErr;
    process.stdout.write(`${stats.toString({
      colors: true,
      modules: false,
      children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build.
      chunks: false,
      chunkModules: false,
    })}\n\n`);

    // Compilation errors are reported through `stats`, not through the callback error.
    if (stats.hasErrors()) {
      console.log(chalk.red(' Build failed with errors.\n'));
      process.exit(1);
    }

    console.log(chalk.cyan(' Build complete.\n'));
    console.log(chalk.yellow(
      ' Tip: built files are meant to be served over an HTTP server.\n'
      + " Opening index.html over file:// won't work.\n",
    ));
  });
});
41 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/build_frontend/check-versions.js:
--------------------------------------------------------------------------------
1 | const chalk = require('chalk');
2 | const semver = require('semver');
3 | const shell = require('shelljs');
4 | const packageConfig = require('../package.json');
5 |
6 | function exec(cmd) {
7 | return require('child_process').execSync(cmd).toString().trim();
8 | }
9 |
10 | const versionRequirements = [
11 | {
12 | name: 'node',
13 | currentVersion: semver.clean(process.version),
14 | versionRequirement: packageConfig.engines.node,
15 | },
16 | ];
17 |
18 | if (shell.which('npm')) {
19 | versionRequirements.push({
20 | name: 'npm',
21 | currentVersion: exec('npm --version'),
22 | versionRequirement: packageConfig.engines.npm,
23 | });
24 | }
25 |
26 | module.exports = function () {
27 | const warnings = [];
28 |
29 | for (let i = 0; i < versionRequirements.length; i++) {
30 | const mod = versionRequirements[i];
31 |
32 | if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) {
33 | warnings.push(`${mod.name}: ${
34 | chalk.red(mod.currentVersion)} should be ${
35 | chalk.green(mod.versionRequirement)}`);
36 | }
37 | }
38 |
39 | if (warnings.length) {
40 | console.log('');
41 | console.log(chalk.yellow('To use this template, you must update following to modules:'));
42 | console.log();
43 |
44 | for (let i = 0; i < warnings.length; i++) {
45 | const warning = warnings[i];
46 | console.log(` ${warning}`);
47 | }
48 |
49 | console.log();
50 | process.exit(1);
51 | }
52 | };
53 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/build_frontend/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/build_frontend/logo.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/build_frontend/vue-loader.conf.js:
--------------------------------------------------------------------------------
1 | const utils = require('./utils');
2 | const config = require('../config');
3 |
4 | const isProduction = process.env.NODE_ENV === 'production';
5 | const sourceMapEnabled = isProduction
6 | ? config.build.productionSourceMap
7 | : config.dev.cssSourceMap;
8 |
9 | module.exports = {
10 | loaders: utils.cssLoaders({
11 | sourceMap: sourceMapEnabled,
12 | extract: isProduction,
13 | }),
14 | cssSourceMap: sourceMapEnabled,
15 | cacheBusting: config.dev.cacheBusting,
16 | transformToRequire: {
17 | video: ['src', 'poster'],
18 | source: 'src',
19 | img: 'src',
20 | image: 'xlink:href',
21 | },
22 | };
23 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/build_frontend/webpack.base.conf.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 | const utils = require('./utils');
3 | const config = require('../config');
4 | const vueLoaderConfig = require('./vue-loader.conf');
5 |
6 | function resolve(dir) {
7 | return path.join(__dirname, '..', dir);
8 | }
9 |
10 | module.exports = {
11 | context: path.resolve(__dirname, '../'),
12 | entry: {
13 | app: './src/main.js',
14 | },
15 | output: {
16 | path: config.build.assetsRoot,
17 | filename: '[name].js',
18 | publicPath: process.env.NODE_ENV === 'production'
19 | ? config.build.assetsPublicPath
20 | : config.dev.assetsPublicPath,
21 | },
22 | resolve: {
23 | extensions: ['.js', '.vue', '.json'],
24 | alias: {
25 | vue$: 'vue/dist/vue.esm.js',
26 | '@': resolve('src'),
27 | },
28 | },
29 | module: {
30 | rules: [
31 | {
32 | test: /\.vue$/,
33 | loader: 'vue-loader',
34 | options: vueLoaderConfig,
35 | },
36 | {
37 | test: /\.js$/,
38 | loader: 'babel-loader',
39 | include: [resolve('src'), resolve('test'), resolve('node_modules/webpack-dev-server/client')],
40 | },
41 | {
42 | test: /\.(png|jpe?g|gif|svg)(\?.*)?$/,
43 | loader: 'url-loader',
44 | options: {
45 | limit: 10000,
46 | name: utils.assetsPath('img/[name].[hash:7].[ext]'),
47 | },
48 | },
49 | {
50 | test: /\.(mp4|webm|ogg|mp3|wav|flac|aac)(\?.*)?$/,
51 | loader: 'url-loader',
52 | options: {
53 | limit: 10000,
54 | name: utils.assetsPath('media/[name].[hash:7].[ext]'),
55 | },
56 | },
57 | {
58 | test: /\.(woff2?|eot|ttf|otf)(\?.*)?$/,
59 | loader: 'url-loader',
60 | options: {
61 | limit: 10000,
62 | name: utils.assetsPath('fonts/[name].[hash:7].[ext]'),
63 | },
64 | },
65 | ],
66 | },
67 | node: {
68 | // prevent webpack from injecting useless setImmediate polyfill because Vue
69 | // source contains it (although only uses it if it"s native).
70 | setImmediate: false,
71 | // prevent webpack from injecting mocks to Node native modules
72 | // that does not make sense for the client
73 | dgram: 'empty',
74 | fs: 'empty',
75 | net: 'empty',
76 | tls: 'empty',
77 | child_process: 'empty',
78 | },
79 | };
80 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/config/dev.env.js:
--------------------------------------------------------------------------------
1 | "use strict"
2 | const merge = require("webpack-merge")
3 | const prodEnv = require("./prod.env")
4 |
5 | module.exports = merge(prodEnv, {
6 | NODE_ENV: "'development'"
7 | })
8 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/config/prod.env.js:
--------------------------------------------------------------------------------
1 | "use strict"
2 | module.exports = {
3 | NODE_ENV: "'production'"
4 | }
5 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | clean_frontend
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "clean_frontend",
3 | "version": "1.0.0",
4 | "description": "A Vue.js project",
5 | "author": "qidanrui ",
6 | "private": true,
7 | "scripts": {
8 | "dev": "webpack-dev-server --inline --progress --config build_frontend/webpack.dev.conf.js",
9 | "start": "npm run dev",
10 | "build": "node build_frontend/build.js"
11 | },
12 | "dependencies": {
13 | "axios": "^0.24.0",
14 | "element-ui": "^2.15.6",
15 | "http-proxy-middleware": "^2.0.1",
16 | "vue": "^2.5.2",
17 | "vue-router": "^3.0.1"
18 | },
19 | "devDependencies": {
20 | "autoprefixer": "^7.1.2",
21 | "babel-core": "^6.22.1",
22 | "babel-helper-vue-jsx-merge-props": "^2.0.3",
23 | "babel-loader": "^7.1.1",
24 | "babel-plugin-syntax-jsx": "^6.18.0",
25 | "babel-plugin-transform-runtime": "^6.22.0",
26 | "babel-plugin-transform-vue-jsx": "^3.5.0",
27 | "babel-preset-env": "^1.3.2",
28 | "babel-preset-stage-2": "^6.22.0",
29 | "chalk": "^2.0.1",
30 | "copy-webpack-plugin": "^4.0.1",
31 | "css-loader": "^0.28.0",
32 | "eslint": "^8.3.0",
33 | "eslint-config-airbnb-base": "^15.0.0",
34 | "eslint-plugin-import": "^2.25.3",
35 | "eslint-plugin-vue": "^8.1.1",
36 | "extract-text-webpack-plugin": "^3.0.0",
37 | "file-loader": "^1.1.4",
38 | "friendly-errors-webpack-plugin": "^1.6.1",
39 | "html-webpack-plugin": "^2.30.1",
40 | "node-notifier": "^8.0.1",
41 | "optimize-css-assets-webpack-plugin": "^3.2.0",
42 | "ora": "^1.2.0",
43 | "portfinder": "^1.0.13",
44 | "postcss-import": "^11.0.0",
45 | "postcss-loader": "^2.0.8",
46 | "postcss-url": "^7.2.1",
47 | "rimraf": "^2.6.0",
48 | "semver": "^5.3.0",
49 | "shelljs": "^0.8.5",
50 | "uglifyjs-webpack-plugin": "^1.1.1",
51 | "url-loader": "^0.5.8",
52 | "vue-loader": "^13.3.0",
53 | "vue-style-loader": "^3.0.1",
54 | "vue-template-compiler": "^2.5.2",
55 | "webpack": "^3.6.0",
56 | "webpack-bundle-analyzer": "^2.9.0",
57 | "webpack-dev-server": "^2.9.1",
58 | "webpack-merge": "^4.1.0"
59 | },
60 | "engines": {
61 | "node": ">= 6.0.0",
62 | "npm": ">= 3.0.0"
63 | },
64 | "browserslist": [
65 | "> 1%",
66 | "last 2 versions",
67 | "not ie <= 8"
68 | ]
69 | }
70 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/App.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
16 |
17 |
27 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/assets/cleanning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/src/assets/cleanning.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/assets/columns.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/src/assets/columns.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/assets/footerlog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/src/assets/footerlog.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/src/assets/logo.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/assets/para.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/src/assets/para.png
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/components/FooterLog.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
log
5 |
6 |
7 |
8 | - {{ li }}
9 |
10 |
11 |
12 |
13 |
14 |
34 |
53 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/components/HeaderMenu.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {{ menuText }}
7 |
8 |
9 |
14 | {{ item }}
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
69 |
70 |
72 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/components/Loading.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
22 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/components/MainTable.vue:
--------------------------------------------------------------------------------
1 |
2 |
9 |
17 |
18 |
19 |
20 |
21 |
45 |
47 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/components/Table.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
12 |
13 |
14 |
15 |
16 |
66 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/main.js:
--------------------------------------------------------------------------------
1 | // The Vue build version to load with the `import` command
2 | // (runtime-only or standalone) has been set in webpack.base.conf with an alias.
3 | import Vue from 'vue'
4 | import ElementUI from 'element-ui'
5 | import 'element-ui/lib/theme-chalk/index.css'
6 | import App from './App'
7 | import router from './router'
8 |
9 | Vue.config.productionTip = false
10 | Vue.use(ElementUI)
11 |
/* eslint-disable no-new */
// Root Vue instance, mounted on the #app element. The template renders the
// locally registered App component; an empty template would render nothing.
new Vue({
  el: '#app',
  router,
  components: { App },
  template: '<App/>'
})
19 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/router/index.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import Router from 'vue-router'
3 | import SampleDF from '../components/SampleDF'
4 |
5 | Vue.use(Router)
6 |
7 | export default new Router({
8 | routes: [
9 | {
10 | path: '/',
11 | name: 'SampleDF',
12 | component: SampleDF
13 | }
14 | ]
15 | })
16 |
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/src/util/endpoint.js:
--------------------------------------------------------------------------------
1 | /* Backend API endpoint: temporarily hard-coded to port 7680 on the current host, for local use */
2 | const API_URL = `http://${window.location.hostname}:7680/`;
3 | export default {
4 | API_URL: API_URL,
5 | }
--------------------------------------------------------------------------------
/dataprep/clean/gui/clean_frontend/static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/clean_frontend/static/.gitkeep
--------------------------------------------------------------------------------
/dataprep/clean/gui/frontend_dist/index.html:
--------------------------------------------------------------------------------
1 | clean_frontend
--------------------------------------------------------------------------------
/dataprep/clean/gui/frontend_dist/static/fonts/element-icons.535877f.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/frontend_dist/static/fonts/element-icons.535877f.woff
--------------------------------------------------------------------------------
/dataprep/clean/gui/frontend_dist/static/fonts/element-icons.732389d.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/clean/gui/frontend_dist/static/fonts/element-icons.732389d.ttf
--------------------------------------------------------------------------------
/dataprep/clean/gui/frontend_dist/static/js/manifest.2ae2e69a05c33dfc65f8.js:
--------------------------------------------------------------------------------
1 | !function(r){var n=window.webpackJsonp;window.webpackJsonp=function(e,u,c){for(var f,i,p,a=0,l=[];a Connector:
29 | """Connect to a website.
30 |
31 | Parameters
32 | ----------
33 | config_path
34 | The path to the config. It can be hosted, e.g. "yelp", or from
35 | local filesystem, e.g. "./yelp"
36 | _auth: Optional[Dict[str, Any]] = None
37 | The parameters for authentication, e.g. OAuth2
38 | _concurrency: int = 5
39 | The concurrency setting. By default it is 1 reqs/sec.
40 | update: bool = True
41 | Force update the config file even if the local version exists.
42 | **kwargs
43 | Parameters that shared by different queries.
44 |
45 | Returns
46 | -------
47 | Connector
48 | a Connector object.
49 |
50 | Example
51 | -------
52 | >>> from dataprep.connector import connect
53 | >>> dc = connect("yelp", _auth={"access_token": access_token}, _concurrency=3)
54 | """
55 | return Connector(config_path, update=update, _auth=_auth, _concurrency=_concurrency, **kwargs)
56 |
57 |
def config_generator_ui(existing: Optional[Dict[str, Any]] = None) -> None:
    """Build the Config Generator UI and display it.

    Parameters
    ----------
    existing: Optional[Dict[str, Any]] = None
        An existing configuration handed to the UI. Leave it as ``None``
        to create the UI without one.
    """
    generator_ui = ConfigGeneratorUI(existing)
    generator_ui.display()
68 |
--------------------------------------------------------------------------------
/dataprep/connector/generator/__init__.py:
--------------------------------------------------------------------------------
1 | """ConfigGenerator"""
2 |
3 | from .generator import ConfigGenerator
4 | from .ui import ConfigGeneratorUI
5 |
6 | __all__ = ["ConfigGenerator", "ConfigGeneratorUI"]
7 |
--------------------------------------------------------------------------------
/dataprep/connector/generator/state.py:
--------------------------------------------------------------------------------
1 | """Defines ConfigState."""
2 |
3 | from typing import Optional
4 |
5 | from dataprep.connector.schema.defs import ConfigDef
6 |
7 |
class ConfigState:
    """Holder for an optional config that grows by merging in further configs."""

    # The wrapped config; ``None`` means nothing has been added yet.
    config: Optional[ConfigDef] = None

    def __init__(self, config: Optional[ConfigDef]) -> None:
        self.config = config

    def __add__(self, rhs: ConfigDef) -> "ConfigState":
        """Return a new state holding ``rhs`` merged into the current config.

        When no config is held yet, the new state simply wraps ``rhs``.
        """
        merged = rhs if self.config is None else self.config.merge(rhs)
        return ConfigState(merged)

    def __str__(self) -> str:
        """Return the string form of the wrapped config."""
        return str(self.config)

    def __repr__(self) -> str:
        """Return the same text as ``str(self)``."""
        return str(self)
27 |
--------------------------------------------------------------------------------
/dataprep/connector/info_ui.py:
--------------------------------------------------------------------------------
1 | """This module handles displaying information on how to connect and query."""
2 |
3 | from typing import Any, Dict
4 | from jinja2 import Environment, PackageLoader, select_autoescape
5 | from ..utils import display_html
6 |
7 | LOADER = PackageLoader("dataprep", "connector/assets")
8 | ENV_LOADER = Environment(loader=LOADER, autoescape=select_autoescape("html"))
9 |
10 |
def info_ui(dbname: str, tbs: Dict[str, Any]) -> None:
    """Render the ``info.html`` template and display the resulting HTML.

    Parameters
    ----------
    dbname
        Name of the website, passed to the template as ``dbname``.
    tbs
        Table containing info to be displayed, passed to the template as ``tbs``.
    """
    context = {"dbname": dbname, "tbs": tbs}
    rendered = ENV_LOADER.get_template("info.html").render(context)
    display_html(rendered)
28 |
--------------------------------------------------------------------------------
/dataprep/connector/ref.py:
--------------------------------------------------------------------------------
1 | """ref: defines a reference type of value."""
2 |
3 | from typing import TypeVar, Generic
4 |
T = TypeVar("T")  # pylint: disable=invalid-name


class Ref(Generic[T]):
    """A mutable cell holding a single value.

    The conversions below (``int``, ``bool``, ``str``, ``repr``) all
    delegate to the held value.
    """

    # Only one attribute is allowed on instances.
    __slots__ = ("val",)

    val: T

    def __init__(self, val: T) -> None:
        self.val = val

    def set(self, val: T) -> None:
        """Replace the held value with ``val``."""
        self.val = val

    def __bool__(self) -> bool:
        """Return the truthiness of the held value."""
        return bool(self.val)

    def __int__(self) -> int:
        """Return the held value converted with ``int``."""
        return int(self.val)  # type: ignore

    def __str__(self) -> str:
        """Return ``str`` of the held value."""
        return str(self.val)

    def __repr__(self) -> str:
        """Return ``str`` of the held value (not its ``repr``)."""
        return str(self.val)
33 |
--------------------------------------------------------------------------------
/dataprep/connector/schema/__init__.py:
--------------------------------------------------------------------------------
1 | """Module contains the loaded config schema."""
2 |
3 | from .defs import *
4 |
--------------------------------------------------------------------------------
/dataprep/connector/schema/oauth2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | OAuth2 Success
5 |
6 |
7 |
8 | OAuth2 Success. This window can be closed now.
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/dataprep/connector/sql.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains the method of read_sql.
3 | It is a wrapper on connectorx.read_sql function.
4 | """
5 |
6 | from typing import Optional, Tuple, Union, List, Any
7 |
# connectorx is optional: remember whether it could be imported so that
# read_sql can fail with a helpful message instead of a NameError.
try:
    import connectorx as cx

    _WITH_CX = True
except ImportError:
    _WITH_CX = False


def read_sql(
    conn: str,
    query: Union[List[str], str],
    *,
    return_type: str = "pandas",
    protocol: str = "binary",
    partition_on: Optional[str] = None,
    partition_range: Optional[Tuple[int, int]] = None,
    partition_num: Optional[int] = None,
) -> Any:
    """
    Run the SQL query, download the data from database into a dataframe.
    Please check out https://github.com/sfu-db/connector-x for more details.

    All arguments are forwarded unchanged to ``connectorx.read_sql``.

    Parameters
    ----------
    conn
        the connection string.
    query
        a SQL query or a list of SQL queries.
    return_type
        the return type of this function. It can be "arrow", "pandas", "modin", "dask" or "polars".
    protocol
        the protocol used to fetch data from source. Valid protocols are database dependent
        (https://github.com/sfu-db/connector-x/blob/main/Types.md).
    partition_on
        the column to partition the result.
    partition_range
        the value range of the partition column.
    partition_num
        how many partitions to generate.

    Returns
    -------
    Any
        whatever ``connectorx.read_sql`` returns for the requested ``return_type``.

    Raises
    ------
    ImportError
        if connectorx is not installed.

    Example
    --------
    >>> db_url = "postgresql://username:password@server:port/database"
    >>> query = "SELECT * FROM lineitem"
    >>> read_sql(db_url, query, partition_on="partition_col", partition_num=10)
    """
    if not _WITH_CX:
        # The original message was built from two adjacent literals and
        # lacked the space between the sentences.
        raise ImportError("connectorx is not installed. Please run pip install connectorx")

    return cx.read_sql(
        conn=conn,
        query=query,
        return_type=return_type,
        protocol=protocol,
        partition_on=partition_on,
        partition_range=partition_range,
        partition_num=partition_num,
    )
67 |
--------------------------------------------------------------------------------
/dataprep/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """This module implements load dataset related functions"""
2 |
3 | from ._base import load_dataset, _load_dataset_as_dask, load_db
4 | from ._base import get_dataset_names, get_db_names
5 |
6 | __all__ = ["load_dataset", "get_dataset_names", "_load_dataset_as_dask", "load_db", "get_db_names"]
7 |
--------------------------------------------------------------------------------
/dataprep/datasets/database/sakila.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/datasets/database/sakila.db
--------------------------------------------------------------------------------
/dataprep/eda/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | dataprep.eda
3 | ============
4 | """
5 |
6 | from bokeh.io import output_notebook
7 |
8 | from ..utils import is_notebook
9 | from .correlation import compute_correlation, plot_correlation, render_correlation
10 | from .create_report import create_report
11 | from .create_db_report import create_db_report
12 | from .create_diff_report import create_diff_report
13 | from .distribution import compute, plot, render
14 | from .dtypes import (
15 | Categorical,
16 | Continuous,
17 | GeoGraphy,
18 | GeoPoint,
19 | DateTime,
20 | Discrete,
21 | DType,
22 | Nominal,
23 | Numerical,
24 | Ordinal,
25 | Text,
26 | )
27 | from .missing import compute_missing, plot_missing, render_missing
28 | from .diff import plot_diff, compute_diff, render_diff
29 |
30 | __all__ = [
31 | "plot_correlation",
32 | "compute_correlation",
33 | "render_correlation",
34 | "compute_missing",
35 | "render_missing",
36 | "plot_missing",
37 | "plot",
38 | "compute",
39 | "render",
40 | "DType",
41 | "Categorical",
42 | "Nominal",
43 | "Ordinal",
44 | "Numerical",
45 | "Continuous",
46 | "Discrete",
47 | "DateTime",
48 | "Text",
49 | "create_report",
50 | "create_db_report",
51 | "create_diff_report",
52 | "plot_diff",
53 | "compute_diff",
54 | "render_diff",
55 | ]
56 |
57 |
58 | if is_notebook():
59 | output_notebook(hide_banner=True)
60 |
--------------------------------------------------------------------------------
/dataprep/eda/correlation/compute/common.py:
--------------------------------------------------------------------------------
1 | """Common components for compute correlation."""
2 |
3 | from enum import Enum, auto
4 |
5 | import dask
6 | import numpy as np
7 | from scipy.stats.mstats import rankdata
8 | from scipy.stats import kendalltau as kendalltau_
9 |
10 |
class CorrelationMethod(Enum):
    """Enumeration of the correlation methods that can be selected.

    Member values are assigned automatically in declaration order.
    """

    # pylint: disable=invalid-name
    Pearson = auto()  # first member
    Spearman = auto()  # second member
    KendallTau = auto()  # third member
18 |
19 |
20 | # @dask.delayed(name="rankdata-bottleneck", pure=True) # pylint: disable=no-value-for-parameter
21 | # def nanrankdata(data: np.ndarray, axis: int = 0) -> np.ndarray:
22 | # """delayed version of rankdata."""
23 | # return nanrankdata_(data, axis=axis)
24 |
25 |
@dask.delayed(name="nanrankdata", pure=True)  # pylint: disable=no-value-for-parameter
def nanrankdata(data: np.ndarray, axis: int = 0) -> np.ndarray:
    """Delayed ranking of ``data`` along ``axis``.

    Invalid entries (as detected by ``np.ma.masked_invalid``) are masked
    before ranking; every rank that comes back as 0 is replaced by NaN.
    """
    masked = np.ma.masked_invalid(data)
    ranks = rankdata(masked, axis=axis)
    zero_rank = ranks == 0
    ranks[zero_rank] = np.nan
    return ranks
32 |
33 |
@dask.delayed(name="kendalltau-scipy", pure=True)  # pylint: disable=no-value-for-parameter
def kendalltau(a: np.ndarray, b: np.ndarray) -> np.ndarray:  # pylint: disable=invalid-name
    """Delayed wrapper returning the ``correlation`` field of scipy's kendalltau."""
    result = kendalltau_(a, b)
    # The correlation is sometimes a plain float, which causes a dask error,
    # so it is always converted to np.float64.
    return np.float64(result.correlation)
39 |
40 |
# The task name previously duplicated "kendalltau-scipy" (copy-paste from the
# function above); give this task its own name.
@dask.delayed(name="corrcoef", pure=True)  # pylint: disable=no-value-for-parameter
def corrcoef(arr: np.ndarray) -> np.ndarray:
    """Delayed version of np.corrcoef for a pair of columns.

    ``arr`` is treated column-wise (``rowvar=False``). The unpacking below
    requires the correlation matrix to be 2x2, i.e. ``arr`` must have exactly
    two columns; the off-diagonal entry (second row, first column) is returned.
    """
    _, (corr, _) = np.corrcoef(arr, rowvar=False)
    return corr
46 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements the create_db_report(sql_engine) function.
3 | """
4 |
5 | import warnings
6 | from sqlalchemy.engine.base import Engine
7 | from .run_function import generate_db_report
8 | from .report import Report
9 |
10 | __all__ = ["create_db_report"]
11 |
12 |
def create_db_report(
    sql_engine: Engine,
    analyze: bool = False,
) -> Report:
    """
    Generate a database report for the given engine and wrap it in a Report.

    Parameters
    ----------
    sql_engine
        SQL Alchemy Engine object returned from create_engine() with an url passed
        E.g. sql_engine = create_engine(url)
    analyze
        Whether to execute ANALYZE to write database statistics to the database

    Returns
    -------
    Report
        built from the values returned by generate_db_report.

    Examples
    --------
    >>> from dataprep.eda import create_db_report
    >>> from dataprep.datasets import load_db
    >>> db_engine = load_db('sakila.db')
    >>> create_db_report(db_engine)
    """
    # Install the warning filter before any report generation runs.
    _suppress_warnings()
    report_parts = generate_db_report(sql_engine, analyze)
    return Report(*report_parts)
37 |
38 |
39 | def _suppress_warnings() -> None:
40 | """
41 | suppress warnings in create_db_report
42 | """
43 | warnings.filterwarnings(
44 | "ignore",
45 | "The default value of regex will change from True to False in a future version",
46 | category=FutureWarning,
47 | )
48 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/db_models/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/database.py:
--------------------------------------------------------------------------------
1 | from .db_meta import DbMeta
2 |
3 |
class Database:
    """A named database with its schema, its tables and views, and metadata.

    Instance attributes of the ``stats`` object are copied onto the database,
    except those whose name is already present on it.
    """

    def __init__(self, name: str, schema: str, stats: DbMeta) -> None:
        # Single quotes are removed from the name.
        self.name = name.replace("'", "")
        self.schema = schema
        self.tables = {}
        self.views = {}
        for attr, value in vars(stats).items():
            if hasattr(self, attr):
                # Never overwrite something the database already defines.
                continue
            setattr(self, attr, value)

    def add_table(self, table_name: str, table_object):
        """Register ``table_object`` under ``table_name``."""
        self.tables[table_name] = table_object

    def add_view(self, view_name: str, view_object):
        """Register ``view_object`` under ``view_name``."""
        self.views[view_name] = view_object

    def get_tables(self):
        """Return a view of the registered table objects."""
        return self.tables.values()

    def get_tables_dict(self):
        """Return the mapping from table name to table object."""
        return self.tables

    def get_views(self):
        """Return a view of the registered view objects."""
        return self.views.values()

    def get_name(self):
        """Return the database name."""
        return self.name
31 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/db_meta.py:
--------------------------------------------------------------------------------
class DbMeta:
    """
    Summary counts and connection details for one database.

    Instances expose the counts passed to the constructor, the connection
    URL, and ``database_product``, a display string of the form
    ``"<engine display name> - <product_version>"``.
    """

    # Class-level defaults; __init__ shadows each of them per instance.
    num_of_views = 0
    num_of_schemas = 0
    num_of_fk = 0
    num_of_uk = 0
    num_of_pk = 0
    num_of_tables = 0

    # Display names for the engine_name values accepted by __init__.
    engine_name_dict = {
        "mysql": "MySQL",
        "postgresql": "PostgreSQL",
        "sqlite": "SQLite",
    }

    def __init__(
        self,
        engine_name: str,
        num_of_views: int,
        num_of_schemas: int,
        num_of_fk: int,
        num_of_uk: int,
        num_of_pk: int,
        num_of_tables: int,
        product_version: str,
        connection_url: str,
    ) -> None:
        """
        Parameters
        ----------
        engine_name
            Key into ``engine_name_dict`` ("mysql", "postgresql" or "sqlite").
        num_of_views, num_of_schemas, num_of_fk, num_of_uk, num_of_pk, num_of_tables
            Counts stored on the instance under the same names.
        product_version
            Version text appended to the engine display name.
        connection_url
            Connection URL, stored unchanged.

        Raises
        ------
        KeyError
            If ``engine_name`` is not a key of ``engine_name_dict``.
        """
        self.num_of_views = num_of_views
        self.num_of_schemas = num_of_schemas
        self.num_of_fk = num_of_fk
        self.num_of_uk = num_of_uk
        self.num_of_pk = num_of_pk
        # Previously the count was stored only as ``num_of_table``, so
        # ``num_of_tables`` silently stayed at the class default of 0.
        self.num_of_tables = num_of_tables
        # Keep the old misspelled attribute so existing readers keep working.
        self.num_of_table = num_of_tables
        self.connection_url = connection_url
        self.database_product = self.engine_name_dict[engine_name] + " - " + product_version
35 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/table_column.py:
--------------------------------------------------------------------------------
1 | from .table import Table
2 | from .constraint import ForeignKeyConstraint
3 |
4 |
class TableColumn:
    """
    Model of a single column of a table, including the foreign-key
    constraints that link it to parent and child columns.
    """

    def __init__(
        self,
        table: Table,
        name: str,
        type_name: str,
        not_null: bool,
        default_value: str,
        is_auto_updated: bool,
        comments: str,
    ):
        """
        Parameters
        ----------
        table
            Owning table object.
        name
            Column name; single quotes are removed before it is stored.
        type_name
            Column type name, stored unchanged.
        not_null
            Stored unchanged as ``not_null``.
        default_value
            Default value text; single quotes are removed, and a falsy value
            is stored as an empty string.
        is_auto_updated
            Stored unchanged as ``is_auto_updated``.
        comments
            Column comment; single quotes are removed, and a falsy value is
            stored as an empty string.
        """
        self.table = table
        self.name = name.replace("'", "")
        self.type_name = type_name
        self.not_null = not_null
        self.default_value = default_value.replace("'", "") if default_value else ""
        self.comments = comments.replace("'", "") if comments else ""
        # Both mappings go from the related column object to its constraint.
        self.parents = {}
        self.children = {}
        self.type = 0
        self.decimal_digits = 0
        self.detailed_size = ""
        self.is_auto_updated = is_auto_updated
        self.index = False

    def set_index(self):
        """Flag this column as indexed."""
        self.index = True

    def is_primary(self):
        """Return True when this column is among the table's primary keys."""
        primary_keys = self.table.primary_keys
        return primary_keys is not None and self in primary_keys

    def is_foreign_key(self):
        """Return True when at least one parent has been registered."""
        return bool(self.parents)

    def get_default_value(self):
        """Return the stored default value text."""
        return self.default_value

    def set_comments(self, comments: str):
        """Store the stripped comment, or None when it is None or blank."""
        stripped = comments.strip() if comments is not None else ""
        self.comments = stripped if stripped else None

    def add_parent(self, parent, constraint: ForeignKeyConstraint):
        """Record ``constraint`` for ``parent`` and notify the owning table."""
        self.parents[parent] = constraint
        self.table.add_max_parents()

    def add_child(self, child, constraint: ForeignKeyConstraint):
        """Record ``constraint`` for ``child`` and notify the owning table."""
        self.children[child] = constraint
        self.table.add_max_children()

    def get_parents(self):
        """Return the parent constraints as a new list."""
        return [*self.parents.values()]

    def get_children(self):
        """Return the child constraints as a new list."""
        return [*self.children.values()]

    def get_name(self):
        """Return the stored column name."""
        return self.name
66 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/table_index.py:
--------------------------------------------------------------------------------
class TableIndex:
    """
    Model of a table index: its name, index type, uniqueness/primary flags
    and the columns it covers.
    """

    def __init__(self, name: str, index_type: str) -> None:
        """Store the name (single quotes removed) and the index type."""
        self.name = name.replace("'", "")
        self.is_unique = False
        self.is_primary = False
        self.columns = {}
        self.index_type = index_type

    def add_column(self, col_string: str, column):
        """Register ``column`` under ``col_string``; a None column is ignored."""
        if column is None:
            return
        self.columns[col_string] = column

    def set_primary(self):
        """Flag this index as the primary-key index."""
        self.is_primary = True

    def set_unique(self):
        """Flag this index as unique."""
        self.is_unique = True

    def get_type(self):
        """Return a display label; the primary flag wins over the unique flag."""
        if self.is_primary:
            return "Primary key"
        return "Must be unique" if self.is_unique else "Performance"

    def get_index_type(self):
        """Return the index type given at construction."""
        return self.index_type

    def columns_as_string(self):
        """Return the registered column keys concatenated with no separator."""
        return "".join(self.columns)
31 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/db_models/view.py:
--------------------------------------------------------------------------------
1 | from .table import Table
2 | from .database import Database
3 |
4 |
class View(Table):
    """A Table subclass that additionally carries the text of a view definition."""

    def __init__(self, db: Database, schema: str, name: str, view_def: str) -> None:
        """Initialise the Table part, then store ``view_def`` if it is non-blank."""
        # Set the default before the base initialiser runs, as the original does.
        self.view_definition = None
        super().__init__(db, schema, name)
        self.set_view_definition(view_def)

    def set_view_definition(self, view_definition: str):
        """Store ``view_definition`` unless it is None or only whitespace."""
        if view_definition is None:
            return
        if view_definition.strip():
            self.view_definition = view_definition

    def get_view_definition(self):
        """Return the stored view definition, or None when none was stored."""
        return self.view_definition

    def is_view(self):
        """Always True for a View."""
        return True
20 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/header/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/header/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/anomalies.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/anomalies.js
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/fonts/glyphicons-halflings-regular.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/bootstrap/js/npm.js:
--------------------------------------------------------------------------------
1 | // This file is autogenerated via the `commonjs` Grunt task. You can require() this file in a CommonJS environment.
2 | require('../../js/transition.js')
3 | require('../../js/alert.js')
4 | require('../../js/button.js')
5 | require('../../js/carousel.js')
6 | require('../../js/collapse.js')
7 | require('../../js/dropdown.js')
8 | require('../../js/modal.js')
9 | require('../../js/tooltip.js')
10 | require('../../js/popover.js')
11 | require('../../js/scrollspy.js')
12 | require('../../js/tab.js')
13 | require('../../js/affix.js')
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/dist/css/alt/AdminLTE-fullcalendar.css:
--------------------------------------------------------------------------------
/*
 * Plugin: Full Calendar
 * ---------------------
 * Overrides for the `.fc-*` calendar classes plus styles for the
 * colour picker, the "add new event" control and draggable external events.
 */

/* Buttons: flat light-grey background with any background image removed. */
.fc-button {
  background: #f4f4f4;
  background-image: none;
  color: #444;
  border-color: #ddd;
  border-bottom-color: #ddd;
}
/* Slightly darker grey while hovered or active. */
.fc-button:hover,
.fc-button:active,
.fc-button.hover {
  background-color: #e9e9e9;
}
/* Header title and the padding on either side of the header. */
.fc-header-title h2 {
  font-size: 15px;
  line-height: 1.6em;
  color: #666;
  margin-left: 10px;
}
.fc-header-right {
  padding-right: 10px;
}
.fc-header-left {
  padding-left: 10px;
}
.fc-widget-header {
  background: #fafafa;
}
/* Grid fills its container and has no border of its own. */
.fc-grid {
  width: 100%;
  border: 0;
}
/* Remove the side borders on the first and last header/content cells. */
.fc-widget-header:first-of-type,
.fc-widget-content:first-of-type {
  border-left: 0;
  border-right: 0;
}
.fc-widget-header:last-of-type,
.fc-widget-content:last-of-type {
  border-right: 0;
}
.fc-toolbar {
  padding: 10px;
  margin: 0;
}
.fc-day-number {
  font-size: 20px;
  font-weight: 300;
  padding-right: 10px;
}
/* Colour picker: an unstyled list whose items float left. */
.fc-color-picker {
  list-style: none;
  margin: 0;
  padding: 0;
}
.fc-color-picker > li {
  float: left;
  font-size: 30px;
  margin-right: 5px;
  line-height: 30px;
}
/* Icons inside the picker rotate 30 degrees on hover, animated over 0.3s. */
.fc-color-picker > li .fa {
  -webkit-transition: -webkit-transform linear 0.3s;
  -moz-transition: -moz-transform linear 0.3s;
  -o-transition: -o-transform linear 0.3s;
  transition: transform linear 0.3s;
}
.fc-color-picker > li .fa:hover {
  -webkit-transform: rotate(30deg);
  -ms-transform: rotate(30deg);
  -o-transform: rotate(30deg);
  transform: rotate(30deg);
}
/* Animate every property change on the add-new-event element over 0.3s. */
#add-new-event {
  -webkit-transition: all linear 0.3s;
  -o-transition: all linear 0.3s;
  transition: all linear 0.3s;
}
/* External events: bold, rounded boxes shown with a "move" cursor. */
.external-event {
  padding: 5px 10px;
  font-weight: bold;
  margin-bottom: 4px;
  box-shadow: 0 1px 1px rgba(0, 0, 0, 0.1);
  text-shadow: 0 1px 1px rgba(0, 0, 0, 0.1);
  border-radius: 3px;
  cursor: move;
}
/* Large inset shadow darkens the box while hovered. */
.external-event:hover {
  box-shadow: inset 0 0 90px rgba(0, 0, 0, 0.2);
}
94 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/admin-lte/dist/css/alt/AdminLTE-fullcalendar.min.css:
--------------------------------------------------------------------------------
1 | .fc-button{background:#f4f4f4;background-image:none;color:#444;border-color:#ddd;border-bottom-color:#ddd}.fc-button:hover,.fc-button:active,.fc-button.hover{background-color:#e9e9e9}.fc-header-title h2{font-size:15px;line-height:1.6em;color:#666;margin-left:10px}.fc-header-right{padding-right:10px}.fc-header-left{padding-left:10px}.fc-widget-header{background:#fafafa}.fc-grid{width:100%;border:0}.fc-widget-header:first-of-type,.fc-widget-content:first-of-type{border-left:0;border-right:0}.fc-widget-header:last-of-type,.fc-widget-content:last-of-type{border-right:0}.fc-toolbar{padding:10px;margin:0}.fc-day-number{font-size:20px;font-weight:300;padding-right:10px}.fc-color-picker{list-style:none;margin:0;padding:0}.fc-color-picker>li{float:left;font-size:30px;margin-right:5px;line-height:30px}.fc-color-picker>li .fa{-webkit-transition:-webkit-transform linear .3s;-moz-transition:-moz-transform linear .3s;-o-transition:-o-transform linear .3s;transition:transform linear .3s}.fc-color-picker>li .fa:hover{-webkit-transform:rotate(30deg);-ms-transform:rotate(30deg);-o-transform:rotate(30deg);transform:rotate(30deg)}#add-new-event{-webkit-transition:all linear .3s;-o-transition:all linear .3s;transition:all linear .3s}.external-event{padding:5px 10px;font-weight:bold;margin-bottom:4px;box-shadow:0 1px 1px rgba(0,0,0,0.1);text-shadow:0 1px 1px rgba(0,0,0,0.1);border-radius:3px;cursor:move}.external-event:hover{box-shadow:inset 0 0 90px rgba(0,0,0,0.2)}
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/datatables.net-bs/js/dataTables.bootstrap.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 | DataTables Bootstrap 3 integration
3 | ©2011-2015 SpryMedia Ltd - datatables.net/license
4 | */
5 | (function(b){"function"===typeof define&&define.amd?define(["jquery","datatables.net"],function(a){return b(a,window,document)}):"object"===typeof exports?module.exports=function(a,d){a||(a=window);if(!d||!d.fn.dataTable)d=require("datatables.net")(a,d).$;return b(d,a,a.document)}:b(jQuery,window,document)})(function(b,a,d,m){var f=b.fn.dataTable;b.extend(!0,f.defaults,{dom:"<'row'<'col-sm-6'l><'col-sm-6'f>><'row'<'col-sm-12'tr>><'row'<'col-sm-5'i><'col-sm-7'p>>",renderer:"bootstrap"});b.extend(f.ext.classes,
6 | {sWrapper:"dataTables_wrapper form-inline dt-bootstrap",sFilterInput:"form-control input-sm",sLengthSelect:"form-control input-sm",sProcessing:"dataTables_processing panel panel-default"});f.ext.renderer.pageButton.bootstrap=function(a,h,r,s,j,n){var o=new f.Api(a),t=a.oClasses,k=a.oLanguage.oPaginate,u=a.oLanguage.oAria.paginate||{},e,g,p=0,q=function(d,f){var l,h,i,c,m=function(a){a.preventDefault();!b(a.currentTarget).hasClass("disabled")&&o.page()!=a.data.action&&o.page(a.data.action).draw("page")};
7 | l=0;for(h=f.length;l",{"class":t.sPageButton+" "+g,id:0===r&&"string"===typeof c?a.sTableId+"_"+c:null}).append(b("",{href:"#",
8 | "aria-controls":a.sTableId,"aria-label":u[c],"data-dt-idx":p,tabindex:a.iTabIndex}).html(e)).appendTo(d),a.oApi._fnBindAction(i,{action:c},m),p++)}},i;try{i=b(h).find(d.activeElement).data("dt-idx")}catch(v){}q(b(h).empty().html('').children("ul"),s);i!==m&&b(h).find("[data-dt-idx="+i+"]").focus()};return f});
9 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/datatables.net-buttons-bs/js/buttons.bootstrap.js:
--------------------------------------------------------------------------------
/*! Bootstrap integration for DataTables' Buttons
 * ©2016 SpryMedia Ltd - datatables.net/license
 */

// Module wrapper: the outer function receives `factory` (the second function
// below) and invokes it in whichever module environment it detects.
(function( factory ){
    if ( typeof define === 'function' && define.amd ) {
        // AMD
        define( ['jquery', 'datatables.net-bs', 'datatables.net-buttons'], function ( $ ) {
            return factory( $, window, document );
        } );
    }
    else if ( typeof exports === 'object' ) {
        // CommonJS
        module.exports = function (root, $) {
            // Fall back to the global window when no root is supplied.
            if ( ! root ) {
                root = window;
            }

            // Load the Bootstrap DataTables integration when jQuery is missing
            // or has no dataTable plugin attached yet.
            if ( ! $ || ! $.fn.dataTable ) {
                $ = require('datatables.net-bs')(root, $).$;
            }

            // Load the Buttons extension only if it is not already attached.
            if ( ! $.fn.dataTable.Buttons ) {
                require('datatables.net-buttons')(root, $);
            }

            return factory( $, root, root.document );
        };
    }
    else {
        // Browser
        factory( jQuery, window, document );
    }
}(function( $, window, document, undefined ) {
    'use strict';
    var DataTable = $.fn.dataTable;


    // Deep-merge (first argument `true`) Bootstrap class names and tags into
    // the Buttons default `dom` settings.
    $.extend( true, DataTable.Buttons.defaults, {
        dom: {
            container: {
                className: 'dt-buttons btn-group'
            },
            button: {
                className: 'btn btn-default'
            },
            collection: {
                tag: 'ul',
                className: 'dt-button-collection dropdown-menu',
                button: {
                    tag: 'li',
                    className: 'dt-button'
                },
                buttonLiner: {
                    tag: 'a',
                    className: ''
                }
            }
        }
    } );

    // Label for the collection button, looked up via i18n with a fallback.
    // NOTE(review): the fallback text ends with a trailing space; markup may
    // have followed it upstream and been lost in extraction - confirm.
    DataTable.ext.buttons.collection.text = function ( dt ) {
        return dt.i18n('buttons.collection', 'Collection ');
    };


    return DataTable.Buttons;
}));
69 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/datatables.net-buttons-bs/js/buttons.bootstrap.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 | Bootstrap integration for DataTables' Buttons
3 | ©2016 SpryMedia Ltd - datatables.net/license
4 | */
5 | (function(c){"function"===typeof define&&define.amd?define(["jquery","datatables.net-bs","datatables.net-buttons"],function(a){return c(a,window,document)}):"object"===typeof exports?module.exports=function(a,b){a||(a=window);if(!b||!b.fn.dataTable)b=require("datatables.net-bs")(a,b).$;b.fn.dataTable.Buttons||require("datatables.net-buttons")(a,b);return c(b,a,a.document)}:c(jQuery,window,document)})(function(c){var a=c.fn.dataTable;c.extend(!0,a.Buttons.defaults,{dom:{container:{className:"dt-buttons btn-group"},
6 | button:{className:"btn btn-default"},collection:{tag:"ul",className:"dt-button-collection dropdown-menu",button:{tag:"li",className:"dt-button"},buttonLiner:{tag:"a",className:""}}}});a.ext.buttons.collection.text=function(a){return a.i18n("buttons.collection",'Collection ')};return a.Buttons});
7 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/datatables.net-buttons/buttons.print.min.js:
--------------------------------------------------------------------------------
1 | (function(d){"function"===typeof define&&define.amd?define(["jquery","datatables.net","datatables.net-buttons"],function(f){return d(f,window,document)}):"object"===typeof exports?module.exports=function(f,b){f||(f=window);if(!b||!b.fn.dataTable)b=require("datatables.net")(f,b).$;b.fn.dataTable.Buttons||require("datatables.net-buttons")(f,b);return d(b,f,f.document)}:d(jQuery,window,document)})(function(d,f,b){var i=d.fn.dataTable,h=b.createElement("a"),m=function(a){h.href=a;a=h.host;-1===a.indexOf("/")&&
2 | 0!==h.pathname.indexOf("/")&&(a+="/");return h.protocol+"//"+a+h.pathname+h.search};i.ext.buttons.print={className:"buttons-print",text:function(a){return a.i18n("buttons.print","Print")},action:function(a,b,h,e){var c=b.buttons.exportData(e.exportOptions),k=function(a,c){for(var b="",d=0,e=a.length;d"+a[d]+""+c+">";return b+"
"},a='';e.header&&(a+=""+k(c.header,"th")+"");for(var a=a+"",l=0,i=c.body.length;l<
3 | i;l++)a+=k(c.body[l],"td");a+="";e.footer&&c.footer&&(a+=""+k(c.footer,"th")+"");var g=f.open("",""),c=e.title;"function"===typeof c&&(c=c());-1!==c.indexOf("*")&&(c=c.replace("*",d("title").text()));g.document.close();var j=""+c+"";d("style, link").each(function(){var a=j,b=d(this).clone()[0];"link"===b.nodeName.toLowerCase()&&(b.href=m(b.href));j=a+b.outerHTML});try{g.document.head.innerHTML=j}catch(n){d(g.document.head).html(j)}g.document.body.innerHTML=""+
4 | c+"
"+("function"===typeof e.message?e.message(b,h,e):e.message)+"
"+a;d(g.document.body).addClass("dt-print-view");d("img",g.document.body).each(function(a,b){b.setAttribute("src",m(b.getAttribute("src")))});e.customize&&e.customize(g);setTimeout(function(){e.autoPrint&&(g.print(),g.close())},250)},title:"*",message:"",exportOptions:{},header:!0,footer:!1,autoPrint:!0,customize:null};return i.Buttons});
5 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/FontAwesome.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/FontAwesome.otf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/font-awesome/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/gojs/ZoomSlider.css:
--------------------------------------------------------------------------------
/* Styles for the zoom slider: container, buttons and range input. */

/* Container: absolutely positioned at the top-left, 125x25px, 75% opaque. */
.zoomSlider {
  position: absolute;
  padding: 0;
  opacity: .75;
  z-index: 99;
  width: 125px;
  height: 25px;
  top: 0px;
  left: 0px;
}

/* Buttons sit inline, with a 0.2s opacity transition. */
.zoomButton {
  display: inline-block;
  vertical-align: top;
  text-align: center;
  padding: 0;
  margin: 0;
  transition: opacity .2s;
}

.zoomRangeContainer {
  display: inline-block;
  vertical-align: top;
  padding: 0;
}

/* Range input: native appearance removed so the rules below can style it. */
.zoomRangeInput {
  margin: 0;
  padding: 0;
  outline: none;
  transition: opacity .2s;
  background: transparent;
  -webkit-appearance: none;
}

/* Set up additional styling to ensure consistency across browsers */
/* Track: the same 3px grey bar for the WebKit, Mozilla and MS pseudo-elements. */
.zoomRangeInput::-webkit-slider-runnable-track {
  box-sizing: border-box;
  border: none;
  width: 100%;
  height: 3px;
  background: #ccc;
}
.zoomRangeInput::-moz-range-track {
  box-sizing: border-box;
  border: none;
  width: 100%;
  height: 3px;
  background: #ccc;
}
.zoomRangeInput::-ms-track {
  box-sizing: border-box;
  border: none;
  width: 100%;
  height: 3px;
  background: #ccc;
  color: transparent;
}
/* Thumb: a 10px dark circle for each vendor pseudo-element. */
.zoomRangeInput::-webkit-slider-thumb {
  -webkit-appearance: none;
  margin-top: -3.33px;
  box-sizing: border-box;
  border: none;
  width: 10px;
  height: 10px;
  border-radius: 50%;
  background: #444;
}
.zoomRangeInput::-moz-range-thumb {
  box-sizing: border-box;
  border: none;
  width: 10px;
  height: 10px;
  border-radius: 50%;
  background: #444;
}
.zoomRangeInput::-ms-thumb {
  margin-top: 0;
  box-sizing: border-box;
  border: none;
  width: 10px;
  height: 10px;
  border-radius: 50%;
  background: #444;
}
/* Hide the MS tooltip and the lower/upper fill parts. */
.zoomRangeInput::-ms-tooltip,
.zoomRangeInput::-ms-fill-lower,
.zoomRangeInput::-ms-fill-upper {
  display: none;
}
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/bower/ionicons/fonts/ionicons.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/bower/salvattore/salvattore.css:
--------------------------------------------------------------------------------
/*
  Specific styling for salvattore
  Feel free to edit it as you like
  More info at http://salvattore.com
*/

/* Base styles */
/* Columns float left; each size-1ofN class sets the width to 1/N of the row. */
.column {
  float: left;
}
.size-1of5 {
  width: 20%;
}
.size-1of4 {
  width: 25%;
}
.size-1of3 {
  width: 33.333%;
}
.size-1of2 {
  width: 50%;
}

/* Configure salvattore with media queries */
/*
  Each breakpoint sets the ::before content of #grid[data-columns] to a
  string of the form "<count> <class selector>".
  NOTE(review): presumably salvattore's script reads this string to pick the
  number of columns and their classes - confirm against salvattore.com.
*/
/* Up to 450px wide: 1 column. */
@media screen and (max-width: 450px) {
  #grid[data-columns]::before {
    content: '1 .column';
  }
}

/* 451px to 700px: 2 columns. */
@media screen and (min-width: 451px) and (max-width: 700px) {
  #grid[data-columns]::before {
    content: '2 .column.size-1of2';
  }
}

/* 701px to 850px: 3 columns. */
@media screen and (min-width: 701px) and (max-width: 850px) {
  #grid[data-columns]::before {
    content: '3 .column.size-1of3';
  }
}

/* 851px to 1600px: 4 columns. */
@media screen and (min-width: 851px) and (max-width: 1600px) {
  #grid[data-columns]::before {
    content: '4 .column.size-1of4';
  }
}

/* 1601px and wider: 5 columns. */
@media screen and (min-width: 1601px) {
  #grid[data-columns]::before {
    content: '5 .column.size-1of5';
  }
}
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/constraint.js:
--------------------------------------------------------------------------------
// Once the document is ready, initialise the two constraint tables
// (#fk_table and #check_table) as DataTables with one shared options object.
$(document).ready(function () {
    // Empty the afnFiltering array in place before the tables are built
    // (presumably DataTables' list of custom row filters - confirm).
    $.fn.dataTableExt.afnFiltering.length = 0;

    // Paging is switched by the page-level `config.pagination` flag; rows
    // start out sorted ascending on the first column.
    var tableOptions = {
        lengthChange: false,
        ordering: true,
        paging: config.pagination,
        pageLength: 50,
        autoWidth: true,
        processing: true,
        order: [[ 0, "asc" ]]
    };

    ['#fk_table', '#check_table'].forEach(function (selector) {
        $(selector).DataTable(tableOptions);
    });
});
15 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/favicon.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower-v8-latin-regular.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/indieflower/indie-flower.css:
--------------------------------------------------------------------------------
/*
  Registers the 'Indie Flower' font family (normal style, weight 400).
  The second `src` tries locally installed copies first, then the files
  listed after them; all url() paths are relative to this stylesheet.
  NOTE(review): the last source points at a .svg file - confirm that file
  is shipped next to the .eot/.ttf/.woff/.woff2 files.
*/
@font-face {
  font-family: 'Indie Flower';
  font-style: normal;
  font-weight: 400;
  src: url('indie-flower-v8-latin-regular.eot'); /* IE9 Compat Modes */
  src: local('Indie Flower'), local('IndieFlower'),
       url('indie-flower-v8-latin-regular.eot?#iefix') format('embedded-opentype'), /* IE6-IE8 */
       url('indie-flower-v8-latin-regular.woff2') format('woff2'), /* Super Modern Browsers */
       url('indie-flower-v8-latin-regular.woff') format('woff'), /* Modern Browsers */
       url('indie-flower-v8-latin-regular.ttf') format('truetype'), /* Safari, Android, iOS */
       url('indie-flower-v8-latin-regular.svg#IndieFlower') format('svg'); /* Legacy iOS */
}
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-300italic.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-600italic.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-700.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.eot
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.ttf
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.woff
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/fonts/source-sans-pro/source-sans-pro-v10-latin-regular.woff2
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/images/dataprep-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/images/dataprep-logo.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/images/foreignKey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/images/foreignKey.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/images/foreignKeys.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/images/foreignKeys.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/images/primaryKey.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/images/primaryKey.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/images/primaryKeys.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/layout/images/primaryKeys.png
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/main.js:
--------------------------------------------------------------------------------
/**
 * Install a DataTables row filter that keeps only rows of one object type.
 *
 * The list behind `$.fn.dataTableExt.afnFiltering` is emptied first
 * (presumably the legacy alias of `$.fn.dataTable.ext.search` -- confirm
 * against the bundled DataTables version), so filters never stack.
 *
 * @param {string} table_type  Value to match against the Type cell, or 'All'
 *                             to let every row through.
 */
var filterBy = function(table_type) {
    $.fn.dataTableExt.afnFiltering.length = 0;

    var keepRow = function( settings, data, dataIndex ) {
        // The Type column is the second to last cell of the row.
        var rowType = data[data.length - 2];
        return rowType == table_type || table_type == 'All';
    };

    $.fn.dataTable.ext.search.push(keepRow);
}
15 |
// Build the #database_objects DataTable with All / Tables / Views filter
// buttons and the shared export buttons.
$(document).ready(function() {
    // Filter button that is currently highlighted, if any.
    var activeObject;

    // Remove the highlight from the previously pressed filter button.
    function resetActiveButton() {
        if (activeObject != null) {
            activeObject.active(false);
        }
    }

    // Config for a button that shows only rows of `tableType` and then
    // highlights itself.
    function typeFilterButton(label, tableType) {
        return {
            text: label,
            action: function ( e, dt, node, config ) {
                filterBy(tableType);
                resetActiveButton();
                this.active( !this.active() );
                activeObject = this;
                table.draw();
            }
        };
    }

    var table = $('#database_objects').DataTable( {
        lengthChange: false,
        paging: config.pagination,
        pageLength: 50,
        order: [[ 0, "asc" ]],
        buttons: [
            {
                text: 'All',
                action: function ( e, dt, node, config ) {
                    // "All" only clears the highlight; it never highlights itself.
                    filterBy('All');
                    resetActiveButton();
                    table.draw();
                }
            },
            typeFilterButton('Tables', 'Table'),
            typeFilterButton('Views', 'View'),
            {
                extend: 'columnsToggle',
                columns: '.toggle'
            }
        ]
    } );

    //schemaSpy.js
    dataTableExportButtons(table);
} );
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/relationship.html:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 | {% if diagram_tables or diagram_relationships %}
8 |
9 |
13 |
14 |
15 |
22 |
23 |
24 |
25 |
26 |
27 | {% endif %}
28 | {% if not diagram_relationships %}
29 |
30 |
31 |
Missed Relationships!
32 |
No relationships were detected in the schema.
33 |
34 | {% endif %}
35 |
36 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/relationship.js:
--------------------------------------------------------------------------------
// Highlight the navbar entry whose link matches the file name of the current
// page (links with an empty href are highlighted as well).
$(function() {
    var location = window.location.href;
    var pgurl = location.slice(location.lastIndexOf("/") + 1);

    $("#navbar-collapse ul li a").each(function() {
        var target = $(this).attr("href");
        if (target == pgurl || target == '') {
            $(this).parent().addClass("active");
        }
    });
});

// Show a "move" cursor on the diagrams and make them draggable.
$(function() {
    $('img.diagram, object.diagram').css("cursor", "move").draggable();
});
14 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/routines.js:
--------------------------------------------------------------------------------
/**
 * Install a DataTables row filter that keeps only routines of one kind.
 *
 * The list behind `$.fn.dataTableExt.afnFiltering` is emptied first
 * (presumably the legacy alias of `$.fn.dataTable.ext.search` -- confirm
 * against the bundled DataTables version), so filters never stack.
 *
 * @param {string} functionType  Upper-case value compared with the row's
 *                               Type cell, or 'All' to keep every row.
 */
var filterBy = function(functionType) {
    $.fn.dataTableExt.afnFiltering.length = 0;

    var keepRow = function( settings, data, dataIndex ) {
        // The Type column is the second cell; it is upper-cased before comparing.
        var rowType = data[1];
        return rowType.toUpperCase() == functionType || functionType == 'All';
    };

    $.fn.dataTable.ext.search.push(keepRow);
}
15 |
// Build the #routine_table DataTable with All / Functions / Procedures filter
// buttons and the shared export buttons.
$(document).ready(function() {
    // Filter button that is currently highlighted, if any.
    var activeObject;

    // Remove the highlight from the previously pressed filter button.
    function resetActiveButton() {
        if (activeObject != null) {
            activeObject.active(false);
        }
    }

    // Config for a button that shows only routines of `routineType` and then
    // highlights itself.
    function routineFilterButton(label, routineType) {
        return {
            text: label,
            action: function ( e, dt, node, config ) {
                filterBy(routineType);
                resetActiveButton();
                this.active( !this.active() );
                activeObject = this;
                table.draw();
            }
        };
    }

    var table = $('#routine_table').DataTable( {
        lengthChange: false,
        ordering: true,
        paging: config.pagination,
        pageLength: 50,
        autoWidth: true,
        processing: true,
        order: [[ 0, "asc" ]],
        buttons: [
            {
                text: 'All',
                action: function ( e, dt, node, config ) {
                    // "All" only clears the highlight; it never highlights itself.
                    filterBy('All');
                    resetActiveButton();
                    table.draw();
                }
            },
            routineFilterButton('Functions', 'FUNCTION'),
            routineFilterButton('Procedures', 'PROCEDURE'),
            {
                extend: 'columnsToggle',
                columns: '.toggle'
            }
        ]
    } );

    //schemaSpy.js
    dataTableExportButtons(table);
} );
71 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/routines/routine.js:
--------------------------------------------------------------------------------
// Register always-visible anchor links on every h3 heading.
function enableAnchors() {
    anchors.options.visible = 'always';
    anchors.add('h3');
}

// On page load: enable the heading anchors and set up #standard_table as a
// DataTable without sorting, paging or buttons of its own.
$(document).ready(function() {
    enableAnchors();

    var tableOptions = {
        lengthChange: false,
        bSort: false,
        bPaginate: false,
        autoWidth: true,
        buttons: [ ]
    };
    var table = $('#standard_table').DataTable(tableOptions);

    // Move the button container into the first .col-sm-6 cell of the wrapper.
    var buttonContainer = table.buttons().container();
    buttonContainer.appendTo('#standard_table_wrapper .col-sm-6:eq(0)' );
} );
20 |
// Turn the #sql-script-codemirror textarea, when the page has one, into a
// read-only CodeMirror viewer in SQL mode. `editor` stays null otherwise.
var codeElement = document.getElementById("sql-script-codemirror");
var editor = null;
if (codeElement != null) {
    editor = CodeMirror.fromTextArea(codeElement, {
        // `lineNumbers` was listed twice in the original literal; the
        // duplicate key is dropped, the effective options are unchanged.
        lineNumbers: true,
        mode: 'text/x-sql',
        indentWithTabs: true,
        smartIndent: true,
        matchBrackets: true,
        autofocus: true,
        readOnly: true
    });
}
35 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/schemaSpy.js:
--------------------------------------------------------------------------------
// Highlight the navbar entry whose link matches the file name of the current
// page (links with an empty href are highlighted as well).
$(function() {
    var location = window.location.href;
    var pgurl = location.slice(location.lastIndexOf("/") + 1);

    $("#navbar-collapse ul li a").each(function() {
        var target = $(this).attr("href");
        if (target == pgurl || target == '') {
            $(this).parent().addClass("active");
        }
    });
});
8 |
/**
 * Add a second button group named 'exports' (copy, Excel, CSV, PDF) to a
 * DataTable and place both button groups inside the table's container.
 *
 * @param table  DataTables API instance; its container id is used to build
 *               the selectors below.
 *
 * NOTE(review): the markup literal passed to `$()` and the empty `text`
 * values of the export buttons look truncated in this copy -- verify them
 * against the upstream file before changing this function.
 */
function dataTableExportButtons(table) {
// Prepend the wrapper markup to the table's container element.
$("\n" +
"
\n" +
"
\n" +
"
").prependTo('#' + table.table().container().id);
    // Extra Buttons instance holding only the export actions.
    new $.fn.dataTable.Buttons( table, {
        name: 'exports',
        buttons: [
            {
                extend: 'copyHtml5',
                text: '',
                titleAttr: 'Copy'
            },
            {
                extend: 'excelHtml5',
                text: '',
                titleAttr: 'Excel'
            },
            {
                extend: 'csvHtml5',
                text: '',
                titleAttr: 'CSV'
            },
            {
                extend: 'pdfHtml5',
                text: '',
                orientation: 'landscape',
                titleAttr: 'PDF'
            }
        ]
    } );

    // The button container from `table.buttons()` goes into #button_group_one,
    // the one selected by the 'exports' name into #button_group_two; both
    // targets are looked up inside this table's container.
    table.buttons().container().appendTo( '#' + table.table().container().id + ' #button_group_one' );
    table.buttons( 'exports', null ).container().appendTo( '#' + table.table().container().id + ' #button_group_two' );
}
44 |
45 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/layout/tables/table.js:
--------------------------------------------------------------------------------
// On page load: enable heading anchors, build the #standard_table DataTable
// with its toggle buttons, and set up the optional index / check tables.
$(document).ready(function() {
    anchors.options.visible = 'always';
    anchors.add('h3');

    // Config for a button that toggles the elements matching `selector`,
    // flips its own highlight and re-adjusts the column widths.
    function visibilityToggleButton(label, selector) {
        return {
            text: label,
            action: function ( e, dt, node, config ) {
                $(selector).toggle();
                this.active( !this.active() );
                table.columns.adjust().draw();
            }
        };
    }

    // Turn `selector` into an unordered DataTable with export buttons, but
    // only when the element exists on the page.
    function plainExportableTable(selector) {
        if (!$(selector).length) {
            return;
        }
        var plain = $(selector).DataTable({
            lengthChange: false,
            paging: config.pagination,
            ordering: false
        });
        dataTableExportButtons(plain);
    }

    var table = $('#standard_table').DataTable( {
        lengthChange: false,
        ordering: false,
        paging: config.pagination,
        autoWidth: true,
        buttons: [
            visibilityToggleButton('Related columns', ".relatedKey"),
            visibilityToggleButton('Constraint', ".constraint"),
            {
                extend: 'columnsToggle',
                columns: '.toggle'
            }
        ]
    } );
    dataTableExportButtons(table);

    plainExportableTable('#indexes_table');
    plainExportableTable('#check_table');
} );
54 |
55 |
// Show a "move" cursor on the diagrams and make them draggable.
$(function() {
    $('img.diagram, object.diagram').css("cursor", "move").draggable();
});

// jQuery plugin: rewrite each element's text so that runs of digits are
// grouped in threes separated by a space (e.g. "1234567" -> "1 234 567").
$.fn.digits = function() {
    return this.each(function() {
        var $element = $(this);
        var grouped = $element.text().replace(/(\d)(?=(\d\d\d)+(?!\d))/g, "$1 ");
        $element.text(grouped);
    });
}

// Apply the digit grouping to the record counter once the page is ready.
$(function() {
    $("#recordNumber").digits();
});
71 |
// Turn the #sql-script-codemirror textarea, when the page has one, into a
// read-only CodeMirror viewer in SQL mode. `editor` stays null otherwise.
var codeElement = document.getElementById("sql-script-codemirror");
var editor = null;
if (codeElement != null) {
    editor = CodeMirror.fromTextArea(codeElement, {
        // `lineNumbers` was listed twice in the original literal; the
        // duplicate key is dropped, the effective options are unchanged.
        lineNumbers: true,
        mode: 'text/x-sql',
        indentWithTabs: true,
        smartIndent: true,
        matchBrackets: true,
        autofocus: true,
        readOnly: true
    });
}
86 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/page_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/page_models/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/page_models/page_data.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 |
class PageData:
    """
    Bundle of everything one report page hands to its writer.

    Holds the template file name, the script file name, a ``scope`` dict of
    values collected through ``add_scope`` and a ``depth`` (0 by default).
    """

    def __init__(self, template_name: str, script_name: str):
        self.depth = 0
        self.scope = {}
        self.script_name = script_name
        self.template_name = template_name

    def add_scope(self, key: str, value: Any):
        """Store ``value`` in the scope under ``key``, replacing any previous entry."""
        self.scope.update({key: value})

    def set_depth(self, depth: int):
        """Replace the page depth."""
        self.depth = depth
16 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/report.py:
--------------------------------------------------------------------------------
1 | """
2 | This module implements the Report class for create_db_report.
3 | """
4 |
5 | import os
6 | import shutil
7 | import warnings
8 | import webbrowser
9 | from pathlib import Path
10 | from typing import Optional
11 | from ...utils import is_notebook
12 |
13 |
class Report:
    """
    Handle to a report produced by the create_db_report function.

    Parameters
    ----------
    database_name: str
        Name of the database; used to name the folder written by ``save``.
    report: str
        Location of the generated report; ``show_browser`` opens it through a
        ``file://`` URL.
    """

    def __init__(self, database_name: str, report: str) -> None:
        self.report = report
        self.database_name = database_name

    def show(self) -> None:
        """
        Warn that notebook output is not supported.

        The warning is only issued when running inside a notebook; otherwise
        this method does nothing.
        """
        if not is_notebook():
            return
        message = (
            "Report generated by create_db_report currently does not support output in notebook,"
            " consider using report.show_browser() to view in browser."
        )
        warnings.warn(message)

    def save(self, path: Optional[str] = None) -> None:
        """
        Copy the report layout folder to ``<path>/<database_name>_report``.

        An existing folder at the destination is removed first.

        Parameters
        ----------
        path: Optional[str], default Path.cwd()
            The path to where the report will be saved.
        """
        layout_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), "layout"))
        # Fall back to the current working directory when no path is given.
        base_dir = Path(path if path else str(Path.cwd())).expanduser()
        destination = os.path.join(base_dir, f"{self.database_name}_report")
        if os.path.exists(destination):
            shutil.rmtree(destination)
        shutil.copytree(layout_dir, destination)
        print(f"Report has been saved to {destination}!")

    def show_browser(self) -> None:
        """
        Open the report in the browser.
        """
        report_url = f"file://{self.report}"
        webbrowser.open(report_url, new=2)
60 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/template_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/template_models/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/template_models/constraint.py:
--------------------------------------------------------------------------------
class TemplateConstraint:
    """Plain record describing one constraint: owning table, name and definition."""

    def __init__(self, table_name: str, name: str, definition: str) -> None:
        self.table_name, self.name, self.definition = table_name, name, definition
6 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/template_models/table.py:
--------------------------------------------------------------------------------
1 | from ..db_models.table import Table
2 |
3 |
class TemplateTable:
    """
    Template-side view of a ``Table``.

    Every attribute of the wrapped table whose name does not start with a
    double underscore is copied onto this object; the table itself stays
    reachable as ``table`` and ``comments`` is set to an empty string.
    """

    def __init__(self, table: Table) -> None:
        copied_names = [name for name in dir(table) if not name.startswith("__")]
        for name in copied_names:
            setattr(self, name, getattr(table, name))
        # Assigned last so these two always win over same-named copied attributes.
        self.table = table
        self.comments = ""
11 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/template_models/table_index.py:
--------------------------------------------------------------------------------
1 | from ..db_models.table_index import TableIndex
2 |
3 |
class TemplateTableIndex:
    """
    Template-side view of a ``TableIndex``.

    Every attribute of the wrapped index whose name does not start with a
    double underscore is copied onto this object; the index itself stays
    reachable as ``index``.
    """

    def __init__(self, index: TableIndex) -> None:
        copied_names = [name for name in dir(index) if not name.startswith("__")]
        for name in copied_names:
            setattr(self, name, getattr(index, name))
        # Assigned last so it always wins over a same-named copied attribute.
        self.index = index

    def get_index(self):
        """Return the wrapped index object."""
        return self.index

    def get_key(self):
        """
        Return the attribute fragment describing the kind of key.

        Primary is checked before unique; any other index is only titled
        as indexed.
        """
        if self.index.is_primary:
            return " class='primaryKey' title='Primary Key'"
        if self.index.is_unique:
            return " class='uniqueKey' title='Unique Key'"
        return " title='Indexed'"

    def get_key_icon(self):
        """Return the icon string for primary or unique indexes, else an empty string."""
        is_key = self.index.is_primary or self.index.is_unique
        return " " if is_key else ""
29 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/views/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/create_db_report/views/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/views/column.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import List
3 | from ..db_models.table import Table
4 | from ..page_models.page_data import PageData
5 | from ..page_models.page_template import PageTemplate
6 | from ..template_models.table_column import TemplateTableColumn
7 |
8 |
class ColumnPage:
    """Writer for the page listing the columns of all tables."""

    def __init__(self, template_object: PageTemplate) -> None:
        self.template_object = template_object

    def page_writer(self, tables: List[Table], new_file: str):
        """
        Compile the data needed by the template for columns page

        One row is produced per column of every table in ``tables``; the rows
        are stored in the page scope under ``table_data`` and the result of
        the template object's ``write_data`` call is returned.
        """
        template_columns = {
            TemplateTableColumn(column, column.index, "")
            for table in tables
            for column in table.get_columns()
        }

        def as_row(entry):
            column = entry.table_column
            owner = column.table
            row = {
                "table_name": owner.name,
                "table_file_name": owner.name,
                "table_type": owner.get_type(),
                "key_class": entry.get_key_class(),
                "key_title": entry.get_key_title(),
                "name": entry.get_key_icon() + column.name,
                "type": column.type_name,
                "length": "",
                "nullable": entry.get_nullable(),
                "auto_updated": entry.get_auto_updated(),
                "default_value": entry.get_default_value(),
                "comments": "",
            }
            # Round-trip through JSON, as the row must be JSON-serialisable.
            return json.loads(json.dumps(row))

        json_columns = [as_row(entry) for entry in template_columns]

        page_data = PageData("column.html", "column.js")
        page_data.add_scope("table_data", json_columns)
        page_data.set_depth(0)

        column_table_paging = {"paging": "true", "pageLength": 20, "lengthChange": "false"}
        pagination_configs = {"columnTable": column_table_paging}

        return self.template_object.write_data(page_data, new_file, "column.js", pagination_configs)
49 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/views/constraint.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from ..db_models.constraint import ForeignKeyConstraint
3 | from ..db_models.table import Table
4 | from ..page_models.page_data import PageData
5 | from ..page_models.page_template import PageTemplate
6 | from ..template_models.constraint import TemplateConstraint
7 |
8 |
class ConstraintPage:
    """Writer for the page listing foreign key and check constraints."""

    def __init__(self, template_object: PageTemplate) -> None:
        self.template_object = template_object

    def page_writer(
        self, constraints: List[ForeignKeyConstraint], tables: List[Table], new_file: str
    ):
        """
        Compile the data needed by the template for constraints page

        The scope receives the foreign key constraints, their count and the
        check constraints collected from ``tables``; the result of the
        template object's ``write_data`` call is returned.
        """
        page_data = PageData("constraint.html", "constraint.js")
        page_data.add_scope("constraints", constraints)
        page_data.add_scope("constraints_num", len(constraints))
        page_data.add_scope("check_constraints", self.collect_check_constraints(tables))
        page_data.set_depth(0)
        pagination_configs = {
            "fk_table": {"paging": "true", "pageLength": 20, "lengthChange": "false"},
            "check_table": {"paging": "true", "pageLength": 10, "lengthChange": "false"},
        }
        return self.template_object.write_data(
            page_data, new_file, "constraint.js", pagination_configs
        )

    @staticmethod
    def collect_check_constraints(tables: List[Table]):
        """
        Build one ``TemplateConstraint`` per table that has check constraints.

        Tables whose ``check_constraints`` is empty are skipped. The list is
        returned to the caller; previously it was built and then dropped, so
        the page scope always received ``None``.
        """
        results = []
        for table in tables:
            check_constraints = table.check_constraints
            if len(check_constraints) > 0:
                # NOTE(review): the whole mapping is passed as table_name and its
                # keys()/values() views as name/definition, exactly as before --
                # confirm whether one entry per (table, constraint) was intended.
                results.append(
                    TemplateConstraint(
                        check_constraints,
                        check_constraints.keys(),
                        check_constraints.values(),
                    )
                )
        return results
41 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/views/orphan.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import List, Any
3 | from ..page_models.page_data import PageData
4 | from ..page_models.page_template import PageTemplate
5 |
6 |
class OrphanPage:
    """Writer for the orphan page."""

    def __init__(self, template_object: PageTemplate) -> None:
        self.template_object = template_object

    def page_writer(
        self,
        json_tables: List[Any],
        json_relationships: List[Any],
        new_file: str,
    ):
        """
        Compile the data needed by the template for orphan page

        Both lists are serialised to JSON strings and stored in the page
        scope; the result of the template object's ``write_data`` call is
        returned.
        """
        page_data = PageData("orphan.html", "")
        scope_entries = (
            ("diagram_tables", json_tables),
            ("diagram_relationships", json_relationships),
        )
        for key, payload in scope_entries:
            page_data.add_scope(key, json.dumps(payload))
        page_data.set_depth(0)

        return self.template_object.write_data(page_data, new_file, "", {})
26 |
--------------------------------------------------------------------------------
/dataprep/eda/create_db_report/views/relationship.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import List, Any
3 | from ..page_models.page_data import PageData
4 | from ..page_models.page_template import PageTemplate
5 |
6 |
class RelationshipPage:
    """Writer for the relationship page."""

    def __init__(self, template_object: PageTemplate) -> None:
        self.template_object = template_object

    def page_writer(
        self,
        json_tables: List[Any],
        json_relationships: List[Any],
        new_file: str,
    ):
        """
        Compile the data needed by the template for relationship page

        Both lists are serialised to JSON strings and stored in the page
        scope; the result of the template object's ``write_data`` call is
        returned.
        """
        page_data = PageData("relationship.html", "relationship.js")
        scope_entries = (
            ("diagram_tables", json_tables),
            ("diagram_relationships", json_relationships),
        )
        for key, payload in scope_entries:
            page_data.add_scope(key, json.dumps(payload))
        page_data.set_depth(0)

        return self.template_object.write_data(page_data, new_file, "relationship.js", {})
26 |
--------------------------------------------------------------------------------
/dataprep/eda/create_diff_report/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {{ context.title }}
7 | {% include 'scripts.html' %}
8 | {% include 'styles.html' %}
9 |
10 |
11 |
12 |
28 |
29 |
30 |
31 | {% if context.components.dfs[0].has_overview %}
32 |
36 |
37 |
38 | {% include 'overview.html' %}
39 |
40 |
41 | {% endif %}
42 | {% if context.components.dfs[0].has_variables %}
43 |
47 |
48 | {% include 'variables.html' %}
49 |
50 | {% endif %}
51 |
52 |
53 |
54 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/dataprep/eda/create_diff_report/templates/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
Dataset Statistics
5 |
6 |
7 | |
8 | {% for df_label in context.df_labels %}
9 | {{ df_label }} |
10 | {% endfor %}
11 |
12 |
13 | {% for k, v in context.stats.items() %}
14 | {% if v is defined and v %}
15 |
16 | {{ k }} |
17 | {% for stats in v %}
18 | {{ stats }} |
19 | {% endfor %}
20 |
21 | {% endif %}
22 | {% endfor %}
23 |
24 |
25 |
26 |
27 |
28 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/dataprep/eda/create_report/templates/correlation.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% for corr_name in context.components.correlation_names %}
5 | {% if loop.index == 1 %}
6 |
7 |
8 | {% else %}
9 |
10 |
11 | {% endif %}
12 | {% endfor %}
13 |
14 |
15 | {% for plot in context.components.correlations[1] %}
16 | {% if loop.index == 1 %}
17 |
18 | {{ plot }}
19 |
20 | {% else %}
21 |
22 | {{ plot }}
23 |
24 | {% endif %}
25 | {% endfor %}
26 |
27 |
28 |
--------------------------------------------------------------------------------
/dataprep/eda/create_report/templates/error.html:
--------------------------------------------------------------------------------
1 |
2 |
Something Happened: {{ error_message }}
3 |
--------------------------------------------------------------------------------
/dataprep/eda/create_report/templates/interactions.html:
--------------------------------------------------------------------------------
1 |
2 | {% if context.components.interactions[1].error %}
3 | {% with error_message = context.components.interactions[1].error %}
4 | {% include 'error.html' %}
5 | {% endwith %}
6 | {% else %}
7 | {{ context.components.interactions[1] }}
8 | {% endif %}
9 |
--------------------------------------------------------------------------------
/dataprep/eda/create_report/templates/missing.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% for miss_name in context.components.missing_tabs %}
5 | {% if loop.index == 1 %}
6 |
7 |
8 | {% else %}
9 |
10 |
11 | {% endif %}
12 | {% endfor %}
13 |
14 |
15 | {% for plot in context.components.missing[1] %}
16 | {% if loop.index == 1 %}
17 |
18 | {{ plot }}
19 |
20 | {% else %}
21 |
22 | {{ plot }}
23 |
24 | {% endif %}
25 | {% endfor %}
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/dataprep/eda/create_report/templates/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
Dataset Statistics
4 |
5 |
6 | {% for h, d in context.components.overview[0].items() %}
7 |
8 | {{ h }} |
9 | {{ d }} |
10 |
11 | {% endfor %}
12 |
13 | Variable Types |
14 |
15 |
16 | {% for tp, num in context.components.overview[1].items() %}
17 | - {{ tp }}: {{ num }}
18 | {% endfor %}
19 |
20 | |
21 |
22 |
23 |
24 |
25 | {% if context.components.overview_insights %}
26 |
27 |
Dataset Insights
28 | {% for page, content in context.components.overview_insights.items() %}
29 |
30 |
31 | {% for entry in content %}
32 | {% for ins_type, insight in entry.items() %}
33 |
34 |
35 | {{ insight|escape|replace('/*start*/', '')|replace('/*end*/', '') }}
36 | |
37 | {{ ins_type }} |
38 |
39 | {% endfor %}
40 | {% endfor %}
41 |
42 |
43 | {% endfor %}
44 |
45 |
46 | {% for _ in context.components.overview_insights %}
47 | {% if loop.length > 1 %}
48 | - {{ loop.index }}
49 | {% endif %}
50 | {% endfor %}
51 |
52 |
53 |
54 | {% endif %}
55 |
56 |
--------------------------------------------------------------------------------
/dataprep/eda/outlier/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/eda/outlier/__init__.py
--------------------------------------------------------------------------------
/dataprep/eda/outlier/computation.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing plot_outlier function.
3 | """
4 |
5 | import dask.dataframe as dd
6 |
7 | from ..intermediate import Intermediate
8 |
9 | DEFAULT_PARTITIONS = 1
10 |
11 |
def _calc_num_outlier(df: dd.DataFrame, col_x: str) -> Intermediate:
    """
    Calculate outliers of a numerical column with the MAD (modified z-score) method.

    The modified z-score of a value x is ``0.6745 * (x - median) / MAD``, where MAD
    is the 0.5 quantile of ``|x - median|``. A value is flagged when the absolute
    value of that score exceeds 3.5, so both unusually large and unusually small
    values are reported.

    :param df: the input dataframe
    :param col_x: the column of df (univariate outlier detection)
    :return: Intermediate built from ``{"outliers_index": [...]}`` (the index values
        of the flagged rows in the rebuilt single-column frame) and the raw inputs
        ``df`` and ``col_x``
    """
    # Rebuild the column as a one-column dask frame named "data".
    data_df = dd.from_dask_array(df[col_x].to_dask_array(), columns=["data"])
    median = data_df["data"].quantile(0.5)
    MAD = abs(data_df["data"] - median).quantile(0.5)  # pylint: disable=invalid-name
    # NOTE(review): a MAD of 0 makes this a division by zero (inf/NaN scores) -
    # confirm whether callers need that case handled.
    data_df["z_score"] = (0.6745 * (data_df["data"] - median)) / MAD
    # Compare the magnitude of the score so that low outliers are caught as well.
    res_df = data_df[data_df["z_score"].abs() > 3.5].drop("z_score", axis=1)
    result = {"outliers_index": list(res_df["data"].index.compute())}
    raw_data = {"df": df, "col_x": col_x}
    return Intermediate(result, raw_data)
27 |
28 |
def _calc_cat_outlier(df: dd.DataFrame, col_x: str, threshold: int = 1) -> Intermediate:
    """
    Find the rare categories of a categorical column.

    A category counts as an outlier when the number of rows holding it is less
    than or equal to ``threshold``.

    :param df: the input dataframe
    :param col_x: the column of df (univariate outlier detection)
    :param threshold: largest group size that is still reported as an outlier
    :return: Intermediate built from ``{"outlier_index": [...]}`` (the rare category
        values) and the raw inputs ``df``, ``col_x`` and ``threshold``
    """
    category_sizes = df.groupby([col_x]).size()
    rare = category_sizes[category_sizes <= threshold]
    return Intermediate(
        {"outlier_index": list(rare.index.compute())},
        {"df": df, "col_x": col_x, "threshold": threshold},
    )
40 |
--------------------------------------------------------------------------------
/dataprep/eda/staged.py:
--------------------------------------------------------------------------------
1 | """Decorator that makes two-stage computations easy to work with."""
2 |
3 | from typing import Any, Callable, Generator, Tuple, Union, cast
4 |
5 | import dask
6 |
7 | from .intermediate import Intermediate
8 |
9 | Decoratee = Callable[..., Generator[Any, Any, Intermediate]]
10 |
11 | Completion = Callable[[Any], Intermediate]
12 |
13 |
def staged(
    func: Decoratee,
) -> Callable[..., Union[Tuple[Any, Completion], Intermediate]]:
    """Transform a two stage computation into a result and a completion function.

    ``func`` must be a generator function that yields exactly once (the value that
    still has to be computed) and, after the computed value is sent back in,
    returns its final result.

    The returned wrapper accepts the same arguments as ``func`` plus the
    keyword-only flag ``_staged``:

    * ``_staged=True``: return ``(yielded_value, completion)`` where ``completion``
      takes the computed value and returns the final result.
    * ``_staged=False`` (default): run ``dask.compute`` on the yielded value and
      return the final result directly.

    ``completion`` raises ``RuntimeError`` if the generator yields a second time
    instead of returning.
    """
    # Imported locally so this block does not rely on the module's import list.
    from functools import wraps  # pylint: disable=import-outside-toplevel

    @wraps(func)  # keep the decorated function's name and docstring
    def staged_imp(
        *args: Any, _staged: bool = False, **kwargs: Any
    ) -> Union[Tuple[Any, Completion], Intermediate]:
        gen = func(*args, **kwargs)

        def completion(computed: Any) -> Intermediate:
            try:
                gen.send(computed)
            except StopIteration as stop:
                # The generator's return value travels on StopIteration.value.
                return cast(Intermediate, stop.value)
            raise RuntimeError("Computation didn't stop.")

        if _staged:
            return next(gen), completion

        (computed,) = dask.compute(next(gen))
        return completion(computed)

    return staged_imp
38 |
--------------------------------------------------------------------------------
/dataprep/errors.py:
--------------------------------------------------------------------------------
1 | """
2 | Library-wide errors
3 | """
4 |
5 |
class DataprepError(Exception):
    """Base class for the exceptions used across the library."""
10 |
11 |
class UnreachableError(DataprepError):
    """Signals that a code path believed to be unreachable was executed."""
16 |
--------------------------------------------------------------------------------
/dataprep/lineage/__init__.py:
--------------------------------------------------------------------------------
1 | from .lx import lineagex
2 |
--------------------------------------------------------------------------------
/dataprep/lineage/lx.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains the method of lineagex.
3 | It is a wrapper on lineagex.lineagex function.
4 | """
5 |
6 | from typing import Optional, Union, List
7 |
try:
    import lineagex as lx

    _WITH_LX = True
except ImportError:
    _WITH_LX = False


def lineagex(
    sql: Optional[Union[List, str]] = None,
    target_schema: Optional[str] = "",
    conn_string: Optional[str] = None,
    search_path_schema: Optional[str] = "",
) -> dict:
    """
    Produce the lineage information.
    Please check out https://github.com/sfu-db/lineagex for more details.

    :param sql: The input of the SQL files, it can be a path to a file, a path to a folder containing SQL files, a list of SQLs or a list of view names and/or schemas
    :param target_schema: The schema where the SQL files would be created, defaults to public, or the first schema in the search_path_schema if provided
    :param conn_string: The postgres connection string in the format postgresql://username:password@server:port/database, defaults to None
    :param search_path_schema: The SET search_path TO ... schemas, defaults to public or the target_schema if provided
    :return: the ``output_dict`` attribute of the object returned by ``lineagex.lineagex``
    :raises ImportError: if the optional ``lineagex`` package is not installed
    """
    # Fail early with an actionable message when the optional dependency is missing.
    if not _WITH_LX:
        raise ImportError("lineagex is not installed. Please run pip install lineagex")

    return lx.lineagex(
        sql=sql,
        target_schema=target_schema,
        conn_string=conn_string,
        search_path_schema=search_path_schema,
    ).output_dict
42 |
--------------------------------------------------------------------------------
/dataprep/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/tests/__init__.py
--------------------------------------------------------------------------------
/dataprep/tests/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is used for performance testing and generating plot in github action.
3 | """
4 |
--------------------------------------------------------------------------------
/dataprep/tests/benchmarks/eda.py:
--------------------------------------------------------------------------------
1 | """
2 | This module is for performance testing of EDA module in github action.
3 | """
4 |
5 | from functools import partial
6 | import pandas as pd
7 | from typing import Any
8 | from ...datasets import load_dataset
9 | from ...eda import create_report
10 |
11 |
def report_func(df: pd.DataFrame, **kwargs: Any) -> None:
    """
    Build a report for ``df``; this is the callable timed by the benchmark.

    Keyword arguments are forwarded to ``create_report`` and its result is discarded.
    """
    create_report(df, **kwargs)
17 |
18 |
def test_create_report(benchmark: Any) -> None:
    """
    Benchmark report creation on the titanic dataset.
    """
    titanic = load_dataset("titanic")
    target = partial(report_func)
    benchmark(target, titanic)
25 |
--------------------------------------------------------------------------------
/dataprep/tests/clean/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | dataprep.clean tests
3 | """
4 |
--------------------------------------------------------------------------------
/dataprep/tests/connector/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/tests/connector/__init__.py
--------------------------------------------------------------------------------
/dataprep/tests/connector/test_integration.py:
--------------------------------------------------------------------------------
1 | # type: ignore
2 | from os import environ
3 | import asyncio
4 | import pytest
5 |
6 | from ...connector import Connector, websites
7 | from ...utils import display_dataframe
8 | from ...connector.utils import Request
9 |
10 |
11 | # @pytest.mark.skipif(
12 | # environ.get("DATAPREP_CREDENTIAL_TESTS", "0") == "0",
13 | # reason="Skip tests that requires credential",
14 | # )
15 | # def test_connector() -> None:
16 | # token = environ["DATAPREP_DATA_CONNECTOR_YELP_TOKEN"]
17 | # dc = Connector("yelp", _auth={"access_token": token}, _concurrency=3)
18 | # df = asyncio.run(dc.query("businesses", term="ramen", location="vancouver"))
19 | #
20 | # assert len(df) > 0
21 | #
22 | # websites()
23 | #
24 | # dc.info()
25 | #
26 | # display_dataframe(df)
27 | #
28 | # df = asyncio.run(dc.query("businesses", _count=120, term="ramen", location="vancouver"))
29 | #
30 | # assert len(df) == 120
31 | #
32 | # df = asyncio.run(dc.query("businesses", _count=1000, term="ramen", location="vancouver"))
33 | #
34 | # assert len(df) < 1000
35 | #
36 |
37 |
@pytest.mark.skipif(
    environ.get("DATAPREP_CREDENTIAL_TESTS", "0") == "0",
    reason="Skip tests that requires credential",
)
def test_query_params() -> None:
    """Query the youtube connector with extra parameters and expect a non-empty result."""
    connector = Connector(
        "youtube",
        _auth={"access_token": environ["DATAPREP_DATA_CONNECTOR_YOUTUBE_TOKEN"]},
    )
    videos = asyncio.run(connector.query("videos", q="covid", part="snippet"))

    assert len(videos) != 0
50 |
51 |
def test_requests() -> None:
    """Exercise Request.get, Request.post and Request.put against live python.org hosts."""
    # GET request
    homepage = Request("https://www.python.org/").get()
    assert homepage.status == 200

    # POST request, then PUT request: same form payload, each expected to answer 302
    for verb in ("post", "put"):
        params = {"@number": 12524, "@type": "issue", "@action": "show"}
        headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}
        send = getattr(Request("https://bugs.python.org/"), verb)
        response = send(_data=params, _headers=headers)
        assert response.status == 302
71 |
--------------------------------------------------------------------------------
/dataprep/tests/connector/test_read_sql.py:
--------------------------------------------------------------------------------
1 | # type: ignore
2 | from os import environ
3 | import pytest
4 | import pandas as pd
5 |
6 | from ...utils import display_dataframe
7 | from ...connector import read_sql
8 |
9 |
@pytest.mark.skipif(
    environ.get("DB_URL", "") == "" or environ.get("DB_SQL", "") == "",
    reason="Skip tests that requires database setup and sql query specified",
)
def test_read_sql() -> None:
    """Run the query from DB_SQL against the database at DB_URL and display the result."""
    frame = read_sql(environ["DB_URL"], environ["DB_SQL"])
    display_dataframe(frame)
19 |
--------------------------------------------------------------------------------
/dataprep/tests/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | dataprep.datasets tests
3 | """
4 |
--------------------------------------------------------------------------------
/dataprep/tests/datasets/test_datasets.py:
--------------------------------------------------------------------------------
1 | """
2 | module for testing the functions inside datasets
3 | """
4 |
5 | from ...datasets import get_dataset_names, get_db_names, load_dataset, load_db
6 |
7 |
def test_get_dataset_names() -> None:
    """At least one dataset name must be listed."""
    assert len(get_dataset_names()) > 0
11 |
12 |
def test_get_db_names() -> None:
    """At least one database name must be listed."""
    assert len(get_db_names()) > 0
16 |
17 |
def test_load_dataset() -> None:
    """Every listed dataset must load into a non-empty frame."""
    for dataset_name in get_dataset_names():
        loaded = load_dataset(dataset_name)
        assert len(loaded) > 0
23 |
24 |
def test_load_db() -> None:
    """Every listed database must load without raising."""
    for db_name in get_db_names():
        load_db(db_name)
29 |
--------------------------------------------------------------------------------
/dataprep/tests/eda/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | EDA Tests
3 | """
4 |
--------------------------------------------------------------------------------
/dataprep/tests/eda/test.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime as DateTime
2 | from datetime import timedelta as TimeDelta
3 |
4 | import pandas as pd
5 |
6 | from ...eda.dtypes import is_nominal, is_continuous
7 |
8 |
def test_dtypes() -> None:
    """
    Check the dtype predicates on one-row frames.

    The object, category and bool columns must be nominal; the complex, int,
    float, timedelta and datetime columns must be continuous.
    """
    nominal = pd.DataFrame(data=[["a", "c", False]], columns=["S", "C", "B"])
    nominal["C"] = nominal["C"].astype("category")

    for dtype in nominal.dtypes:
        assert is_nominal(dtype)

    continuous_row = [complex(3, 1), 1, 1.1, TimeDelta(1), DateTime.now()]
    continuous = pd.DataFrame(
        data=[continuous_row],
        columns=["IM", "I", "F", "TD", "DT"],
    )

    for dtype in continuous.dtypes:
        assert is_continuous(dtype)
31 |
--------------------------------------------------------------------------------
/dataprep/tests/eda/test_create_db_report.py:
--------------------------------------------------------------------------------
1 | import os
2 | from ...eda.create_db_report.run_function import generate_db_report
3 | from ...datasets import load_db
4 |
5 |
def test_create_db_report_sqlite() -> None:
    """Generate the report for the sakila database and check the output file counts."""
    generate_db_report(load_db("sakila.db"))

    # Check if output files were generated properly
    expected_counts = (
        ("../../eda/create_db_report/layout/tables", 23),
        ("../../eda/create_db_report/layout/diagrams/summary", 1),
        ("../../eda/create_db_report/layout/diagrams/tables", 21),
    )
    for folder, count in expected_counts:
        assert get_folder_file_num(folder) == count
14 |
15 |
def get_folder_file_num(path: str) -> int:
    """
    Count the entries of a directory given relative to this file.

    :param path: directory path, relative to the directory containing this module
    :return: number of entries ``os.listdir`` reports for that directory
    """
    here = os.path.dirname(__file__)
    # realpath collapses the ".." segments used by the callers.
    folder = os.path.realpath(os.path.join(here, path))
    return len(os.listdir(folder))
20 |
--------------------------------------------------------------------------------
/dataprep/tests/eda/test_create_diff_report.py:
--------------------------------------------------------------------------------
1 | """
2 | module for testing create_diff_report(df) function.
3 | """
4 |
5 | import logging
6 | import numpy as np
7 | import pandas as pd
8 | import pytest
9 | from ...eda import create_diff_report
10 |
11 |
12 | LOGGER = logging.getLogger(__name__)
13 |
14 |
@pytest.fixture(scope="module")  # type: ignore
def simpledf() -> pd.DataFrame:
    """
    Build a 1000-row frame with mixed column types.

    Columns: a, b, c random floats (half of column a is then set to None); d random
    letters; e a one-element series holding a list; f random date strings; g those
    dates parsed; h an object column mixing numbers and strings.
    """
    n_rows = 1000
    numeric = pd.DataFrame(np.random.rand(n_rows, 3), columns=["a", "b", "c"])
    letters = pd.Series(np.random.choice(["a", "b", "c"], n_rows, replace=True))
    nested = pd.Series([["foo"] * n_rows])
    date_strings = pd.Series(
        np.random.choice(["2020/03/29", "2020/01/10", "2019/11/21"], n_rows, replace=True)
    )

    df = pd.concat([numeric, letters, nested, date_strings], axis=1)
    df.columns = ["a", "b", "c", "d", "e", "f"]
    df["g"] = pd.to_datetime(df["f"])
    # test when column is object but some cells are numerical
    df["h"] = pd.Series([0, "x"] * 500)

    # Blank out a random half of the first column.
    idx = np.arange(n_rows)
    np.random.shuffle(idx)
    df.iloc[idx[:500], 0] = None

    return df
40 |
41 |
def test_diff_report(simpledf: pd.DataFrame) -> None:
    """
    Create a basic-mode diff report comparing the sample frame with itself.

    This must be a plain test function: decorated as a fixture, pytest would
    treat it as a fixture and never run it as a test.
    """
    from sys import platform

    if platform == "darwin":
        import matplotlib

        # Select the PS backend on macOS - presumably to avoid GUI backend
        # problems there; confirm before changing.
        matplotlib.use("PS")
    create_diff_report([simpledf, simpledf], mode="basic")
51 |
--------------------------------------------------------------------------------
/dataprep/tests/eda/test_show.py:
--------------------------------------------------------------------------------
1 | # type: ignore
2 | from os import environ
3 | import dask.dataframe as dd
4 | import pandas as pd
5 | import numpy as np
6 | import pytest
7 |
8 | from ...eda import plot, plot_correlation, plot_missing
9 | from ...eda.utils import to_dask
10 |
11 |
@pytest.fixture(scope="module")  # type: ignore
def simpledf() -> dd.DataFrame:
    """Ten-row frame with three random float columns and one constant string column, as dask."""
    frame = pd.DataFrame(np.random.rand(10, 3), columns=["a", "b", "c"])
    constant = pd.Series(["a"] * 10)
    frame = pd.concat([frame, constant], axis=1)
    frame.columns = ["a", "b", "c", "d"]
    return to_dask(frame)
19 |
20 |
def test_show(simpledf: dd.DataFrame) -> None:
    """Call show() on the result of each plotting entry point."""
    for make_plot in (plot, plot_correlation, plot_missing):
        make_plot(simpledf).show()
25 |
26 |
@pytest.mark.skipif(
    environ.get("DATAPREP_BROWSER_TESTS", "0") == "0",
    reason="Skip tests that requires opening browser",
)
def test_show_browser(simpledf: dd.DataFrame) -> None:
    """Call show_browser() on the result of each plotting entry point."""
    for make_plot in (plot, plot_correlation, plot_missing):
        make_plot(simpledf).show_browser()
35 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/dataprep/tests/lineage/__init__.py
--------------------------------------------------------------------------------
/dataprep/tests/lineage/dependency_example/a_table.sql:
--------------------------------------------------------------------------------
1 | SELECT subject_id, gender
2 | FROM `physionet-data.mimiciii_derived.no_dob`;
3 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/dependency_example/aa_table.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW aa_table AS
2 | SELECT a.subject_id, b.gender
3 | FROM `physionet-data.mimiciii_derived.a_table` a, `physionet-data.mimiciii_derived.no_dob` b;
4 | CREATE TABLE a_table AS
5 | SELECT subject_id, gender
6 | FROM `physionet-data.mimiciii_derived.no_dob`;
7 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/dependency_example/basic_patient_info.sql:
--------------------------------------------------------------------------------
1 | -- ------------------------------------------------------------------
2 | -- Title: Retrieves basic patient information from the patients table
3 | -- Notes: this query does not specify a schema. To run it on your local
4 | -- MIMIC schema, run the following command:
5 | -- SET SEARCH_PATH TO mimiciii;
6 | -- Where "mimiciii" is the name of your schema, and may be different.
7 | -- ------------------------------------------------------------------
8 |
9 |
10 | SELECT subject_id, gender, dob
11 | FROM `physionet-data.mimiciii_clinical.patients`;
12 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/dependency_example/from_aa_table.sql:
--------------------------------------------------------------------------------
1 | SELECT * FROM aa_table;
2 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/dependency_example/no_dob.sql:
--------------------------------------------------------------------------------
1 | SELECT subject_id, gender, dob
2 | FROM `physionet-data.mimiciii_derived.basic_patient_info`;
3 |
--------------------------------------------------------------------------------
/dataprep/tests/lineage/test_lineagex.py:
--------------------------------------------------------------------------------
1 | # type: ignore
2 | from os import environ
3 | import os
4 | import pytest
5 |
6 | from ...lineage import lineagex
7 |
8 |
@pytest.mark.skipif(
    environ.get("DB_URL", "") == "",
    reason="Skip tests that requires database setup and sql query specified",
)
def test_read_sql() -> None:
    """Run lineagex on the dependency_example folder, with and without a database connection."""
    db_url = environ["DB_URL"]
    # Resolve the fixture folder next to this file so the test does not depend
    # on the directory pytest was started from.
    sql = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dependency_example")
    lx = lineagex(sql, "mimiciii_derived", db_url, "mimiciii_clinical, public")
    print("dependency test with database connection", lx)
    lx = lineagex(
        sql=sql, target_schema="mimiciii_derived", search_path_schema="mimiciii_clinical, public"
    )
    print("dependency test without database connection", lx)
22 |
23 |
24 | if __name__ == "__main__":
25 | test_read_sql()
26 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_auto_pagination_off.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_auto_pagination_off.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_auto_pagination_on.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_auto_pagination_on.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_pagination_offset_offset_limit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_pagination_offset_offset_limit.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_pagination_offset_page_perpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_pagination_offset_page_perpage.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_yelp_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_yelp_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_yelp_query_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_yelp_query_2.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_yelp_show_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_yelp_show_schema.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_youtube_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_youtube_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/connector_youtube_query_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/connector_youtube_query_2.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/info.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/connector/show_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/connector/show_schema.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/.DS_Store
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/App.js_config.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/App.js_config.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/App_find.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/App_find.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Config_destination.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Config_destination.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/ID_and_secret.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/ID_and_secret.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Node_js.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Node_js.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/SFU_Spotify.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/SFU_Spotify.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Spotify_authentication.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Spotify_authentication.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Spotify_dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Spotify_dashboard.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Spotify_git_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Spotify_git_page.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Spotify_server.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Spotify_server.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/URI_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/URI_.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Yelp_API_Key.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Yelp_API_Key.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/Yelp_authentication.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/Yelp_authentication.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/connector_yelp_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/connector_yelp_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/connector_yelp_query_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/connector_yelp_query_2.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/connector_yelp_show_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/connector_yelp_show_schema.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/connector_youtube_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/connector_youtube_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/connector_youtube_query_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/connector_youtube_query_2.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_dblp_author.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_dblp_author.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_dblp_info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_dblp_info.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_dblp_pagination.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_dblp_pagination.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_dblp_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_dblp_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_git.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_git.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_git_clone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_git_clone.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_spotify_info.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_spotify_info.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_spotify_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_spotify_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_spotify_query_pag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_spotify_query_pag.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_yelp_query.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_yelp_query.png
--------------------------------------------------------------------------------
/docs/source/_static/images/tutorial/dc_yelp_query_pag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sfu-db/dataprep/be3f00c66f1248f05cc4f6007e84ac800f0700b5/docs/source/_static/images/tutorial/dc_yelp_query_pag.png
--------------------------------------------------------------------------------
/docs/source/acknowledgement.rst:
--------------------------------------------------------------------------------
1 | .. _acknowledgement:
2 |
3 | ===============
4 | Acknowledgement
5 | ===============
6 |
7 | Some functionalities of DataPrep are inspired by the following packages.
8 |
9 | * `Pandas Profiling <https://github.com/pandas-profiling/pandas-profiling>`_
10 |
11 | Inspired the report functionality and insights provided in DataPrep.eda.
12 |
13 | * `missingno <https://github.com/ResidentMario/missingno>`_
14 |
15 | Inspired the missing value analysis in DataPrep.eda.
16 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.connector.rst:
--------------------------------------------------------------------------------
1 | dataprep.connector
2 | ================================
3 |
4 |
5 | Configuration Manager
6 | -----------------------------------------------
7 |
8 | .. automodule:: dataprep.connector.config_manager
9 | :members:
10 | :undoc-members:
11 | :show-inheritance:
12 |
13 | Connector
14 | -----------------------------------------
15 |
16 | .. automodule:: dataprep.connector.connector
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 |
21 | Info
22 | -----------------------------------------
23 |
24 | .. automodule:: dataprep.connector.info
25 | :members:
26 | :undoc-members:
27 | :show-inheritance:
28 |
29 | Info UI
30 | -----------------------------------------
31 |
32 | .. automodule:: dataprep.connector.info_ui
33 | :members:
34 | :undoc-members:
35 | :show-inheritance:
36 |
37 |
38 | Schema
39 | --------------------------------------
40 |
41 | .. automodule:: dataprep.connector.schema
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 | Implicit database
47 | --------------------------------------------------
48 |
49 | .. automodule:: dataprep.connector.implicit_database
50 | :members:
51 | :undoc-members:
52 | :show-inheritance:
53 |
54 | Errors
55 | --------------------------------------
56 |
57 | .. automodule:: dataprep.connector.errors
58 | :members:
59 | :undoc-members:
60 | :show-inheritance:
61 |
62 | read_sql
63 | ---------
64 |
65 | .. autofunction:: dataprep.connector.read_sql
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.correlation.rst:
--------------------------------------------------------------------------------
1 | dataprep.eda.correlation
2 | ==============================================
3 |
4 | .. _plot_correlation_doc:
5 |
6 | plot_correlation
7 | ----------------
8 |
9 | .. autofunction:: dataprep.eda.correlation.plot_correlation
10 |
11 | compute_correlation
12 | -------------------
13 |
14 | .. automodule:: dataprep.eda.correlation.compute
15 | :members:
16 | :undoc-members:
17 | :show-inheritance:
18 |
19 | render_correlation
20 | ------------------
21 |
22 | .. automodule:: dataprep.eda.correlation.render
23 | :members:
24 | :undoc-members:
25 | :show-inheritance:
26 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.create_report.rst:
--------------------------------------------------------------------------------
1 | dataprep.eda.create\_report
2 | ===========================
3 |
4 | create_report
5 | -----------------
6 |
7 | .. automodule:: dataprep.eda.create_report
8 | :members:
9 | :undoc-members:
10 | :show-inheritance:
11 |
12 | formatters
13 | ----------
14 |
15 | .. automodule:: dataprep.eda.create_report.formatter
16 | :members:
17 | :undoc-members:
18 | :show-inheritance:
19 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.diff.rst:
--------------------------------------------------------------------------------
1 | dataprep.eda.diff
2 | =========================================
3 |
4 | .. _plot_diff_doc:
5 |
6 | plot_diff
7 | ---------
8 |
9 | .. autofunction:: dataprep.eda.diff.plot_diff
10 |
11 | compute_diff
12 | ------------
13 |
14 | .. automodule:: dataprep.eda.diff.compute
15 | :members:
16 | :undoc-members:
17 | :show-inheritance:
18 |
19 | render_diff
20 | -----------
21 |
22 | .. automodule:: dataprep.eda.diff.render
23 | :members:
24 | :undoc-members:
25 | :show-inheritance:
26 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.distribution.rst:
--------------------------------------------------------------------------------
1 | .. _dataprep.eda.distribution:
2 |
3 | dataprep.eda.distribution
4 | =========================================
5 |
6 | .. _plot_doc:
7 |
8 | plot
9 | ----
10 |
11 | .. autofunction:: dataprep.eda.distribution.plot
12 |
13 | compute
14 | -------
15 |
16 | .. automodule:: dataprep.eda.distribution.compute
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 |
21 | render
22 | ------
23 |
24 | .. automodule:: dataprep.eda.distribution.render
25 | :members:
26 | :undoc-members:
27 | :show-inheritance:
28 |
29 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.missing.rst:
--------------------------------------------------------------------------------
1 | dataprep.eda.missing
2 | ============================================
3 |
4 | plot_missing
5 | -------------
6 |
7 | .. autofunction:: dataprep.eda.missing.plot_missing
8 |
9 | compute_missing
10 | ---------------
11 |
12 | .. automodule:: dataprep.eda.missing.compute
13 | :members:
14 | :undoc-members:
15 | :show-inheritance:
16 |
17 | render_missing
18 | --------------
19 |
20 | .. automodule:: dataprep.eda.missing.render
21 | :members:
22 | :undoc-members:
23 | :show-inheritance:
24 |
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.eda.rst:
--------------------------------------------------------------------------------
1 | ==============================
2 | dataprep.eda auxiliary modules
3 | ==============================
4 |
5 | .. _dtypes:
6 |
7 | Data types
8 | ----------
9 |
10 | .. automodule:: dataprep.eda.dtypes
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | Intermediate
16 | ------------
17 |
18 | .. automodule:: dataprep.eda.intermediate
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | Palette
24 | -------
25 |
26 | .. automodule:: dataprep.eda.palette
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | Container
32 | ---------
33 |
34 | .. automodule:: dataprep.eda.container
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | Utils
40 | -----
41 |
42 | .. automodule:: dataprep.eda.utils
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
47 |
48 | Config
49 | ------
50 |
51 | .. automodule:: dataprep.eda.configs
52 | :members:
53 | :undoc-members:
54 | :show-inheritance:
--------------------------------------------------------------------------------
/docs/source/api_reference/dataprep.rst:
--------------------------------------------------------------------------------
1 | .. _api_reference:
2 |
3 | .. _reference:
4 |
5 | =============
6 | API Reference
7 | =============
8 |
9 | This section contains the public API reference for DataPrep. It is
10 | auto-generated from the docstrings in the project source code.
11 |
12 |
13 | .. toctree::
14 | :maxdepth: 2
15 | :hidden:
16 |
17 | dataprep.eda.distribution
18 | dataprep.eda.correlation
19 | dataprep.eda.missing
20 | dataprep.eda.diff
21 | dataprep.eda.create_report
22 | dataprep.eda
23 | dataprep.connector
24 | dataprep.clean
25 |
26 |
27 |
28 | EDA Documentation
29 | =================
30 |
31 | .. toctree::
32 | :maxdepth: 2
33 |
34 | dataprep.eda.distribution
35 | dataprep.eda.correlation
36 | dataprep.eda.missing
37 | dataprep.eda.create_report
38 | dataprep.eda.diff
39 | dataprep.eda
40 |
41 | Connector Documentation
42 | ============================
43 |
44 | .. toctree::
45 | :maxdepth: 2
46 |
47 | dataprep.connector
48 |
49 | Clean Documentation
50 | ============================
51 |
52 | .. toctree::
53 | :maxdepth: 2
54 |
55 | dataprep.clean
56 |
--------------------------------------------------------------------------------
/docs/source/bokeh/bokeh_license.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2012 - 2020, Anaconda, Inc., and Bokeh Contributors
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification,
5 | are permitted provided that the following conditions are met:
6 |
7 | Redistributions of source code must retain the above copyright notice,
8 | this list of conditions and the following disclaimer.
9 |
10 | Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | Neither the name of Anaconda nor the names of any contributors
15 | may be used to endorse or promote products derived from this software
16 | without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 | THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/docs/source/bokeh/docs-navbar.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/bokeh/docs-sidebar.html:
--------------------------------------------------------------------------------
1 |