├── .gitignore ├── .luacov ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── NEWS.md ├── README.md ├── argcheck.lua ├── custom_assertions.lua ├── dataframe ├── categorical.lua ├── column.lua ├── export_data.lua ├── init.lua ├── load_data.lua ├── metatable.lua ├── missing_data.lua ├── output.lua ├── row.lua ├── select_set_update.lua ├── statistics.lua └── subsets_and_batches.lua ├── dataseries ├── categorical.lua ├── export.lua ├── init.lua ├── metatable.lua ├── sngl_elmnt_ops.lua └── statistics.lua ├── doc.lua ├── doc ├── README.md ├── core │ ├── README.md │ ├── categorical.md │ ├── column.md │ ├── export_data.md │ ├── init.md │ ├── load_data.md │ ├── metatable.md │ ├── missing_data.md │ ├── output.md │ ├── row.md │ ├── select_set_update.md │ ├── statistics.md │ └── subsets_and_batches.md ├── dataseries │ ├── README.md │ ├── categorical.md │ ├── export.md │ ├── init.md │ ├── metatable.md │ ├── sngl_elmnt_ops.md │ └── statistics.md ├── helper_classes │ ├── 10_iterator.md │ ├── 11_paralleliterator.md │ ├── 20_tbl.md │ ├── 21_dict.md │ ├── 22_array.md │ └── README.md ├── sub_classes │ ├── 01_subset.md │ ├── 10_batchframe.md │ └── README.md └── utils │ ├── README.md │ └── utils.md ├── examples ├── Facebook license │ ├── LICENSE │ └── PATENTS └── mnist_example.lua ├── helper_classes ├── 10_iterator.lua ├── 11_paralleliterator.lua ├── 20_tbl.lua ├── 21_dict.lua ├── 22_array.lua └── Facebok license ├── init.lua ├── rocks ├── torch-dataframe-1.0-0.rockspec ├── torch-dataframe-1.1-0.rockspec ├── torch-dataframe-1.5-0.rockspec ├── torch-dataframe-1.6-0.rockspec ├── torch-dataframe-1.6-1.rockspec ├── torch-dataframe-1.7-0.rockspec └── torch-dataframe-scm-1.rockspec ├── specs ├── coverage.sh ├── data │ ├── advanced_short.csv │ ├── full.csv │ ├── iris-label.csv │ ├── iris-no-header.csv │ ├── iris-no-label.csv │ ├── realistic_29_row_data.csv │ ├── sampler_csv_files │ │ ├── index.csv │ │ └── index3.csv │ └── simple_short.csv ├── dataframe │ ├── batchframe_spec.lua │ ├── categorical_spec.lua │ ├── column_order_spec.lua │ ├── column_spec.lua │ ├── export_data_spec.lua │ ├── load_data_spec.lua │ ├── main_spec.lua │ ├── metatable_spec.lua │ ├── missing_data_spec.lua │ ├── row_spec.lua │ ├── sampler_spec.lua │ ├── select_set_update_spec.lua │ ├── serialization_spec.lua │ ├── statistics_spec.lua │ └── subsets_and_batches_spec.lua ├── dataseries │ └── dataseries_spec.lua ├── helper_classes │ ├── df_array_spec.lua │ ├── df_dict_spec.lua │ └── df_tbl_spec.lua ├── linter.sh ├── output │ ├── Wiki-templates │ │ ├── Readme.md │ │ └── Where_update_and_set.ipynb │ ├── cli_output.lua │ └── itorch_notebook_df_test.ipynb ├── run_all.sh └── utils │ ├── ntwrk_implementation_spec.lua │ ├── test.lua │ └── utils_spec.lua ├── sub_classes ├── 01_subset.lua ├── 10_batchframe.lua └── subset_extensions │ └── samplers.lua └── utils ├── doc_helpers ├── get_anchors.lua ├── parse_file.lua └── write_doc.lua ├── loader.lua └── utils.lua /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | luacov.* 3 | build.* 4 | -------------------------------------------------------------------------------- /.luacov: -------------------------------------------------------------------------------- 1 | return { 2 | modules = { 3 | ["init"] = 'init.lua', 4 | ["argcheck"] = 'argcheck.lua', 5 | ["main"] = 'main.lua', 6 | 7 | ["utils.utils"] = 'utils/utils.lua', 8 | ["utils.loader"] = 'utils/loader.lua', 9 | ["utils.doc_helpers.get_anchors"] = 
'utils/doc_helpers/get_anchors.lua',
10 | 	["utils.doc_helpers.parse_file"] = 'utils/doc_helpers/parse_file.lua',
11 | 	["utils.doc_helpers.write_doc"] = 'utils/doc_helpers/write_doc.lua',
12 | 
13 | 	["sub_classes.01_subset"] = 'sub_classes/01_subset.lua',
14 | 	["sub_classes.10_batchframe"] = 'sub_classes/10_batchframe.lua',
15 | 	["sub_classes.subset_extensions.samplers"] = 'sub_classes/subset_extensions/samplers.lua',
16 | 
17 | 	["helper_classes.10_iterator"] = 'helper_classes/10_iterator.lua',
18 | 	["helper_classes.11_paralleliterator"] = 'helper_classes/11_paralleliterator.lua',
19 | 	["helper_classes.20_tbl"] = 'helper_classes/20_tbl.lua',
20 | 	["helper_classes.21_dict"] = 'helper_classes/21_dict.lua',
21 | 	["helper_classes.22_array"] = 'helper_classes/22_array.lua',
22 | 
23 | 	["dataseries.categorical"] = 'dataseries/categorical.lua',
24 | 
25 | 	["dataseries.export"] = 'dataseries/export.lua',
26 | 	["dataseries.init"] = 'dataseries/init.lua',
27 | 	["dataseries.metatable"] = 'dataseries/metatable.lua',
28 | 	["dataseries.sngl_elmnt_ops"] = 'dataseries/sngl_elmnt_ops.lua',
29 | 	["dataseries.statistics"] = 'dataseries/statistics.lua',
30 | 
31 | 	["dataframe.categorical"] = 'dataframe/categorical.lua',
32 | 	["dataframe.column"] = 'dataframe/column.lua',
33 | 	["dataframe.export_data"] = 'dataframe/export_data.lua',
34 | 	["dataframe.init"] = 'dataframe/init.lua',
35 | 	["dataframe.load_data"] = 'dataframe/load_data.lua',
36 | 	["dataframe.metatable"] = 'dataframe/metatable.lua',
37 | 	["dataframe.missing_data"] = 'dataframe/missing_data.lua',
38 | 	["dataframe.output"] = 'dataframe/output.lua',
39 | 	["dataframe.row"] = 'dataframe/row.lua',
40 | 	["dataframe.select_set_update"] = 'dataframe/select_set_update.lua',
41 | 	["dataframe.statistics"] = 'dataframe/statistics.lua',
42 | 	["dataframe.subsets_and_batches"] = 'dataframe/subsets_and_batches.lua'
43 | 
44 | 
45 | 	}
46 | }
47 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | 
3 | sudo: true
4 | 
5 | branches:
6 |   only:
7 |     - master
8 |     - develop
9 | env:
10 |   global:
11 |     - TORCH_SERVER=https://raw.githubusercontent.com/torch/rocks/master/
12 |   matrix:
13 |     - LUA="LUA52"
14 |     - LUA="LUA53"
15 |     - LUA="LUAJIT20"
16 |     - LUA="LUAJIT21"
17 | 
18 | before_install:
19 |   - if [[ ! -d torch ]]; then git clone https://github.com/torch/distro.git torch --recursive ; fi
20 |   - cd torch
21 |   - git pull
22 |   - git submodule update
23 |   - git submodule foreach git pull origin master
24 |   - cd ..
25 |   - cp -rf torch torch_$LUA
26 |   - cd torch_$LUA
27 |   - TORCH_LUA_VERSION=$LUA ./install.sh -b
28 |   - cd ..
29 | 
30 | install:
31 |   - source ./torch_$LUA/install/bin/torch-activate
32 |   - luarocks --from=$TORCH_SERVER install sundown
33 |   - luarocks --from=$TORCH_SERVER install dok
34 |   - luarocks --from=$TORCH_SERVER install argcheck
35 |   - luarocks --from=$TORCH_SERVER install csvigo
36 |   - luarocks install luafilesystem
37 |   - luarocks install paths
38 |   - luarocks install threads
39 |   - luarocks install torchnet
40 |   - luarocks install busted
41 |   - luarocks install luacov
42 |   - luarocks install nn
43 |   - luarocks make rocks/torch-dataframe-scm-1.rockspec CFLAGS="-O2 -fPIC -fprofile-arcs -ftest-coverage" LIBFLAG="-shared --coverage"
44 | 
45 | script:
46 |   - cd specs
47 |   - ./run_all.sh --coverage --version $LUA
48 |   - ./coverage.sh --generate
49 |   - cd ..
50 | 
51 | after_success:
52 |   - bash <(curl -s https://codecov.io/bash)
53 | 
54 | notifications:
55 |   email:
56 |     on_success: change
57 |     on_failure: always
58 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.8)
2 | cmake_policy(VERSION 2.8)
3 | 
4 | set(PKGNAME Dataframe)
5 | 
6 | file(GLOB_RECURSE luafiles RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.lua")
7 | 
8 | # Exclude doc helpers and spec files
9 | set (EXCLUDE_DIRS "utils/doc_helpers/" "specs/")
10 | list(REMOVE_ITEM luafiles "custom_assertions.lua")
11 | 
12 | foreach (TMP_PATH ${luafiles})
13 | 
14 |   foreach (EXCLUDE_DIR ${EXCLUDE_DIRS})
15 |     string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
16 |     if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
17 |       MESSAGE("Removing ${TMP_PATH}")
18 |       list (REMOVE_ITEM luafiles ${TMP_PATH})
19 |     endif ()
20 |   endforeach(EXCLUDE_DIR)
21 | 
22 | endforeach(TMP_PATH)
23 | 
24 | foreach(file ${luafiles})
25 |   get_filename_component(dir ${file} PATH)
26 |   install(FILES ${file} DESTINATION ${LUA_PATH}/${PKGNAME}/${dir})
27 | endforeach()
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | Feel free to report a bug, suggest an enhancement or submit a new feature using [Issues][df_issues], or send us a [Pull Request][df_pr] directly :).
4 | 
5 | ## Before submitting
6 | 
7 | Don't forget to:
8 | - test your code
9 | - generate the documentation
10 | - run the linter script in the `specs` directory
11 | 
12 | You can find how we implemented our tests in the [specs directory][df_specs]. See "Behavior Driven Development" for more details on this technique.
13 | 
14 | ## Coding style
15 | 
16 | For a smoother contribution we ask you to follow these simple rules to keep the code as readable as possible:
17 | * Indentation is a tabulation of size 2
18 | * Every component of a function's name is separated by an underscore: `my_func_name`
19 | 
20 | [df_issues]: https://github.com/AlexMili/torch-dataframe/issues
21 | [df_pr]: https://github.com/AlexMili/torch-dataframe/pulls
22 | [df_specs]: https://github.com/AlexMili/torch-dataframe/tree/readme/specs
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2015
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 
--------------------------------------------------------------------------------
/argcheck.lua:
--------------------------------------------------------------------------------
1 | local env = require 'argcheck.env' -- retrieve the argcheck environment
2 | 
3 | -- From http://lua-users.org/wiki/SplitJoin
4 | function string:split(sep)
5 | 	local sep, fields = sep or ":", {}
6 | 	local pattern = string.format("([^%s]+)", sep)
7 | 	self:gsub(pattern, function(c) fields[#fields+1] = c end)
8 | 	return fields
9 | end
10 | 
11 | env.istype = function(obj, typename)
12 | 	if (typename == "*") then
13 | 		return true
14 | 	end
15 | 
16 | 	-- From the original argcheck env
17 | 	local thname = torch.typename(obj) -- empty if non-torch class
18 | 	local thtype = torch.type(obj)
19 | 	if (typename == "!table" and thtype ~= "table") then
20 | 		return true
21 | 	end
22 | 
23 | 	if (typename:match("|")) then
24 | 		if (thname) then
25 | 			-- Do a recursive search through all the patterns for torch class objects
26 | 			for _,subtype in ipairs(typename:split("|")) do
27 | 				local ret = env.istype(obj, subtype)
28 | 				if (ret) then
29 | 					return true
30 | 				end
31 | 			end
32 | 
33 | 			return false
34 | 		else
35 | 			-- We only need to find a basic variable match + nan values
36 | 			for _,subtype in ipairs(typename:split("|")) do
37 | 				if ((thtype == subtype) or
38 | 					(thtype == "nan" and isnan(obj)))
39 | 				then
40 | 					return true
41 | 				end
42 | 			end
43 | 
44 | 			return false
45 | 		end
46 | 	end
47 | 
48 | 	if thname then
49 | 		-- __typename (see below) might be absent
50 | 		local match = thname:match(typename)
51 | 		if match and (match ~= typename or match == thname) then
52 | 			return true
53 | 		end
54 | 		local mt = torch.getmetatable(thname)
55 | 		while mt do
56 | 			if mt.__typename then
57 | 				match = mt.__typename:match(typename)
58 | 				if match and (match ~= typename or match == mt.__typename) then
59 | 					return true
60 | 				end
61 | 			end
62 | 			mt = getmetatable(mt)
63 | 		end
64 | 		return false
65 | 	end
66 | 
67 | 	return type(obj) == typename
68 | end
69 | 
--------------------------------------------------------------------------------
/dataframe/export_data.lua:
--------------------------------------------------------------------------------
1 | local params = {...}
2 | local Dataframe = params[1]
3 | 
4 | local argcheck = require "argcheck"
5 | local doc = require "argcheck.doc"
6 | 
7 | doc[[
8 | 
9 | ## Data save/export functions
10 | 
11 | ]]
12 | 
13 | Dataframe.to_csv = argcheck{
14 | 	doc = [[
15 | 
16 | ### Dataframe.to_csv(@ARGP)
17 | 
18 | Saves a Dataframe into a CSV using csvigo as backend
19 | 
20 | _Return value_: self (Dataframe)
21 | 
22 | @ARGT
23 | 
24 | ]],
25 | 	{name="self", type="Dataframe"},
26 | 	{name='path', type='string', doc='path to file'},
27 | 	{name="separator", type='string', doc='separator (one character)', default=','},
28 | 	{name='verbose', type='boolean', help='verbose load', default=false},
29 | 	call = function(self, path, separator, verbose)
30 | 
31 | 	-- Make sure that categorical columns are presented in the correct way
32 | 	local save_data = {}
33 | 	for _,k in pairs(self.column_order) do
34 | 		save_data[k] = self:get_column(k):to_table{boolean2string = true}
35 | 	end
36 | 
37 | 	-- TODO: csvigo will have memory issues when used with regular tables
38 | 	csvigo.save{path = path,
39 | data = save_data, 40 | separator = separator, 41 | verbose = verbose, 42 | column_order = self.column_order, 43 | nan_as_missing = true} 44 | 45 | return self 46 | end} 47 | 48 | Dataframe.to_tensor = argcheck{ 49 | doc = [[ 50 | 51 | ### Dataframe.to_tensor(@ARGP) 52 | 53 | Convert the numeric section or specified columns of the dataset to a tensor 54 | 55 | @ARGT 56 | 57 | _Return value_: (1) torch.tensor with self:size(1) rows and self:size(2) columns, 58 | (2) exported column names 59 | 60 | ]], 61 | {name="self", type="Dataframe"}, 62 | call = function(self) 63 | 64 | return self:to_tensor(Df_Array(self:get_numerical_colnames())) 65 | end} 66 | 67 | Dataframe.to_tensor = argcheck{doc=[[ 68 | 69 | You can export selected columns using the columns argument: 70 | 71 | @ARGT 72 | ]], 73 | overload=Dataframe.to_tensor, 74 | {name="self", type="Dataframe"}, 75 | {name="columns", type='Df_Array', doc='The columns to export to labels'}, 76 | call = function(self, columns) 77 | 78 | columns = columns.data 79 | 80 | -- Check data integrity 81 | local numeric_dataset = {} 82 | local type = -1 83 | local tensor_types = { 84 | "ByteTensor" -- contains unsigned chars 85 | ,"CharTensor" -- contains signed chars 86 | ,"ShortTensor" -- contains shorts 87 | ,"IntTensor" -- contains ints 88 | ,"LongTensor" -- contains longs 89 | ,"FloatTensor" -- contains floats 90 | ,"DoubleTensor" 91 | } 92 | for _,k in pairs(columns) do 93 | self:assert_has_column(k) 94 | assert(self:is_numerical(k), "Column " .. tostring(k) .. " is not numerical") 95 | local col = self:get_column(k) 96 | numeric_dataset[k] = col:to_tensor() 97 | local current_type = col:type() 98 | 99 | for idx,tnsr_type in ipairs(tensor_types) do 100 | if (current_type:match(tnsr_type)) then 101 | current_type = idx 102 | break 103 | end 104 | end 105 | if (current_type > type) then 106 | type = current_type 107 | end 108 | end 109 | 110 | -- Convert all tensors to the same format before concat 111 | type = ("torch.%s"):format(tensor_types[type]) 112 | for cn,col in pairs(numeric_dataset) do 113 | numeric_dataset[cn] = numeric_dataset[cn]:type(type) 114 | end 115 | 116 | tensor_data = nil 117 | tensor_col_names = {} 118 | for col_no = 1,#self.column_order do 119 | -- Find the next column that is present in the numerics 120 | found = false 121 | column_name = self.column_order[col_no] 122 | for k,v in pairs(numeric_dataset) do 123 | if (k == column_name) then 124 | found = true 125 | break 126 | end 127 | end 128 | 129 | -- If column found we then concatenate that with our tensor_data 130 | if (found) then 131 | next_col = numeric_dataset[column_name] 132 | if (torch.isTensor(tensor_data)) then 133 | tensor_data = torch.cat(tensor_data, next_col, 2) 134 | else 135 | tensor_data = next_col 136 | end 137 | table.insert(tensor_col_names, column_name) 138 | end 139 | end 140 | 141 | if (#tensor_col_names == 1) then 142 | -- Reshape to tabular if this is only a single column 143 | tensor_data = tensor_data:reshape(tensor_data:size(1), 1) 144 | end 145 | 146 | return tensor_data, tensor_col_names 147 | end} 148 | 149 | Dataframe.to_tensor = argcheck{ 150 | doc=[[ 151 | 152 | If a filename is provided the tensor will be saved (`torch.save`) to that file: 153 | 154 | @ARGT 155 | ]], 156 | overload=Dataframe.to_tensor, 157 | {name="self", type="Dataframe"}, 158 | {name='filename', type='string', doc='Filename for tensor.save()'}, 159 | {name="columns", type='Df_Array', doc='The columns to export to labels', default=false}, 160 | call = function(self, 
filename, columns)
161 | 	local tensor_data, tensor_col_names
162 | 	if (columns) then
163 | 		tensor_data, tensor_col_names = self:to_tensor{columns = columns}
164 | 	else
165 | 		tensor_data, tensor_col_names = self:to_tensor()
166 | 	end
167 | 
168 | 	torch.save(filename, tensor_data)
169 | 
170 | 	return tensor_data, tensor_col_names
171 | end}
172 | 
173 | Dataframe.get = argcheck{
174 | 	doc = [[
175 | 
176 | ### Dataframe.get(@ARGP)
177 | 
178 | A function for *torchnet* compliance. It subsets a single index and returns the
179 | `to_tensor` on that example.
180 | 
181 | @ARGT
182 | 
183 | _Return value_: (1) torch.tensor with 1 row and #numerical columns
184 | 
185 | ]],
186 | 	{name="self", type="Dataframe"},
187 | 	{name="idx", type="number"},
188 | 	call = function(self, idx)
189 | 	local row = self:sub(idx, idx)
190 | 	return row:to_tensor()
191 | end}
192 | 
--------------------------------------------------------------------------------
/dataframe/metatable.lua:
--------------------------------------------------------------------------------
1 | local params = {...}
2 | local Dataframe = params[1]
3 | 
4 | local argcheck = require "argcheck"
5 | local doc = require "argcheck.doc"
6 | 
7 | doc[[
8 | 
9 | ## Metatable functions
10 | 
11 | ]]
12 | 
13 | Dataframe.size = argcheck{
14 | 	doc = [[
15 | 
16 | ### Dataframe.size(@ARGP)
17 | 
18 | By providing a dimension you can get only that dimension: row == 1, col == 2. If
19 | the value is omitted it will return the number of rows in order to comply with the
20 | torchnet standard.
21 | 
22 | @ARGT
23 | 
24 | _Return value_: integer
25 | ]],
26 | 	{name="self", type="Dataframe"},
27 | 	{name="dim", type="number", doc="The dimension of interest", default = 1},
28 | 	call=function(self, dim)
29 | 	assert(isint(dim), "The dimension isn't an integer: " .. tostring(dim))
30 | 	assert(dim == 1 or dim == 2, "The dimension can only be between 1 and 2 - you've provided: " .. dim)
31 | 	if (dim == 1) then
32 | 		if (not self.column_order or #self.column_order == 0) then
33 | 			return 0
34 | 		end
35 | 
36 | 		local col = self.column_order[1]
37 | 		if (self:has_column(col)) then
38 | 			return self:get_column(self.column_order[1]):size()
39 | 		else
40 | 			-- this case happens when _copy_meta has been called and the column_order has been set
41 | 			-- TODO: remove the dependence on column_order for the row count
42 | 			return 0
43 | 		end
44 | 	end
45 | 
46 | 	return #self.column_order
47 | end}
48 | 
49 | doc = [[
50 | 
51 | ### Dataframe.[]
52 | 
53 | The `__index__` function is a powerful tool that allows quick access to regular functions
54 | 
55 | - _Single integer_: it returns the raw row table (see `get_row()`)
56 | - _Df_Array()_: select rows of interest (see `_create_subset()`)
57 | - _"start:stop"_: get a row span using start/stop index, e.g. `"2:5"` (see `sub()`)
58 | - _"$column_name"_: get a column by prepending the name with `$`, e.g. `"$a column name"` (see `get_column`)
59 | - _"/subset_name"_: get a subset by prepending the name with `/`, e.g. `"/a subset name"` (see `get_subset`)
60 | 
61 | _Return value_: Table or Dataframe
62 | ]]
63 | 
64 | function Dataframe:__index__(index)
65 | 	if (torch.type(index) == "number") then
66 | 		return self:get_row(index), true
67 | 	end
68 | 
69 | 	if (torch.type(index) == "string") then
70 | 		if (index:match("^[0-9]+:[0-9]+$")) then
71 | 			-- Get the core data
72 | 			local start = index:gsub(":.*", "")
73 | 			start = tonumber(start)
74 | 			local stop = index:gsub("[^:]+:", "")
75 | 			stop = tonumber(stop)
76 | 
77 | 			return self:sub{start=start, stop=stop}, true
78 | 		end
79 | 
80 | 		-- Index a column using a $ at the beginning of a string
81 | 		if (index:match("^[$]")) then
82 | 			local column_name = index:gsub("^[$]", "")
83 | 			return self:get_column(column_name), true
84 | 		end
85 | 
86 | 		-- Index a subset using a / at the beginning of a string
87 | 		if (index:match("^[/]")) then
88 | 			local subset_name = index:gsub("^[/]", "")
89 | 			return self:get_subset(subset_name), true
90 | 		end
91 | 
92 | 		return false
93 | 	end
94 | 
95 | 	if (torch.type(index) == "Df_Array") then
96 | 		return self:_create_subset(index), true
97 | 	end
98 | 
99 | 	return false
100 | end
101 | 
102 | doc = [[
103 | 
104 | ### Dataframe.[] =
105 | 
106 | The `__newindex__` allows easy updating of a single row (see `_update_single_row()`)
107 | 
108 | ]]
109 | 
110 | function Dataframe:__newindex__(index, value)
111 | 	if (torch.type(index) == "number") then
112 | 		self:_update_single_row(index, Df_Tbl(value), Df_Tbl(self:get_row(index)))
113 | 		return true
114 | 	end
115 | 
116 | 	return false
117 | end
118 | 
119 | Dataframe.__tostring__ = argcheck{
120 | 	doc=[[
121 | 
122 | ### Dataframe.__tostring__(@ARGP)
123 | 
124 | A wrapper for `tostring()`
125 | 
126 | @ARGT
127 | 
128 | _Return value_: string
129 | ]],
130 | 	{name="self", type="Dataframe"},
131 | 	call=function (self)
132 | 	return self:tostring()
133 | end}
134 | 
135 | 
136 | Dataframe.copy = argcheck{
137 | 	doc = [[
138 | 
139 | ### Dataframe.copy(@ARGP)
140 | 
141 | Copies the table together with all metadata
142 | 
143 | @ARGT
144 | 
145 | _Return value_: Dataframe
146 | ]],
147 | 	{name="self", type="Dataframe"},
148 | 	call=function(self)
149 | 	local new_df = Dataframe.new(Df_Dict(self.dataset))
150 | 	new_df = self:_copy_meta(new_df)
151 | 	return new_df
152 | end}
153 | 
154 | Dataframe.__len__ = argcheck{
155 | 	doc = [[
156 | 
157 | ### Dataframe.#
158 | 
159 | Returns the number of rows
160 | 
161 | _Return value_: integer
162 | ]],
163 | 	{name="self", type="Dataframe"},
164 | 	{name="other", type="Dataframe"},
165 | 	call=function(self, other)
166 | 	return self:size(1)
167 | end}
168 | 
169 | Dataframe.__len__ = argcheck{
170 | 	overload=Dataframe.__len__,
171 | 	{name="self", type="Dataframe"},
172 | 	call=function(self)
173 | 	return self:size(1)
174 | end}
175 | 
176 | Dataframe.__eq__ = argcheck{
177 | 	doc = [[
178 | 
179 | ### Dataframe.==
180 | 
181 | Checks if Dataframes contain the same values
182 | 
183 | _Return value_: boolean
184 | ]],
185 | 	{name="self", type="Dataframe"},
186 | 	{name="other", type="Dataframe"},
187 | 	call=function(self, other)
188 | 	-- Check that the sizes match
189 | 	if (self:size(1) ~= other:size(1) or
190 | 			self:size(2) ~= other:size(2)) then
191 | 		return false
192 | 	end
193 | 
194 | 	-- Check that the columns match
195 | 	for i=1,#self.column_order do
196 | 		if (not other:has_column(self.column_order[i])) then
197 | 			return false
198 | 		end
199 | 	end
200 | 
201 | 	-- Check the actual content (expensive, which is why this is left last)
202 | 	for i=1,#self.column_order do
203 | 		local self_col = self:get_column(self.column_order[i])
204 | 		local other_col = other:get_column(self.column_order[i])
205 | 
206 | 		for j=1,self:size(1) do
207 | 			-- one is nan and not the other
208 | 			if ((not isnan(self_col[j]) and
209 | 					isnan(other_col[j])) or
210 | 					(isnan(self_col[j]) and
211 | 					not isnan(other_col[j]))) then
212 | 				return false
213 | 			end
214 | 
215 | 			-- Actual value check if both weren't nan
216 | 			if (not(isnan(self_col[j]))) then
217 | 				if (self_col[j] ~= other_col[j]) then
218 | 					return false
219 | 				end
220 | 			end
221 | 
222 | 		end
223 | 	end
224 | 
225 | 	-- If the function hasn't exited before this point the two dataframes are equal
226 | 	return true
227 | end}
228 | 
--------------------------------------------------------------------------------
/dataframe/missing_data.lua:
--------------------------------------------------------------------------------
1 | local params = {...}
2 | local Dataframe = params[1]
3 | 
4 | local argcheck = require "argcheck"
5 | local doc = require "argcheck.doc"
6 | 
7 | doc[[
8 | 
9 | ## Missing data functions
10 | 
11 | ]]
12 | 
13 | Dataframe.count_na = argcheck{
14 | 	doc = [[
15 | 
16 | ### Dataframe.count_na(@ARGP)
17 | 
18 | Count missing values in the dataset
19 | 
20 | @ARGT
21 | 
22 | _Return value_: Dataframe or table containing missing values per column, total na
23 | ]],
24 | 	{name="self", type="Dataframe"},
25 | 	{name="columns", type="Df_Array", doc="The columns to count", opt=true},
26 | 	{name='as_dataframe', type='boolean', default=true,
27 | 	 doc="Return a dataframe"},
28 | 	call=function(self, columns, as_dataframe)
29 | 	if (columns) then
30 | 		columns = columns.data
31 | 	else
32 | 		columns = self.column_order
33 | 	end
34 | 
35 | 	local ret = {}
36 | 	local tot_na = 0
37 | 	for i=1,#columns do
38 | 		ret[columns[i]] = self:count_na(columns[i])
39 | 		tot_na = tot_na + ret[columns[i]]
40 | 	end
41 | 
42 | 	if (as_dataframe) then
43 | 		local ret_df = Dataframe.new()
44 | 		for name,val in pairs(ret) do
45 | 			ret_df:append{rows = Df_Dict{Column = name, Value = val},
46 | 			              column_order = Df_Array("Column", "Value")}
47 | 		end
48 | 		return ret_df, tot_na
49 | 	else
50 | 		return ret, tot_na
51 | 	end
52 | end}
53 | 
54 | Dataframe.count_na = argcheck{
55 | 	doc = [[
56 | If you only want to count a single column
57 | 
58 | @ARGT
59 | 
60 | _Return value_: single integer
61 | ]],
62 | 	overload=Dataframe.count_na,
63 | 	{name="self", type="Dataframe"},
64 | 	{name="column", type="string", doc="The column to count"},
65 | 	call=function(self, column)
66 | 	self:assert_has_column(column)
67 | 
68 | 	return self:get_column(column):count_na()
69 | end}
70 | 
71 | Dataframe.fill_na = argcheck{
72 | 	doc = [[
73 | 
74 | ### Dataframe.fill_na(@ARGP)
75 | 
76 | Replace missing values in a specific column
77 | 
78 | @ARGT
79 | 
80 | _Return value_: self
81 | ]],
82 | 	{name="self", type="Dataframe"},
83 | 	{name="column_name", type="string", doc="The column to fill"},
84 | 	{name="default_value", type="number|string|boolean",
85 | 	 doc="The default missing value", default=0},
86 | 	call=function(self, column_name, default_value)
87 | 	self:assert_has_column(column_name)
88 | 
89 | 	local column_data = self:get_column(column_name)
90 | 
91 | 	column_data:fill_na(default_value)
92 | 
93 | 	return self
94 | end}
95 | 
96 | Dataframe.fill_all_na = argcheck{
97 | 	doc = [[
98 | 
99 | ### Dataframe.fill_all_na(@ARGP)
100 | 
101 | Replace missing values in all columns
102 | 
103 | @ARGT
104 | 
105 | _Return value_: self
106 | ]],
107 | 	{name="self", type="Dataframe"},
108 | 	{name="default_value", type="number|string|boolean", doc="The
default missing value", default=0}, 109 | call=function(self, default_value) 110 | for i=1,#self.column_order do 111 | self:fill_na(self.column_order[i], default_value) 112 | end 113 | 114 | return self 115 | end} 116 | -------------------------------------------------------------------------------- /dataseries/export.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataseries = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Export functions 10 | 11 | Here are functions are used for exporting to a different format. Generally `to_` 12 | functions should reside here. Only exception is the `tostring`. 13 | 14 | ]] 15 | 16 | Dataseries.to_tensor = argcheck{ 17 | doc=[[ 18 | 19 | ### Dataseries.to_tensor(@ARGP) 20 | 21 | Returns the values in tensor format. Note that if you don't provide a replacement 22 | for missing values and there are missing values the function will throw an error. 23 | 24 | *Note*: boolean columns are not tensors and need to be manually converted to a 25 | tensor. This since 0 would be a natural value for false but can cause issues as 26 | neurons are labeled 1 to n for classification tasks. See the `Dataframe.update` 27 | function for details or run the `boolean2tensor`. 28 | 29 | @ARGT 30 | 31 | _Return value_: `torch.*Tensor` of the current type 32 | ]], 33 | {name="self", type="Dataseries"}, 34 | {name="missing_value", type="number", 35 | doc="Set a value for the missing data", 36 | opt=true}, 37 | {name="copy", type="boolean", default=true, 38 | doc="Set to false if you want the original data to be returned."}, 39 | call=function(self, missing_value) 40 | assert(self:type():match("torch.*Tensor"), 41 | "Can only automatically retrieve columns that already are tensors") 42 | assert(self:count_na() == 0 or missing_value, 43 | "Missing data should be replaced with a default value before retrieving tensor") 44 | 45 | local ret 46 | if (copy) then 47 | ret = self:copy() 48 | else 49 | ret = self 50 | end 51 | 52 | if (missing_value and self:count_na() > 0) then 53 | assert(copy, "Replacing missing values is not allowed in to_tensor unless you are returning a copy") 54 | ret:fill_na(missing_value) 55 | end 56 | 57 | return ret.data 58 | end} 59 | 60 | Dataseries.to_table = argcheck{ 61 | doc=[[ 62 | 63 | ### Dataseries.to_table(@ARGP) 64 | 65 | Returns the values in table format 66 | 67 | @ARGT 68 | 69 | _Return value_: table 70 | ]], 71 | {name="self", type="Dataseries"}, 72 | {name="boolean2string", type="boolean", opt=true, 73 | doc="Convert boolean values to strings since they cause havoc with csvigo"}, 74 | call=function(self, boolean2string) 75 | local ret = {} 76 | for i=1,self:size() do 77 | ret[i] = self:get(i) 78 | end 79 | 80 | if (boolean2string and self:type() == "tds.Vec") then 81 | for i=1,#ret do 82 | if (type(ret[i]) == "boolean") then 83 | ret[i] = tostring(ret[i]) 84 | end 85 | end 86 | end 87 | 88 | return ret 89 | end} 90 | -------------------------------------------------------------------------------- /dataseries/metatable.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataseries = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Metatable functions 10 | 11 | ]] 12 | 13 | doc = [[ 14 | 15 | ### Dataseries.[] 16 | 17 | The `__index__` function is a powerful tool that 
allows quick access to regular functions
18 | 
19 | - _Single integer_: it returns the raw element (see `get()`)
20 | - _Df_Array()_: select a set of elements of interest (see `get()`)
21 | - _"start:stop"_: get an element span using start/stop indexes, e.g. `"2:5"` (see `sub()`)
22 | 
23 | _Return value_: Table or Dataseries
24 | ]]
25 | 
26 | function Dataseries:__index__(index)
27 | 	local thtype = torch.type(index)
28 | 	-- If this is a number or a Df_Array, let the `get()` method handle them both
29 | 	if (thtype == "number" or
30 | 			thtype == "Df_Array") then
31 | 		return self:get(index), true
32 | 	-- If this is a string matching "start:stop", it should be a query for a subset
33 | 	elseif (thtype == "string" and index:match("^[0-9]*:[0-9]*$")) then
34 | 		local start = index:gsub(":.*", "")
35 | 		start = tonumber(start)
36 | 
37 | 		local stop = index:gsub("[^:]*:", "")
38 | 		stop = tonumber(stop)
39 | 
40 | 		return self:sub(start, stop), true
41 | 	end
42 | 
43 | 	return false
44 | end
45 | 
46 | 
47 | doc = [[
48 | 
49 | ### Dataseries.[] =
50 | 
51 | The `__newindex__` allows updating of a single element (uses `set()`)
52 | 
53 | ]]
54 | function Dataseries:__newindex__(index, value)
55 | 	if (torch.type(index) == "number") then
56 | 		self:set(index, value)
57 | 		return true
58 | 	end
59 | 
60 | 	return false
61 | end
62 | 
63 | Dataseries.__len__ = argcheck{
64 | 	doc = [[
65 | 
66 | ### Dataseries.#
67 | 
68 | Returns the number of elements
69 | 
70 | _Return value_: integer
71 | ]],
72 | 	{name="self", type="Dataseries"},
73 | 	{name="other", type="Dataseries", opt=true},
74 | 	call=function(self, other)
75 | 	return self:size()
76 | end}
77 | 
78 | Dataseries.__tostring__ = argcheck{
79 | 	doc=[[
80 | 
81 | ### Dataseries.__tostring__(@ARGP)
82 | 
83 | A wrapper for `tostring()`
84 | 
85 | @ARGT
86 | 
87 | _Return value_: string
88 | ]],
89 | 	{name="self", type="Dataseries"},
90 | 	call=function (self)
91 | 	return self:tostring()
92 | end}
93 | 
--------------------------------------------------------------------------------
/doc.lua:
--------------------------------------------------------------------------------
1 | local paths = require 'paths'
2 | 
3 | local dataframe_path = paths.thisfile():gsub("doc.lua$", "?.lua")
4 | local dataframe_dir = string.gsub(dataframe_path, "[^/]+$", "")
5 | 
6 | -- Custom argument checks
7 | local argcheck_file = string.gsub(dataframe_path,"?", "argcheck")
8 | assert(loadfile(argcheck_file))()
9 | 
10 | -- Get the core loader function
11 | local loader_file = string.gsub(dataframe_path,"?", "utils/loader")
12 | assert(loadfile(loader_file))()
13 | 
14 | load_dir_files(dataframe_dir .. "utils/doc_helpers/")
15 | 
16 | --[[
17 | doc.lua loads everything in the same order as the init script. As we want to
18 | later link the scripts, the file has three sections:
19 | 
20 | 1. Load the scripts and store the full docs in the docs table. The file order is
21 | retained via the files table.
22 | 2. Parse the files in the appropriate order and generate a table of contents for each
23 | file that is written to the doc folder with the same name as the file but with
24 | `md` as file ending.
25 | 3. Merge all the table of contents data into the README so that the docs are
26 | easier to navigate.
27 | ]]
28 | local docs = {}
29 | local files = {}
30 | files.utils, docs.utils = load_dir_files{
31 | 	path = dataframe_dir .. "utils/",
32 | 	docs = true
33 | }
34 | 
35 | files.helper_classes, docs.helper_classes = load_dir_files{
36 | 	path = dataframe_dir .. "helper_classes/",
37 | 	docs = true
38 | }
39 | 
40 | files.dataseries, docs.dataseries = load_dir_files{
41 | 	path = dataframe_dir .. "dataseries/",
42 | 	docs = true
43 | }
44 | 
45 | files.core, docs.core = load_dir_files{
46 | 	path = dataframe_dir .. "dataframe/",
47 | 	docs = true
48 | }
49 | 
50 | files.sub_classes, docs.sub_classes =
51 | 	-- Load all sub classes
52 | 	load_dir_files{
53 | 		path = dataframe_dir .. "sub_classes/",
54 | 		params = {Dataframe},
55 | 		docs = true
56 | 	}
57 | 
58 | --[[
59 | !!! Start section 2 !!!
60 | Parse each group, create a directory for that group, parse all files and write an
61 | MD for each file. Then add a Readme for that directory.
62 | ]]
63 | 
64 | local parsed_docs = {}
65 | local doc_path = "doc"
66 | if (not paths.dirp(doc_path)) then
67 | 	paths.mkdir(doc_path)
68 | end
69 | 
70 | local rough_toc_tbl = {}
71 | local detailed_toc_tbl = {}
72 | for group_name,group in pairs(docs) do
73 | 	local sub_doc_path = ("%s/%s/"):format(doc_path,group_name)
74 | 	if (not paths.dirp(sub_doc_path)) then
75 | 		paths.mkdir(sub_doc_path)
76 | 	end
77 | 
78 | 	local grp_rough_toc = ""
79 | 	local grp_detailed_toc = ""
80 | 	local gnrl_rough_toc = ""
81 | 	local gnrl_detailed_toc = ""
82 | 
83 | 	parsed_docs[group_name] = {}
84 | 	for _,file_name in ipairs(files[group_name]) do
85 | 		local base_fn = paths.basename(file_name)
86 | 		local md_path = ("%s%s"):format(sub_doc_path,
87 | 		                                base_fn:gsub("%.lua$", ".md"))
88 | 
89 | 		parsed_docs[group_name][base_fn] = parse_doc(group[file_name], base_fn)
90 | 		local pd = parsed_docs[group_name][base_fn]
91 | 		write_doc(pd,
92 | 		          md_path)
93 | 
94 | 		grp_rough_toc, grp_detailed_toc =
95 | 			get_doc_anchors(sub_doc_path, md_path, pd, grp_rough_toc, grp_detailed_toc)
96 | 		gnrl_rough_toc, gnrl_detailed_toc =
97 | 			get_doc_anchors(doc_path, md_path, pd, gnrl_rough_toc, gnrl_detailed_toc)
98 | 	end
99 | 
100 | 	local readmefile = io.open(sub_doc_path .. "README.md", "w")
101 | 	readmefile:write(([[# Documentation for %s
102 | 
103 | This documentation has been auto-generated from code using the `argcheck` system.
104 | 
105 | ## Table of contents (file-level)
106 | 
107 | Below follows a more [detailed](#detailed) table of contents with links to
108 | the different functions. Note that this list may be incomplete due to a failure to
109 | add appropriate anchor tags during documentation.
110 | 
111 | %s
112 | 
113 | ## Detailed table of contents (file-level + anchors)
114 | 
115 | %s]]):format(group_name:gsub("_", " "), grp_rough_toc, grp_detailed_toc))
116 | 
117 | 	-- Save the group TOCs for the general README
118 | 	rough_toc_tbl[group_name] = gnrl_rough_toc
119 | 	detailed_toc_tbl[group_name] = gnrl_detailed_toc
120 | end
121 | 
122 | local readmefile = io.open("doc/README.md", "w")
123 | readmefile:write(([[# Documentation for torch-dataframe
124 | 
125 | This documentation has been auto-generated from code using the `argcheck` system.
126 | 
127 | Below follows a more [detailed](#detailed) table of contents with links to
128 | the different functions. Note that this list may be incomplete due to a failure to
129 | add appropriate anchor tags during documentation.
130 | 
131 | ## Dataframe core components
132 | 
133 | %s
134 | 
135 | ## Dataseries - Dataframe's data storage
136 | 
137 | %s
138 | 
139 | ## Dataframe sub-classes
140 | 
141 | %s
142 | 
143 | ## Helper classes
144 | 
145 | %s]]):format(rough_toc_tbl["core"],
146 | 	rough_toc_tbl["dataseries"],
147 | 	rough_toc_tbl["sub_classes"],
148 | 	rough_toc_tbl["helper_classes"]))
149 | 
150 | detailed_toc = ([[
151 | 
152 | # Detailed table of contents (file-level + anchors)
153 | 
154 | ## Dataframe core components
155 | 
156 | %s
157 | 
158 | ## Dataseries - Dataframe's data storage
159 | 
160 | %s
161 | 
162 | ## Dataframe sub-classes
163 | 
164 | %s
165 | 
166 | ## Helper classes
167 | 
168 | %s]]):format(detailed_toc_tbl["core"],
169 | 	detailed_toc_tbl["dataseries"],
170 | 	detailed_toc_tbl["sub_classes"],
171 | 	detailed_toc_tbl["helper_classes"])
172 | 
173 | -- Remove these elements from the tables in order to avoid outputting them twice
174 | for _,key in ipairs({"core", "dataseries", "sub_classes", "helper_classes"}) do
175 | 	rough_toc_tbl[key] = nil
176 | 	detailed_toc_tbl[key] = nil
177 | end
178 | 
179 | for group_name, toc in pairs(rough_toc_tbl) do
180 | 	local group_title = group_name:sub(1,1):upper() .. group_name:sub(2):gsub("_", " ")
181 | 	readmefile:write(([[
182 | 
183 | ## %s
184 | 
185 | %s]]):format(group_title, toc))
186 | 	detailed_toc = ([[%s
187 | 
188 | ## %s
189 | 
190 | %s]]):format(detailed_toc, group_title, detailed_toc_tbl[group_name])
191 | end
192 | 
193 | readmefile:write(([[
194 | 
195 | %s
196 | ]]):format(detailed_toc))
197 | 
198 | readmefile:close()
199 | 
--------------------------------------------------------------------------------
/doc/core/README.md:
--------------------------------------------------------------------------------
1 | # Documentation for core
2 | 
3 | This documentation has been auto-generated from code using the `argcheck` system.
4 | 
5 | ## Table of contents (file-level)
6 | 
7 | Below follows a more [detailed](#detailed) table of contents with links to
8 | the different functions. Note that this list may be incomplete due to a failure to
9 | add appropriate anchor tags during documentation.
10 | 11 | 12 | - [Core functions](init.md) 13 | - [Categorical functions](categorical.md) 14 | - [Column functions](column.md) 15 | - [Data save/export functions](export_data.md) 16 | - [Data loader functions](load_data.md) 17 | - [Metatable functions](metatable.md) 18 | - [Missing data functions](missing_data.md) 19 | - [Output functions](output.md) 20 | - [Row functions](row.md) 21 | - [Subsetting and manipulation functions](select_set_update.md) 22 | - [Statistical functions](statistics.md) 23 | - [Subsets and batches](subsets_and_batches.md) 24 | 25 | ## Detailed table of contents (file-level + anchors) 26 | 27 | 28 | - **[Core functions](init.md)** 29 | - [Dataframe.`__init`](init.md#Dataframe.__init) 30 | - [Dataframe.get_schema](init.md#Dataframe.get_schema) 31 | - [Dataframe.shape](init.md#Dataframe.shape) 32 | - [Dataframe.version](init.md#Dataframe.version) 33 | - [Dataframe.set_version](init.md#Dataframe.set_version) 34 | - [Dataframe.upgrade_frame](init.md#Dataframe.upgrade_frame) 35 | - [Dataframe.assert_is_index](init.md#Dataframe.assert_is_index) 36 | - **[Categorical functions](categorical.md)** 37 | - [Dataframe.as_categorical](categorical.md#Dataframe.as_categorical) 38 | - [Dataframe.add_cat_key](categorical.md#Dataframe.add_cat_key) 39 | - [Dataframe.as_string](categorical.md#Dataframe.as_string) 40 | - [Dataframe.clean_categorical](categorical.md#Dataframe.clean_categorical) 41 | - [Dataframe.is_categorical](categorical.md#Dataframe.is_categorical) 42 | - [Dataframe.get_cat_keys](categorical.md#Dataframe.get_cat_keys) 43 | - [Dataframe.to_categorical](categorical.md#Dataframe.to_categorical) 44 | - [Dataframe.from_categorical](categorical.md#Dataframe.from_categorical) 45 | - [Dataframe.boolean2categorical](categorical.md#Dataframe.boolean2categorical) 46 | - **[Column functions](column.md)** 47 | - [Dataframe.is_numerical](column.md#Dataframe.is_numerical) 48 | - [Dataframe.is_string](column.md#Dataframe.is_string) 49 | - [Dataframe.is_boolean](column.md#Dataframe.is_boolean) 50 | - [Dataframe.has_column](column.md#Dataframe.has_column) 51 | - [Dataframe.assert_has_column](column.md#Dataframe.assert_has_column) 52 | - [Dataframe.assert_has_not_column](column.md#Dataframe.assert_has_not_column) 53 | - [Dataframe.drop](column.md#Dataframe.drop) 54 | - [Dataframe.add_column](column.md#Dataframe.add_column) 55 | - [Dataframe.get_column](column.md#Dataframe.get_column) 56 | - [Dataframe.reset_column](column.md#Dataframe.reset_column) 57 | - [Dataframe.rename_column](column.md#Dataframe.rename_column) 58 | - [Dataframe.get_numerical_colnames](column.md#Dataframe.get_numerical_colnames) 59 | - [Dataframe.get_column_order](column.md#Dataframe.get_column_order) 60 | - [Dataframe.swap_column_order](column.md#Dataframe.swap_column_order) 61 | - [Dataframe.pos_column_order](column.md#Dataframe.pos_column_order) 62 | - [Dataframe.boolean2tensor](column.md#Dataframe.boolean2tensor) 63 | - **[Data save/export functions](export_data.md)** 64 | - [Dataframe.to_csv](export_data.md#Dataframe.to_csv) 65 | - [Dataframe.to_tensor](export_data.md#Dataframe.to_tensor) 66 | - [Dataframe.get](export_data.md#Dataframe.get) 67 | - **[Data loader functions](load_data.md)** 68 | - [Dataframe.load_csv](load_data.md#Dataframe.load_csv) 69 | - [Dataframe.bulk_load_csv](load_data.md#Dataframe.bulk_load_csv) 70 | - [Dataframe.load_table](load_data.md#Dataframe.load_table) 71 | - [Dataframe.`_clean_columns`](load_data.md#Dataframe._clean_columns) 72 | - **[Metatable functions](metatable.md)** 73 | - 
[Dataframe.size](metatable.md#Dataframe.size)
74 | 	- [Dataframe.`__tostring__`](metatable.md#Dataframe.__tostring__)
75 | 	- [Dataframe.copy](metatable.md#Dataframe.copy)
76 | 	- [Dataframe.#](metatable.md#Dataframe.#)
77 | 	- [Dataframe.==](metatable.md#Dataframe.==)
78 | - **[Missing data functions](missing_data.md)**
79 | 	- [Dataframe.count_na](missing_data.md#Dataframe.count_na)
80 | 	- [Dataframe.fill_na](missing_data.md#Dataframe.fill_na)
81 | 	- [Dataframe.fill_all_na](missing_data.md#Dataframe.fill_all_na)
82 | - **[Output functions](output.md)**
83 | 	- [Dataframe.output](output.md#Dataframe.output)
84 | 	- [Dataframe.show](output.md#Dataframe.show)
85 | 	- [Dataframe.tostring](output.md#Dataframe.tostring)
86 | 	- [Dataframe.`_to_html`](output.md#Dataframe._to_html)
87 | - **[Row functions](row.md)**
88 | 	- [Dataframe.get_row](row.md#Dataframe.get_row)
89 | 	- [Dataframe.insert](row.md#Dataframe.insert)
90 | 	- [Dataframe.insert](row.md#Dataframe.insert)
91 | 	- [Dataframe.append](row.md#Dataframe.append)
92 | 	- [Dataframe.rbind](row.md#Dataframe.rbind)
93 | 	- [Dataframe.remove_index](row.md#Dataframe.remove_index)
94 | - **[Subsetting and manipulation functions](select_set_update.md)**
95 | 	- [Dataframe.sub](select_set_update.md#Dataframe.sub)
96 | 	- [Dataframe.get_random](select_set_update.md#Dataframe.get_random)
97 | 	- [Dataframe.head](select_set_update.md#Dataframe.head)
98 | 	- [Dataframe.tail](select_set_update.md#Dataframe.tail)
99 | 	- [Dataframe.`_create_subset`](select_set_update.md#Dataframe._create_subset)
100 | 	- [Dataframe.where](select_set_update.md#Dataframe.where)
101 | 	- [Dataframe.which](select_set_update.md#Dataframe.which)
102 | 	- [Dataframe.update](select_set_update.md#Dataframe.update)
103 | 	- [Dataframe.set](select_set_update.md#Dataframe.set)
104 | 	- [Dataframe.wide2long](select_set_update.md#Dataframe.wide2long)
105 | - **[Statistical functions](statistics.md)**
106 | 	- [Dataframe.unique](statistics.md#Dataframe.unique)
107 | 	- [Dataframe.value_counts](statistics.md#Dataframe.value_counts)
108 | 	- [Dataframe.which_max](statistics.md#Dataframe.which_max)
109 | 	- [Dataframe.which_min](statistics.md#Dataframe.which_min)
110 | 	- [Dataframe.get_mode](statistics.md#Dataframe.get_mode)
111 | 	- [Dataframe.get_max_value](statistics.md#Dataframe.get_max_value)
112 | 	- [Dataframe.get_min_value](statistics.md#Dataframe.get_min_value)
113 | - **[Subsets and batches](subsets_and_batches.md)**
114 | 	- [Dataframe.create_subsets](subsets_and_batches.md#Dataframe.create_subsets)
115 | 	- [Dataframe.reset_subsets](subsets_and_batches.md#Dataframe.reset_subsets)
116 | 	- [Dataframe.has_subset](subsets_and_batches.md#Dataframe.has_subset)
117 | 	- [Dataframe.get_subset](subsets_and_batches.md#Dataframe.get_subset)
--------------------------------------------------------------------------------
/doc/core/categorical.md:
--------------------------------------------------------------------------------
1 | # API documentation for [categorical functions](#__Categorical functions__)
2 | - [Dataframe.as_categorical](#Dataframe.as_categorical)
3 | - [Dataframe.add_cat_key](#Dataframe.add_cat_key)
4 | - [Dataframe.as_string](#Dataframe.as_string)
5 | - [Dataframe.clean_categorical](#Dataframe.clean_categorical)
6 | - [Dataframe.is_categorical](#Dataframe.is_categorical)
7 | - [Dataframe.get_cat_keys](#Dataframe.get_cat_keys)
8 | - [Dataframe.to_categorical](#Dataframe.to_categorical)
9 | - [Dataframe.from_categorical](#Dataframe.from_categorical)
10 | - 
[Dataframe.boolean2categorical](#Dataframe.boolean2categorical)
11 | 
12 | 
13 | ## Categorical functions
14 | 
15 | 
16 | ### Dataframe.as_categorical(self, column_name[, levels][, labels][, exclude])
17 | 
18 | Set a column to categorical type.
19 | 
20 | ```
21 | ({
22 |    self         = Dataframe  -- 
23 |    column_name  = string     -- The column name to convert
24 |   [levels       = Df_Array|boolean]  -- An optional array of the values that column might have taken.
25 | 	 The default is the unique set of values taken by Dataframe.unique,
26 | 	 sorted into increasing order. If you provide values that aren't present
27 | 	 within the current column the value will still be saved and may be invoked in
28 | 	 the future. [default=false]
29 |   [labels       = Df_Array|boolean]  -- An optional character vector of labels for the levels
30 | 	 (in the same order as levels after removing those in exclude) [default=false]
31 |   [exclude      = Df_Array|boolean]  -- Values to be excluded when forming the set of levels. This should be
32 | 	 of the same type as column, and will be coerced if necessary. [default=false]
33 | })
34 | ```
35 | 
36 | _Return value_: self
37 | 
38 | ```
39 | ({
40 |    self          = Dataframe  -- 
41 |    column_array  = Df_Array   -- An array with column names
42 |   [levels        = Df_Array|boolean]  -- An optional array of the values that column might have taken.
43 | 	 The default is the unique set of values taken by Dataframe.unique,
44 | 	 sorted into increasing order. If you provide values that aren't present
45 | 	 within the current column the value will still be saved and may be invoked in
46 | 	 the future. [default=false]
47 |   [labels        = Df_Array|boolean]  -- An optional character vector of labels for the levels
48 | 	 (in the same order as levels after removing those in exclude) [default=false]
49 |   [exclude       = Df_Array|boolean]  -- Values to be excluded when forming the set of levels. This should be
50 | 	 of the same type as column, and will be coerced if necessary. [default=false]
51 | })
52 | ```
53 | 
54 | 
55 | ### Dataframe.add_cat_key(self, column_name, key)
56 | 
57 | Adds a key to the keyset of a categorical column. Mostly intended for internal use.
58 | 
59 | ```
60 | ({
61 |    self         = Dataframe      -- 
62 |    column_name  = string         -- The column name
63 |    key          = number|string  -- The new key to insert
64 | })
65 | ```
66 | 
67 | _Return value_: index value for key (integer)
68 | 
69 | ### Dataframe.as_string(self, column_name)
70 | 
71 | Converts a categorical column to a string column. This can be used to revert
72 | the Dataframe.as_categorical or as a way to convert numericals into strings.
73 | 
74 | ```
75 | ({
76 |    self         = Dataframe  -- 
77 |    column_name  = string     -- The column name
78 | })
79 | ```
80 | 
81 | _Return value_: self
82 | 
83 | ### Dataframe.clean_categorical(self, column_name[, reset_keys])
84 | 
85 | ```
86 | ({
87 |    self         = Dataframe  -- 
88 |    column_name  = string     -- the name of the column
89 |   [reset_keys   = boolean]   -- if all the keys should be reinitialized [default=false]
90 | })
91 | ```
92 | 
93 | Removes any categories no longer present from the keys
94 | 
95 | _Return value_: self
96 | 
97 | ### Dataframe.is_categorical(self, column_name)
98 | 
99 | Check if a column is categorical
100 | 
101 | ```
102 | ({
103 |    self         = Dataframe  -- 
104 |    column_name  = string     -- the name of the column
105 | })
106 | ```
107 | 
108 | _Return value_: boolean
109 | 
110 | ### Dataframe.get_cat_keys(self, column_name)
111 | 
112 | Get keys from a categorical column.
113 | 
114 | ```
115 | ({
116 |    self         = Dataframe  -- 
117 |    column_name  = string     -- the name of the column
118 | })
119 | ```
120 | 
121 | _Return value_: table with `["key"] = number` structure
122 | 
123 | ### Dataframe.to_categorical(self, data, column_name)
124 | 
125 | Converts values to categorical according to a column's keys
126 | 
127 | ```
128 | ({
129 |    self         = Dataframe  -- 
130 |    data         = number|torch.*Tensor|Df_Array  -- The integer to be converted
131 |    column_name  = string     -- The name of the column whose keys to use
132 | })
133 | ```
134 | 
135 | _Return value_: string with the value
136 | 
137 | ### Dataframe.from_categorical(self, data, column_name[, as_tensor])
138 | 
139 | ```
140 | ({
141 |    self         = Dataframe  -- 
142 |    data         = Df_Array   -- The data to be converted
143 |    column_name  = string     -- The name of the column
144 |   [as_tensor    = boolean]   -- If the returned value should be a tensor [default=false]
145 | })
146 | ```
147 | 
148 | Converts categorical to numerical according to a column's keys
149 | 
150 | _Return value_: table or tensor
151 | 
152 | ```
153 | ({
154 |    self         = Dataframe      -- 
155 |    data         = number|string  -- The data to be converted
156 |    column_name  = string         -- The name of the column
157 | })
158 | ```
159 | 
160 | 
161 | ### Dataframe.boolean2categorical(self, column_name[, false_str][, true_str])
162 | 
163 | Converts a boolean column into a categorical column
164 | 
165 | ```
166 | ({
167 |    self         = Dataframe  -- 
168 |    column_name  = string     -- The boolean column that you want to convert
169 |   [false_str    = string]    -- The string value for false [default=false]
170 |   [true_str     = string]    -- The string value for true [default=true]
171 | })
172 | ```
173 | 
174 | _Return value_: self
--------------------------------------------------------------------------------
/doc/core/export_data.md:
--------------------------------------------------------------------------------
1 | # API documentation for [Data save/export functions](#__Data save/export functions__)
2 | - [Dataframe.to_csv](#Dataframe.to_csv)
3 | - [Dataframe.to_tensor](#Dataframe.to_tensor)
4 | - [Dataframe.get](#Dataframe.get)
5 | 
6 | 
7 | ## Data save/export functions
8 | 
9 | 
10 | ### Dataframe.to_csv(self, path[, separator][, verbose])
11 | 
12 | Saves a Dataframe into a CSV using csvigo as backend
13 | 
14 | _Return value_: self (Dataframe)
15 | 
16 | ```
17 | ({
18 |    self       = Dataframe  -- 
19 |    path       = string     -- path to file
20 |   [separator  = string]    -- separator (one character) [default=,]
21 |   [verbose    = boolean]   -- verbose load [default=false]
22 | })
23 | ```
24 | 
25 | 
26 | ### Dataframe.to_tensor(self)
27 | 
28 | Convert the numeric section or specified columns of the dataset to a tensor
29 | 
30 | ```
31 | ({
32 |    self = Dataframe  -- 
33 | })
34 | ```
35 | 
36 | _Return value_: (1) torch.tensor with self:size(1) rows and self:size(2) columns,
37 |  (2) exported column names
38 | 
39 | 
40 | You can export selected columns using the columns argument:
41 | 
42 | ```
43 | ({
44 |    self     = Dataframe  -- 
45 |    columns  = Df_Array   -- The columns to export to labels
46 | })
47 | ```
48 | 
49 | If a filename is provided the tensor will be saved (`torch.save`) to that file:
50 | 
51 | ```
52 | ({
53 |    self      = Dataframe  -- 
54 |    filename  = string     -- Filename for tensor.save()
55 |   [columns   = Df_Array]  -- The columns to export to labels [default=false]
56 | })
57 | ```
58 | 
59 | ### Dataframe.get(self, idx)
60 | 
61 | A function for *torchnet* compliance. It subsets a single index and returns the
62 | `to_tensor` on that example.
63 | 
64 | ```
65 | ({
66 |    self  = Dataframe  -- 
67 |    idx   = number     -- 
68 | })
69 | ```
70 | 
71 | _Return value_: (1) torch.tensor with 1 row and #numerical columns
--------------------------------------------------------------------------------
/doc/core/init.md:
--------------------------------------------------------------------------------
1 | # API documentation for [core functions](#__Core functions__)
2 | - [Dataframe.`__init`](#Dataframe.__init)
3 | - [Dataframe.get_schema](#Dataframe.get_schema)
4 | - [Dataframe.shape](#Dataframe.shape)
5 | - [Dataframe.version](#Dataframe.version)
6 | - [Dataframe.set_version](#Dataframe.set_version)
7 | - [Dataframe.upgrade_frame](#Dataframe.upgrade_frame)
8 | - [Dataframe.assert_is_index](#Dataframe.assert_is_index)
9 | 
10 | 
11 | ## Core functions
12 | 
13 | 
14 | ### Dataframe.__init(self)
15 | 
16 | Creates and initializes a Dataframe class. Invoked through `local my_dataframe = Dataframe()`
17 | 
18 | ```
19 | ({
20 |    self = Dataframe  -- 
21 | })
22 | ```
23 | 
24 | _Return value_: Dataframe
25 | Read in a CSV file
26 | 
27 | ```
28 | ({
29 |    self      = Dataframe  -- 
30 |    csv_file  = string     -- The file path to the CSV
31 | })
32 | ```
33 | 
34 | Directly input a table
35 | 
36 | ```
37 | ({
38 |    self          = Dataframe  -- 
39 |    data          = Df_Dict    -- The data to read in
40 |   [column_order  = Df_Array]  -- The order of the columns (has to be an array and _not_ a dictionary)
41 | })
42 | ```
43 | 
44 | If you enter a column schema* and the number of rows, a table will be initialized. Note
45 | that you can optionally set all non-set values to `nan` values but this may be
46 | time-consuming for big datasets.
47 | 
48 | * A schema is a hash table with the column names as keys and the column types
49 | as values. The column types are:
50 | - `boolean`
51 | - `integer`
52 | - `long`
53 | - `double`
54 | - `string` (this is stored as a `tds.Vec` and can be any value)
55 | 
56 | ```
57 | ({
58 |    self          = Dataframe  -- 
59 |    schema        = Df_Dict    -- The schema to use for initialization
60 |    no_rows       = number     -- The number of rows
61 |   [column_order  = Df_Array]  -- The column order
62 |   [set_missing   = boolean]   -- Whether all elements should be set to missing from start [default=false]
63 | })
64 | ```
65 | 
66 | _Return value_: Dataframe
67 | No updates are performed on already inserted data. The purpose of this method
68 | is to prepare a Dataframe object.
69 | 
70 | A schema is a hash table with the column names as keys and the column types
71 | as values. The column types are:
72 | - `boolean`
73 | - `integer`
74 | - `long`
75 | - `double`
76 | - `string` (this is stored as a `tds.Vec` and can be any value)
77 | 
78 | ```
79 | ({
80 |    self          = Dataframe  -- 
81 |    schema        = Df_Dict    -- The schema to use for initialization
82 |    column_order  = Df_Array   -- The column order
83 | })
84 | ```
85 | 
86 | 
87 | ### Dataframe.get_schema(self, column_name)
88 | 
89 | Returns the schema, i.e.
column types 90 | 91 | ``` 92 | ({ 93 | self = Dataframe -- 94 | column_name = string -- The column to get schema for 95 | }) 96 | ``` 97 | 98 | _Return value_: string 99 | ``` 100 | ({ 101 | self = Dataframe -- 102 | [columns = Df_Array] -- The columns to get schema for 103 | }) 104 | ``` 105 | 106 | _Return value_: table 107 | 108 | ### Dataframe.shape(self) 109 | 110 | Returns the number of rows and columns in a table 111 | 112 | ``` 113 | ({ 114 | self = Dataframe -- 115 | }) 116 | ``` 117 | 118 | _Return value_: table 119 | 120 | ### Dataframe.version(self) 121 | 122 | Returns the current data-frame version 123 | 124 | ``` 125 | ({ 126 | self = Dataframe -- 127 | }) 128 | ``` 129 | 130 | _Return value_: string 131 | 132 | ### Dataframe.set_version(self) 133 | 134 | Sets the data-frame version 135 | 136 | ``` 137 | ({ 138 | self = Dataframe -- 139 | }) 140 | ``` 141 | 142 | _Return value_: self 143 | 144 | ### Dataframe.upgrade_frame(self[, skip_version][, current_version]) 145 | 146 | Upgrades a dataframe using the old batch loading framework to the new framework 147 | by instantiating the subsets argument, copying the indexes and setting the 148 | samplers to either: 149 | 150 | - linear for test/validate or shuffle = false 151 | - permutation if shuffle = true and none of above names 152 | 153 | ``` 154 | ({ 155 | self = Dataframe -- 156 | [skip_version = boolean] -- Set to true if you want to upgrade your dataframe regardless of the version check 157 | [current_version = number] -- The current version of the dataframe 158 | }) 159 | ``` 160 | 161 | *Note:* Sometimes the version check fails to identify that the Dataframe is of 162 | an old version and you can therefore skip the version check. 163 | 164 | _Return value_: Dataframe 165 | 166 | ### Dataframe.assert_is_index(self, index[, plus_one]) 167 | 168 | Asserts that the number is a valid index. 169 | 170 | ``` 171 | ({ 172 | self = Dataframe -- 173 | index = number -- The index to investigate 174 | [plus_one = boolean] -- Count next non-existing index as good. 
120 | ### Dataframe.version(self)
121 | 
122 | Returns the current data-frame version
123 | 
124 | ```
125 | ({
126 |    self = Dataframe -- 
127 | })
128 | ```
129 | 
130 | _Return value_: string
131 | 
132 | ### Dataframe.set_version(self)
133 | 
134 | Sets the data-frame version
135 | 
136 | ```
137 | ({
138 |    self = Dataframe -- 
139 | })
140 | ```
141 | 
142 | _Return value_: self
143 | 
144 | ### Dataframe.upgrade_frame(self[, skip_version][, current_version])
145 | 
146 | Upgrades a dataframe using the old batch loading framework to the new framework
147 | by instantiating the subsets argument, copying the indexes and setting the
148 | samplers to either:
149 | 
150 | - linear for test/validate or shuffle = false
151 | - permutation if shuffle = true and none of the above names
152 | 
153 | ```
154 | ({
155 |    self = Dataframe -- 
156 |    [skip_version = boolean] -- Set to true if you want to upgrade your dataframe regardless of the version check
157 |    [current_version = number] -- The current version of the dataframe
158 | })
159 | ```
160 | 
161 | *Note:* Sometimes the version check fails to identify that the Dataframe is of
162 | an old version, in which case you can skip the version check.
163 | 
164 | _Return value_: Dataframe
165 | 
166 | ### Dataframe.assert_is_index(self, index[, plus_one])
167 | 
168 | Asserts that the number is a valid index.
169 | 
170 | ```
171 | ({
172 |    self = Dataframe -- 
173 |    index = number -- The index to investigate
174 |    [plus_one = boolean] -- Count the next non-existing index as valid. When adding rows, an index of size(1) + 1 is OK [default=false]
175 | })
176 | ```
177 | 
178 | _Return value_: Dataframe
--------------------------------------------------------------------------------
/doc/core/load_data.md:
--------------------------------------------------------------------------------
1 | # API documentation for [Data loader functions](#__Data loader functions__)
2 | - [Dataframe.load_csv](#Dataframe.load_csv)
3 | - [Dataframe.bulk_load_csv](#Dataframe.bulk_load_csv)
4 | - [Dataframe.load_table](#Dataframe.load_table)
5 | - [Dataframe.`_clean_columns`](#Dataframe._clean_columns)
6 | 
7 | 
8 | ## Data loader functions
9 | 
10 | 
11 | ### Dataframe.load_csv(self, path[, header][, schema][, separator][, skip][, verbose][, rows2explore])
12 | 
13 | Loads a CSV file into Dataframe using csvigo as backend
14 | 
15 | ```
16 | ({
17 |    self = Dataframe -- 
18 |    path = string -- path to file
19 |    [header = boolean] -- if has header on first line [default=true]
20 |    [schema = Df_Dict] -- The column schema types with column names as keys
21 |    [separator = string] -- separator (one character) [default=,]
22 |    [skip = number] -- skip this many lines at start of file [default=0]
23 |    [verbose = boolean] -- verbose load [default=false]
24 |    [rows2explore = number] -- The maximum number of rows to traverse when trying to identify schema
25 | })
26 | ```
27 | 
28 | _Return value_: self
29 | 
30 | ### Dataframe.bulk_load_csv(self, path[, header][, schema][, separator][, skip][, verbose][, nthreads])
31 | 
32 | Loads a CSV file into Dataframe using multithreading.
33 | Warning: this method does not perform the same checks as load_csv does. It doesn't handle formats other than torch.*Tensor and tds.Vec.
34 | 
35 | ```
36 | ({
37 |    self = Dataframe -- 
38 |    path = string -- path to file
39 |    [header = boolean] -- if has header on first line (not used at the moment) [default=true]
40 |    [schema = Df_Dict] -- The column schema types with column names as keys
41 |    [separator = string] -- separator (one character) [default=,]
42 |    [skip = number] -- skip this many lines at start of file (not used at the moment) [default=0]
43 |    [verbose = boolean] -- verbose load [default=false]
44 |    [nthreads = number] -- Number of threads to use to read the csv file [default=1]
45 | })
46 | ```
47 | 
48 | _Return value_: self
49 | 
50 | ### Dataframe.load_table(self, data[, schema][, column_order])
51 | 
52 | ```
53 | ({
54 |    self = Dataframe -- 
55 |    data = Df_Dict -- Table (dictionary) to import. Max depth 2.
56 |    [schema = Df_Dict] -- Provide if you want to force column types
57 |    [column_order = Df_Array] -- The order of the columns (has to be an array and _not_ a dictionary)
58 | })
59 | ```
60 | 
61 | Imports table data directly into the Dataframe. The columns should all be of equal
62 | length or single values. If a table contains one column with 10 rows and another
63 | column with just a single element, that element is duplicated 10 times, i.e.
64 | filling the entire column with that single value.
65 | 
66 | 
67 | _Return value_: self
68 | 
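A sketch of the two main loaders above; `iris.csv` is a placeholder path and the column names are invented for the example:

```
local df = Dataframe()

-- CSV load with an explicit type override for one column
df:load_csv{
   path = 'iris.csv',
   schema = Df_Dict{sepal_length = 'double'}
}

-- Table load: the single 'source' value is recycled to fill the
-- whole column, matching the duplication rule described above
local df2 = Dataframe()
df2:load_table{
   data = Df_Dict{
      value  = {10, 20, 30},
      source = 'sensor A'
   }
}
```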
69 | ### Dataframe._clean_columns(self, data[, column_order][, schema])
70 | 
71 | ```
72 | {
73 |    self = Dataframe -- 
74 |    data = table -- 
75 |    [column_order = table] -- 
76 |    [schema = table] -- 
77 | }
78 | ```
79 | 
80 | Internal function to clean column names
81 | 
82 | _Return value_: self
--------------------------------------------------------------------------------
/doc/core/metatable.md:
--------------------------------------------------------------------------------
1 | # API documentation for [metatable functions](#__Metatable functions__)
2 | - [Dataframe.size](#Dataframe.size)
3 | - [Dataframe.`__tostring__`](#Dataframe.__tostring__)
4 | - [Dataframe.copy](#Dataframe.copy)
5 | - [Dataframe.#](#Dataframe.#)
6 | - [Dataframe.==](#Dataframe.==)
7 | 
8 | 
9 | ## Metatable functions
10 | 
11 | 
12 | ### Dataframe.size(self[, dim])
13 | 
14 | By providing a dimension you can get only that dimension, row == 1, col == 2. If
15 | the value is omitted it will return the number of rows in order to comply with the
16 | torchnet standard.
17 | 
18 | ```
19 | ({
20 |    self = Dataframe -- 
21 |    [dim = number] -- The dimension of interest [default=1]
22 | })
23 | ```
24 | 
25 | _Return value_: integer
26 | 
27 | ### Dataframe.__tostring__(self)
28 | 
29 | A wrapper for `tostring()`
30 | 
31 | ```
32 | ({
33 |    self = Dataframe -- 
34 | })
35 | ```
36 | 
37 | _Return value_: string
38 | 
39 | ### Dataframe.copy(self)
40 | 
41 | Copies the table together with all metadata
42 | 
43 | ```
44 | ({
45 |    self = Dataframe -- 
46 | })
47 | ```
48 | 
49 | _Return value_: Dataframe
50 | 
51 | ### Dataframe.#
52 | 
53 | Returns the number of rows
54 | 
55 | _Return value_: integer
56 | 
57 | ### Dataframe.==
58 | 
59 | Checks if two Dataframes contain the same values
60 | 
61 | _Return value_: boolean
--------------------------------------------------------------------------------
/doc/core/missing_data.md:
--------------------------------------------------------------------------------
1 | # API documentation for [missing data functions](#__Missing data functions__)
2 | - [Dataframe.count_na](#Dataframe.count_na)
3 | - [Dataframe.fill_na](#Dataframe.fill_na)
4 | - [Dataframe.fill_na](#Dataframe.fill_na)
5 | 
6 | 
7 | ## Missing data functions
8 | 
9 | 
10 | ### Dataframe.count_na(self[, columns][, as_dataframe])
11 | 
12 | Counts missing values in the dataset
13 | 
14 | ```
15 | ({
16 |    self = Dataframe -- 
17 |    [columns = Df_Array] -- The columns to count
18 |    [as_dataframe = boolean] -- Return a dataframe [default=true]
19 | })
20 | ```
21 | 
22 | _Return value_: Dataframe or table containing the missing values per column, plus the total number of missing values
23 | If you only want to count a single column
24 | 
25 | ```
26 | ({
27 |    self = Dataframe -- 
28 |    column = string -- The column to count
29 | })
30 | ```
31 | 
32 | _Return value_: single integer
33 | 
34 | ### Dataframe.fill_na(self, column_name[, default_value])
35 | 
36 | Replaces missing values in a specific column
37 | 
38 | ```
39 | ({
40 |    self = Dataframe -- 
41 |    column_name = string -- The column to fill
42 |    [default_value = number|string|boolean] -- The value to fill missing entries with [default=0]
43 | })
44 | ```
45 | 
46 | _Return value_: self
47 | 
48 | ### Dataframe.fill_na(self[, default_value])
49 | 
50 | Replaces missing values in all columns
51 | 
52 | ```
53 | ({
54 |    self = Dataframe -- 
55 |    [default_value = number|string|boolean] -- The value to fill missing entries with [default=0]
56 | })
57 | ```
58 | 
59 | _Return value_: self
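A sketch of a typical missing-data pass with the functions above; the file and column names are placeholders, and the two-value return of `count_na` follows the description given here:

```
local df = Dataframe('my_data.csv')

-- Missing values per column as a plain table, plus the total count
local na_by_column, total_na = df:count_na{as_dataframe = false}
print(total_na)

-- Count a single column
print(df:count_na('score'))

-- Fill one column with a sentinel, then the remaining columns with 0
df:fill_na('score', -1)
df:fill_na()
```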
--------------------------------------------------------------------------------
/doc/core/output.md:
--------------------------------------------------------------------------------
1 | # API documentation for [output functions](#__Output functions__)
2 | - [Dataframe.output](#Dataframe.output)
3 | - [Dataframe.show](#Dataframe.show)
4 | - [Dataframe.tostring](#Dataframe.tostring)
5 | - [Dataframe.`_to_html`](#Dataframe._to_html)
6 | 
7 | 
8 | ## Output functions
9 | 
10 | 
11 | ### Dataframe.output(self[, html][, max_rows][, digits])
12 | 
13 | ```
14 | ({
15 |    self = Dataframe -- 
16 |    [html = boolean] -- If the output should be in html format [default=false]
17 |    [max_rows = number] -- Limit the maximum number of printed rows [default=20]
18 |    [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false]
19 | })
20 | ```
21 | 
22 | Prints the table into itorch.html if in itorch and html == true, otherwise prints a table string
23 | 
24 | _Return value_: self
25 | 
26 | ### Dataframe.show(self[, digits])
27 | 
28 | ```
29 | ({
30 |    self = Dataframe -- 
31 |    [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false]
32 | })
33 | ```
34 | 
35 | Prints the top and bottom sections of the table for a better overview. Uses itorch if available
36 | 
37 | _Return value_: self
38 | 
39 | ### Dataframe.tostring(self[, digits][, columns2skip][, no_rows][, min_col_width][, max_table_width])
40 | 
41 | Converts the table to a string representation that follows standard markdown syntax.
42 | The table tries to stay within a maximum table width, inspired by the `dplyr` table print.
43 | The core concept is that wide columns are clipped when the table risks becoming larger
44 | than a certain maximum width. Since columns that are clipped to just a few characters
45 | convey no information, there is a minimum column width.
46 | The columns that then don't fit are noted below the table as skipped columns.
47 | 
48 | You can also specify columns that you wish to skip by providing the columns2skip
49 | argument. If columns are skipped on user demand there won't be a `...` column to
50 | the right, but if the table is still too wide the software may choose to skip
51 | additional columns and thereby add a `...` column.
52 | 
53 | ```
54 | ({
55 |    self = Dataframe -- 
56 |    [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false]
57 |    [columns2skip = Df_Array] -- Columns to skip from the output [default=false]
58 |    [no_rows = number|boolean] -- The number of rows to display. If -1 then shows all. Defaults to setting in Dataframe.tostring_defaults [default=false]
59 |    [min_col_width = number|boolean] -- The minimum column width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false]
60 |    [max_table_width = number|boolean] -- The maximum table width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false]
61 | })
62 | ```
63 | 
64 | _Return value_: string
65 | 
66 | ```
67 | ({
68 |    self = Dataframe -- 
69 |    [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false]
70 |    columns2skip = string -- Columns to skip from the output as regular expression
71 |    [no_rows = number] -- The number of rows to display. If -1 then shows all. Defaults to setting in Dataframe.tostring_defaults [default=false]
72 |    [min_col_width = number] -- The minimum column width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false]
73 |    [max_table_width = number] -- The maximum table width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false]
74 | })
75 | ```
76 | 
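A sketch of the output helpers above; the file name and the `comment` column are invented for the example:

```
local df = Dataframe('my_data.csv')

df:show()                            -- top and bottom of the frame
df:output{max_rows = 10, digits = 2} -- html rendering when inside iTorch

-- Markdown-style string, clipping to two digits and skipping one column
local str = df:tostring{
   digits = 2,
   columns2skip = Df_Array('comment')
}
print(str)
```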
77 | 
78 | ### Dataframe._to_html(self[, split_table][, offset][, digits])
79 | 
80 | ```
81 | ({
82 |    self = Dataframe -- 
83 |    [split_table = string] -- Where the table is split. Valid input is 'none', 'top', 'bottom', 'all'.
84 |    Note that the 'bottom' removes the trailing while the 'top' removes
85 |    the initial '