├── .gitignore ├── .luacov ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── NEWS.md ├── README.md ├── argcheck.lua ├── custom_assertions.lua ├── dataframe ├── categorical.lua ├── column.lua ├── export_data.lua ├── init.lua ├── load_data.lua ├── metatable.lua ├── missing_data.lua ├── output.lua ├── row.lua ├── select_set_update.lua ├── statistics.lua └── subsets_and_batches.lua ├── dataseries ├── categorical.lua ├── export.lua ├── init.lua ├── metatable.lua ├── sngl_elmnt_ops.lua └── statistics.lua ├── doc.lua ├── doc ├── README.md ├── core │ ├── README.md │ ├── categorical.md │ ├── column.md │ ├── export_data.md │ ├── init.md │ ├── load_data.md │ ├── metatable.md │ ├── missing_data.md │ ├── output.md │ ├── row.md │ ├── select_set_update.md │ ├── statistics.md │ └── subsets_and_batches.md ├── dataseries │ ├── README.md │ ├── categorical.md │ ├── export.md │ ├── init.md │ ├── metatable.md │ ├── sngl_elmnt_ops.md │ └── statistics.md ├── helper_classes │ ├── 10_iterator.md │ ├── 11_paralleliterator.md │ ├── 20_tbl.md │ ├── 21_dict.md │ ├── 22_array.md │ └── README.md ├── sub_classes │ ├── 01_subset.md │ ├── 10_batchframe.md │ └── README.md └── utils │ ├── README.md │ └── utils.md ├── examples ├── Facebook license │ ├── LICENSE │ └── PATENTS └── mnist_example.lua ├── helper_classes ├── 10_iterator.lua ├── 11_paralleliterator.lua ├── 20_tbl.lua ├── 21_dict.lua ├── 22_array.lua └── Facebok license ├── init.lua ├── rocks ├── torch-dataframe-1.0-0.rockspec ├── torch-dataframe-1.1-0.rockspec ├── torch-dataframe-1.5-0.rockspec ├── torch-dataframe-1.6-0.rockspec ├── torch-dataframe-1.6-1.rockspec ├── torch-dataframe-1.7-0.rockspec └── torch-dataframe-scm-1.rockspec ├── specs ├── coverage.sh ├── data │ ├── advanced_short.csv │ ├── full.csv │ ├── iris-label.csv │ ├── iris-no-header.csv │ ├── iris-no-label.csv │ ├── realistic_29_row_data.csv │ ├── sampler_csv_files │ │ ├── index.csv │ │ └── index3.csv │ └── simple_short.csv ├── dataframe │ ├── batchframe_spec.lua │ ├── categorical_spec.lua │ ├── column_order_spec.lua │ ├── column_spec.lua │ ├── export_data_spec.lua │ ├── load_data_spec.lua │ ├── main_spec.lua │ ├── metatable_spec.lua │ ├── missing_data_spec.lua │ ├── row_spec.lua │ ├── sampler_spec.lua │ ├── select_set_update_spec.lua │ ├── serialization_spec.lua │ ├── statistics_spec.lua │ └── subsets_and_batches_spec.lua ├── dataseries │ └── dataseries_spec.lua ├── helper_classes │ ├── df_array_spec.lua │ ├── df_dict_spec.lua │ └── df_tbl_spec.lua ├── linter.sh ├── output │ ├── Wiki-templates │ │ ├── Readme.md │ │ └── Where_update_and_set.ipynb │ ├── cli_output.lua │ └── itorch_notebook_df_test.ipynb ├── run_all.sh └── utils │ ├── ntwrk_implementation_spec.lua │ ├── test.lua │ └── utils_spec.lua ├── sub_classes ├── 01_subset.lua ├── 10_batchframe.lua └── subset_extensions │ └── samplers.lua └── utils ├── doc_helpers ├── get_anchors.lua ├── parse_file.lua └── write_doc.lua ├── loader.lua └── utils.lua /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | luacov.* 3 | build.* 4 | -------------------------------------------------------------------------------- /.luacov: -------------------------------------------------------------------------------- 1 | return { 2 | modules = { 3 | ["init"] = 'init.lua', 4 | ["argcheck"] = 'argcheck.lua', 5 | ["main"] = 'main.lua', 6 | 7 | ["utils.utils"] = 'utils/utils.lua', 8 | ["utils.loader"] = 'utils/loader.lua', 9 | ["utils.doc_helpers.get_anchors"] = 
'utils/doc_helpers/get_anchors.lua', 10 | ["utils.doc_helpers.parse_file"] = 'utils/doc_helpers/parse_file.lua', 11 | ["utils.doc_helpers.write_doc"] = 'utils/doc_helpers/write_doc.lua', 12 | 13 | ["sub_classes.01_subset"] = 'sub_classes/01_subset.lua', 14 | ["sub_classes.10_batchframe"] = 'sub_classes/10_batchframe.lua', 15 | ["sub_classes.subset_extensions.samplers"] = 'sub_classes/subset_extensions/samplers.lua', 16 | 17 | ["helper_classes.10_iterator"] = 'helper_classes/10_iterator.lua', 18 | ["helper_classes.11_paralleliterator"] = 'helper_classes/11_paralleliterator.lua', 19 | ["helper_classes.20_tbl"] = 'helper_classes/20_tbl.lua', 20 | ["helper_classes.21_dict"] = 'helper_classes/21_dict.lua', 21 | ["helper_classes.22_array"] = 'helper_classes/22_array.lua', 22 | 23 | ["dataseries.categorical"] = 'dataseries/categorical.lua', 24 | ["dataseries.export"] = 'dataseries/export.lua', 25 | ["dataseries.init"] = 'dataseries/init.lua', 26 | ["dataseries.metatable"] = 'dataseries/metatable.lua', 27 | ["dataseries.sngl_elmnt_ops"] = 'dataseries/sngl_elmnt_ops.lua', 28 | ["dataseries.statistics"] = 'dataseries/statistics.lua', 29 | 30 | ["dataframe.categorical"] = 'dataframe/categorical.lua', 31 | ["dataframe.column"] = 'dataframe/column.lua', 32 | ["dataframe.export_data"] = 'dataframe/export_data.lua', 33 | ["dataframe.init"] = 'dataframe/init.lua', 34 | ["dataframe.load_data"] = 'dataframe/load_data.lua', 35 | ["dataframe.metatable"] = 'dataframe/metatable.lua', 36 | ["dataframe.missing_data"] = 'dataframe/missing_data.lua', 37 | ["dataframe.output"] = 'dataframe/output.lua', 38 | ["dataframe.row"] = 'dataframe/row.lua', 39 | ["dataframe.select_set_update"] = 'dataframe/select_set_update.lua', 40 | ["dataframe.statistics"] = 'dataframe/statistics.lua', 41 | ["dataframe.subsets_and_batches"] = 'dataframe/subsets_and_batches.lua' 42 | 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: true 4 | 5 | branches: 6 | only: 7 | - master 8 | - develop 9 | env: 10 | global: 11 | - TORCH_SERVER=https://raw.githubusercontent.com/torch/rocks/master/ 12 | matrix: 13 | - LUA="LUA52" 14 | - LUA="LUA53" 15 | - LUA="LUAJIT20" 16 | - LUA="LUAJIT21" 17 | 18 | before_install: 19 | - if [[ ! -d torch ]]; then git clone https://github.com/torch/distro.git torch --recursive ; fi 20 | - cd torch 21 | - git pull 22 | - git submodule update 23 | - git submodule foreach git pull origin master 24 | - cd .. 25 | - cp -rf torch torch_$LUA 26 | - cd torch_$LUA 27 | - TORCH_LUA_VERSION=$LUA ./install.sh -b 28 | - cd .. 29 | 30 | install: 31 | - source ./torch_$LUA/install/bin/torch-activate 32 | - luarocks --from=$TORCH_SERVER install sundown 33 | - luarocks --from=$TORCH_SERVER install dok 34 | - luarocks --from=$TORCH_SERVER install argcheck 35 | - luarocks --from=$TORCH_SERVER install csvigo 36 | - luarocks install luafilesystem 37 | - luarocks install paths 38 | - luarocks install threads 39 | - luarocks install torchnet 40 | - luarocks install busted 41 | - luarocks install luacov 42 | - luarocks install nn 43 | - luarocks make rocks/torch-dataframe-scm-1.rockspec CFLAGS="-O2 -fPIC -fprofile-arcs -ftest-coverage" LIBFLAG="-shared --coverage" 44 | 45 | script: 46 | - cd specs 47 | - ./run_all.sh --coverage --version $LUA 48 | - ./coverage.sh --generate 49 | - cd ..
50 | 51 | after_success: 52 | - bash <(curl -s https://codecov.io/bash) 53 | 54 | notifications: 55 | email: 56 | on_success: change 57 | on_failure: always 58 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 2.8) 2 | cmake_policy(VERSION 2.8) 3 | 4 | set(PKGNAME Dataframe) 5 | 6 | file(GLOB_RECURSE luafiles RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.lua") 7 | 8 | # Exclude doc helpers and spec files 9 | set (EXCLUDE_DIRS "utils/doc_helpers/" "specs/") 10 | list(REMOVE_ITEM luafiles "custom_assertions.lua") 11 | 12 | foreach (TMP_PATH ${luafiles}) 13 | 14 | foreach (EXCLUDE_DIR ${EXCLUDE_DIRS}) 15 | string (FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) 16 | if (NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) 17 | MESSAGE("Removing ${TMP_PATH}") 18 | list (REMOVE_ITEM luafiles ${TMP_PATH}) 19 | endif () 20 | endforeach(EXCLUDE_DIR) 21 | 22 | endforeach(TMP_PATH) 23 | 24 | foreach(file ${luafiles}) 25 | get_filename_component(dir ${file} PATH) 26 | install(FILES ${file} DESTINATION ${LUA_PATH}/${PKGNAME}/${dir}) 27 | endforeach() 28 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Feel free to report a bug, suggest an enhancement or submit a new feature using [Issues][df_issues], or directly send us a [Pull Request][df_pr] :). 4 | 5 | ## Before submitting 6 | 7 | Don't forget to: 8 | - test your code 9 | - generate the doc 10 | - use the linter script in the `specs` directory 11 | 12 | You can find how we implemented our tests in the [specs directory][df_specs]. See "Behavior Driven Development" for more details on this technique. 13 | 14 | ## Coding style 15 | 16 | For a smoother contribution we ask you to follow these simple rules to keep the code as readable as possible: 17 | * Indentation is a tabulation of size 2 18 | * Every component of a function's name is separated by an underscore: `my_func_name` 19 | 20 | [df_issues]: https://github.com/AlexMili/torch-dataframe/issues 21 | [df_pr]: https://github.com/AlexMili/torch-dataframe/pulls 22 | [df_specs]: https://github.com/AlexMili/torch-dataframe/tree/readme/specs 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /argcheck.lua: -------------------------------------------------------------------------------- 1 | local env = require 'argcheck.env' -- retrieve argcheck environment 2 | 3 | -- From http://lua-users.org/wiki/SplitJoin 4 | function string:split(sep) 5 | local sep, fields = sep or ":", {} 6 | local pattern = string.format("([^%s]+)", sep) 7 | self:gsub(pattern, function(c) fields[#fields+1] = c end) 8 | return fields 9 | end 10 | 11 | env.istype = function(obj, typename) 12 | if (typename == "*") then 13 | return true 14 | end 15 | 16 | -- From the original argcheck env 17 | local thname = torch.typename(obj) -- empty if non-torch class 18 | local thtype = torch.type(obj) 19 | if (typename == "!table" and thtype ~= "table") then 20 | return true 21 | end 22 | 23 | if (typename:match("|")) then 24 | if (thname) then 25 | -- Do a recursive search through all the patterns for torch class objects 26 | for _,subtype in ipairs(typename:split("|")) do 27 | local ret = env.istype(obj, subtype) 28 | if (ret) then 29 | return true 30 | end 31 | end 32 | 33 | return false 34 | else 35 | -- We only need to find a basic variable match + nan values 36 | for _,subtype in ipairs(typename:split("|")) do 37 | if ((thtype == subtype) or 38 | (thtype == "nan" and isnan(obj))) 39 | then 40 | return true 41 | end 42 | end 43 | 44 | return false 45 | end 46 | end 47 | 48 | if thname then 49 | -- __typename (see below) might be absent 50 | local match = thname:match(typename) 51 | if match and (match ~= typename or match == thname) then 52 | return true 53 | end 54 | local mt = torch.getmetatable(thname) 55 | while mt do 56 | if mt.__typename then 57 | match = mt.__typename:match(typename) 58 | if match and (match ~= typename or match == mt.__typename) then 59 | return true 60 | end 61 | end 62 | mt = getmetatable(mt) 63 | end 64 | return false 65 | end 66 | 67 | return type(obj) == typename 68 | end 69 | -------------------------------------------------------------------------------- /dataframe/export_data.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataframe = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Data save/export functions 10 | 11 | ]] 12 | 13 | Dataframe.to_csv = argcheck{ 14 | doc = [[ 15 | 16 | ### Dataframe.to_csv(@ARGP) 17 | 18 | Saves a Dataframe into a CSV using csvigo as backend 19 | 20 | _Return value_: self (Dataframe) 21 | 22 | @ARGT 23 | 24 | ]], 25 | {name="self", type="Dataframe"}, 26 | {name='path', type='string', doc='path to file'}, 27 | {name="separator", type='string', doc='separator (one character)', default=','}, 28 | {name='verbose', type='boolean', help='verbose load', default=false}, 29 | call = function(self, path, separator, verbose) 30 | 31 | -- Make sure that categorical columns are presented in the correct way 32 | local save_data = {} 33 | for _,k in pairs(self.column_order) do 34 | save_data[k] = self:get_column(k):to_table{boolean2string = true} 35 | end 36 | 37 | -- TODO: csvigo will have memory issues when used with regular tables 38 | csvigo.save{path = path,
39 | data = save_data, 40 | separator = separator, 41 | verbose = verbose, 42 | column_order = self.column_order, 43 | nan_as_missing = true} 44 | 45 | return self 46 | end} 47 | 48 | Dataframe.to_tensor = argcheck{ 49 | doc = [[ 50 | 51 | ### Dataframe.to_tensor(@ARGP) 52 | 53 | Convert the numeric section or specified columns of the dataset to a tensor 54 | 55 | @ARGT 56 | 57 | _Return value_: (1) torch.tensor with self:size(1) rows and self:size(2) columns, 58 | (2) exported column names 59 | 60 | ]], 61 | {name="self", type="Dataframe"}, 62 | call = function(self) 63 | 64 | return self:to_tensor(Df_Array(self:get_numerical_colnames())) 65 | end} 66 | 67 | Dataframe.to_tensor = argcheck{doc=[[ 68 | 69 | You can export selected columns using the columns argument: 70 | 71 | @ARGT 72 | ]], 73 | overload=Dataframe.to_tensor, 74 | {name="self", type="Dataframe"}, 75 | {name="columns", type='Df_Array', doc='The columns to export'}, 76 | call = function(self, columns) 77 | 78 | columns = columns.data 79 | 80 | -- Check data integrity 81 | local numeric_dataset = {} 82 | local type = -1 83 | local tensor_types = { 84 | "ByteTensor" -- contains unsigned chars 85 | ,"CharTensor" -- contains signed chars 86 | ,"ShortTensor" -- contains shorts 87 | ,"IntTensor" -- contains ints 88 | ,"LongTensor" -- contains longs 89 | ,"FloatTensor" -- contains floats 90 | ,"DoubleTensor" 91 | } 92 | for _,k in pairs(columns) do 93 | self:assert_has_column(k) 94 | assert(self:is_numerical(k), "Column " .. tostring(k) .. " is not numerical") 95 | local col = self:get_column(k) 96 | numeric_dataset[k] = col:to_tensor() 97 | local current_type = col:type() 98 | 99 | for idx,tnsr_type in ipairs(tensor_types) do 100 | if (current_type:match(tnsr_type)) then 101 | current_type = idx 102 | break 103 | end 104 | end 105 | if (current_type > type) then 106 | type = current_type 107 | end 108 | end 109 | 110 | -- Convert all tensors to the same format before concat 111 | type = ("torch.%s"):format(tensor_types[type]) 112 | for cn,col in pairs(numeric_dataset) do 113 | numeric_dataset[cn] = numeric_dataset[cn]:type(type) 114 | end 115 | 116 | local tensor_data = nil 117 | local tensor_col_names = {} 118 | for col_no = 1,#self.column_order do 119 | -- Find the next column that is present in the numerics 120 | local found = false 121 | local column_name = self.column_order[col_no] 122 | for k,v in pairs(numeric_dataset) do 123 | if (k == column_name) then 124 | found = true 125 | break 126 | end 127 | end 128 | 129 | -- If the column was found we concatenate it with our tensor_data 130 | if (found) then 131 | local next_col = numeric_dataset[column_name] 132 | if (torch.isTensor(tensor_data)) then 133 | tensor_data = torch.cat(tensor_data, next_col, 2) 134 | else 135 | tensor_data = next_col 136 | end 137 | table.insert(tensor_col_names, column_name) 138 | end 139 | end 140 | 141 | if (#tensor_col_names == 1) then 142 | -- Reshape to tabular if this is only a single column 143 | tensor_data = tensor_data:reshape(tensor_data:size(1), 1) 144 | end 145 | 146 | return tensor_data, tensor_col_names 147 | end} 148 | 149 | Dataframe.to_tensor = argcheck{ 150 | doc=[[ 151 | 152 | If a filename is provided the tensor will be saved (`torch.save`) to that file: 153 | 154 | @ARGT 155 | ]], 156 | overload=Dataframe.to_tensor, 157 | {name="self", type="Dataframe"}, 158 | {name='filename', type='string', doc='Filename for tensor.save()'}, 159 | {name="columns", type='Df_Array', doc='The columns to export', default=false}, 160 | call = function(self,
filename, columns) 161 | local tensor_data, tensor_col_names 162 | if (columns) then 163 | tensor_data, tensor_col_names = self:to_tensor{columns = columns} 164 | else 165 | tensor_data, tensor_col_names = self:to_tensor() 166 | end 167 | 168 | torch.save(filename, tensor_data) 169 | 170 | return tensor_data, tensor_col_names 171 | end} 172 | 173 | Dataframe.get = argcheck{ 174 | doc = [[ 175 | 176 | ### Dataframe.get(@ARGP) 177 | 178 | A function for *torchnet* compliance. It subsets a single index and returns the 179 | `to_tensor` on that example. 180 | 181 | @ARGT 182 | 183 | _Return value_: (1) torch.tensor with 1 row and #numerical columns 184 | 185 | ]], 186 | {name="self", type="Dataframe"}, 187 | {name="idx", type="number"}, 188 | call = function(self, idx) 189 | local row = self:sub(idx, idx) 190 | return row:to_tensor() 191 | end} 192 | -------------------------------------------------------------------------------- /dataframe/metatable.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataframe = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Metatable functions 10 | 11 | ]] 12 | 13 | Dataframe.size = argcheck{ 14 | doc = [[ 15 | 16 | ### Dataframe.size(@ARGP) 17 | 18 | By providing a dimension you can get only that dimension, row == 1, col == 2. If 19 | the value is omitted it will return the number of rows in order to comply with the 20 | torchnet standard. 21 | 22 | @ARGT 23 | 24 | _Return value_: integer 25 | ]], 26 | {name="self", type="Dataframe"}, 27 | {name="dim", type="number", doc="The dimension of interest", default = 1}, 28 | call=function(self, dim) 29 | assert(isint(dim), "The dimension isn't an integer: " .. tostring(dim)) 30 | assert(dim == 1 or dim == 2, "The dimension can only be between 1 and 2 - you've provided: " .. dim) 31 | if (dim == 1) then 32 | if (not self.column_order or #self.column_order == 0) then 33 | return 0 34 | end 35 | 36 | local col = self.column_order[1] 37 | if (self:has_column(col)) then 38 | return self:get_column(self.column_order[1]):size() 39 | else 40 | -- this case happens when _copy_meta has been called and the column_order has been set 41 | -- TODO: remove the dependence of column_order for the row calc 42 | return 0 43 | end 44 | end 45 | 46 | return #self.column_order 47 | end} 48 | 49 | doc = [[ 50 | 51 | ### Dataframe.[] 52 | 53 | The `__index__` function is a powerful tool that allows quick access to regular functions 54 | 55 | - _Single integer_: it returns the raw row table (see `get_row()`) 56 | - _Df_Array()_: select rows of interest (see `_create_subset()`) 57 | - _"start:stop"_: get a row span using start/stop index, e.g. `"2:5"` (see `sub()`) 58 | - _"$column_name"_: get a column by prepending the name with `$`, e.g. `"$a column name"` (see `get_column`) 59 | - _"/subset_name"_: get a subset by prepending the name with `/`, e.g.
`"/a subset name"` (see `get_subset`) 60 | 61 | _Return value_: Table or Dataframe 62 | ]] 63 | 64 | function Dataframe:__index__(index) 65 | if (torch.type(index) == "number") then 66 | return self:get_row(index), true 67 | end 68 | 69 | if (torch.type(index) == "string") then 70 | if (index:match("^[0-9]+:[0-9]+$")) then 71 | -- Get the core data 72 | local start = index:gsub(":.*", "") 73 | start = tonumber(start) 74 | local stop = index:gsub("[^:]+:", "") 75 | stop = tonumber(stop) 76 | 77 | return self:sub{start=start, stop=stop}, true 78 | end 79 | 80 | -- Index a column using a $ at the beginning of a string 81 | if (index:match("^[$]")) then 82 | local column_name = index:gsub("^[$]", "") 83 | return self:get_column(column_name), true 84 | end 85 | 86 | -- Index a subset using a / at the beginning of a string 87 | if (index:match("^[/]")) then 88 | local subset_name = index:gsub("^[/]", "") 89 | return self:get_subset(subset_name), true 90 | end 91 | 92 | return false 93 | end 94 | 95 | if (torch.type(index) == "Df_Array") then 96 | return self:_create_subset(index), true 97 | end 98 | 99 | return false 100 | end 101 | 102 | doc = [[ 103 | 104 | ### Dataframe.[] = 105 | 106 | The `__newindex__` allows easy updating of a single row (see `_update_single_row()`) 107 | 108 | ]] 109 | 110 | function Dataframe:__newindex__(index, value) 111 | if (torch.type(index) == "number") then 112 | self:_update_single_row(index, Df_Tbl(value), Df_Tbl(self:get_row(index))) 113 | return true 114 | end 115 | 116 | return false 117 | end 118 | 119 | Dataframe.__tostring__ = argcheck{ 120 | doc=[[ 121 | 122 | ### Dataframe.__tostring__(@ARGP) 123 | 124 | A wrapper for `tostring()` 125 | 126 | @ARGT 127 | 128 | _Return value_: string 129 | ]], 130 | {name="self", type="Dataframe"}, 131 | call=function (self) 132 | return self:tostring() 133 | end} 134 | 135 | 136 | Dataframe.copy = argcheck{ 137 | doc = [[ 138 | 139 | ### Dataframe.copy(@ARGP) 140 | 141 | Copies the table together with all metadata 142 | 143 | @ARGT 144 | 145 | _Return value_: Dataframe 146 | ]], 147 | {name="self", type="Dataframe"}, 148 | call=function(self) 149 | local new_df = Dataframe.new(Df_Dict(self.dataset)) 150 | new_df = self:_copy_meta(new_df) 151 | return new_df 152 | end} 153 | 154 | Dataframe.__len__ = argcheck{ 155 | doc = [[ 156 | 157 | ### Dataframe.# 158 | 159 | Returns the number of rows 160 | 161 | _Return value_: integer 162 | ]], 163 | {name="self", type="Dataframe"}, 164 | {name="other", type="Dataframe"}, 165 | call=function(self, other) 166 | return self:size(1) 167 | end} 168 | 169 | Dataframe.__len__ = argcheck{ 170 | overload=Dataframe.__len__, 171 | {name="self", type="Dataframe"}, 172 | call=function(self) 173 | return self:size(1) 174 | end} 175 | 176 | Dataframe.__eq__ = argcheck{ 177 | doc = [[ 178 | 179 | ### Dataframe.== 180 | 181 | Checks if Dataframe's contain the same values 182 | 183 | _Return value_: boolean 184 | ]], 185 | {name="self", type="Dataframe"}, 186 | {name="other", type="Dataframe"}, 187 | call=function(self, other) 188 | -- Check that size matches 189 | if (self:size(1) ~= other:size(1) or 190 | self:size(2) ~= other:size(2)) then 191 | return false 192 | end 193 | 194 | -- Check that columns match 195 | for i=1,#self.column_order do 196 | if (not other:has_column(self.column_order[i])) then 197 | return false 198 | end 199 | end 200 | 201 | -- Check actual content (expensive why this is left to last) 202 | for i=1,#self.column_order do 203 | local self_col = 
self:get_column(self.column_order[i]) 204 | local other_col = other:get_column(self.column_order[i]) 205 | 206 | for i=1,self:size(1) do 207 | -- one is nan and not the other 208 | if ((not isnan(self_col[i]) and 209 | isnan(other_col[i])) or 210 | (isnan(self_col[i]) and 211 | not isnan(other_col[i]))) then 212 | return false 213 | end 214 | 215 | -- Actual value check if both weren't nan 216 | if (not(isnan(self_col[i]))) then 217 | if (self_col[i] ~= other_col[i]) then 218 | return false 219 | end 220 | end 221 | 222 | end 223 | end 224 | 225 | -- If the function hasn't returned by now, the two dataframes are equal 226 | return true 227 | end} 228 | -------------------------------------------------------------------------------- /dataframe/missing_data.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataframe = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Missing data functions 10 | 11 | ]] 12 | 13 | Dataframe.count_na = argcheck{ 14 | doc = [[ 15 | 16 | ### Dataframe.count_na(@ARGP) 17 | 18 | Count missing values in dataset 19 | 20 | @ARGT 21 | 22 | _Return value_: Dataframe or table containing missing values per column, total na 23 | ]], 24 | {name="self", type="Dataframe"}, 25 | {name="columns", type="Df_Array", doc="The columns to count", opt=true}, 26 | {name='as_dataframe', type='boolean', default=true, 27 | doc="Return a dataframe"}, 28 | call=function(self, columns, as_dataframe) 29 | if (columns) then 30 | columns = columns.data 31 | else 32 | columns = self.column_order 33 | end 34 | 35 | local ret = {} 36 | local tot_na = 0 37 | for i=1,#columns do 38 | ret[columns[i]] = self:count_na(columns[i]) 39 | tot_na = tot_na + ret[columns[i]] 40 | end 41 | 42 | if (as_dataframe) then 43 | local ret_df = Dataframe.new() 44 | for name,val in pairs(ret) do 45 | ret_df:append{rows = Df_Dict{Column = name, Value = val}, 46 | column_order = Df_Array("Column", "Value")} 47 | end 48 | return ret_df, tot_na 49 | else 50 | return ret, tot_na 51 | end 52 | end} 53 | 54 | Dataframe.count_na = argcheck{ 55 | doc = [[ 56 | If you only want to count a single column 57 | 58 | @ARGT 59 | 60 | _Return value_: single integer 61 | ]], 62 | overload=Dataframe.count_na, 63 | {name="self", type="Dataframe"}, 64 | {name="column", type="string", doc="The column to count"}, 65 | call=function(self, column) 66 | self:assert_has_column(column) 67 | 68 | return self:get_column(column):count_na() 69 | end} 70 | 71 | Dataframe.fill_na = argcheck{ 72 | doc = [[ 73 | 74 | ### Dataframe.fill_na(@ARGP) 75 | 76 | Replace missing values in a specific column 77 | 78 | @ARGT 79 | 80 | _Return value_: self 81 | ]], 82 | {name="self", type="Dataframe"}, 83 | {name="column_name", type="string", doc="The column to fill"}, 84 | {name="default_value", type="number|string|boolean", 85 | doc="The default missing value", default=0}, 86 | call=function(self, column_name, default_value) 87 | self:assert_has_column(column_name) 88 | 89 | local column_data = self:get_column(column_name) 90 | 91 | column_data:fill_na(default_value) 92 | 93 | return self 94 | end} 95 | 96 | Dataframe.fill_all_na = argcheck{ 97 | doc = [[ 98 | 99 | ### Dataframe.fill_all_na(@ARGP) 100 | 101 | Replace missing values in all columns 102 | 103 | @ARGT 104 | 105 | _Return value_: self 106 | ]], 107 | {name="self", type="Dataframe"}, 108 | {name="default_value", type="number|string|boolean", doc="The
default missing value", default=0}, 109 | call=function(self, default_value) 110 | for i=1,#self.column_order do 111 | self:fill_na(self.column_order[i], default_value) 112 | end 113 | 114 | return self 115 | end} 116 | -------------------------------------------------------------------------------- /dataseries/export.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataseries = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Export functions 10 | 11 | Here are functions are used for exporting to a different format. Generally `to_` 12 | functions should reside here. Only exception is the `tostring`. 13 | 14 | ]] 15 | 16 | Dataseries.to_tensor = argcheck{ 17 | doc=[[ 18 | 19 | ### Dataseries.to_tensor(@ARGP) 20 | 21 | Returns the values in tensor format. Note that if you don't provide a replacement 22 | for missing values and there are missing values the function will throw an error. 23 | 24 | *Note*: boolean columns are not tensors and need to be manually converted to a 25 | tensor. This since 0 would be a natural value for false but can cause issues as 26 | neurons are labeled 1 to n for classification tasks. See the `Dataframe.update` 27 | function for details or run the `boolean2tensor`. 28 | 29 | @ARGT 30 | 31 | _Return value_: `torch.*Tensor` of the current type 32 | ]], 33 | {name="self", type="Dataseries"}, 34 | {name="missing_value", type="number", 35 | doc="Set a value for the missing data", 36 | opt=true}, 37 | {name="copy", type="boolean", default=true, 38 | doc="Set to false if you want the original data to be returned."}, 39 | call=function(self, missing_value) 40 | assert(self:type():match("torch.*Tensor"), 41 | "Can only automatically retrieve columns that already are tensors") 42 | assert(self:count_na() == 0 or missing_value, 43 | "Missing data should be replaced with a default value before retrieving tensor") 44 | 45 | local ret 46 | if (copy) then 47 | ret = self:copy() 48 | else 49 | ret = self 50 | end 51 | 52 | if (missing_value and self:count_na() > 0) then 53 | assert(copy, "Replacing missing values is not allowed in to_tensor unless you are returning a copy") 54 | ret:fill_na(missing_value) 55 | end 56 | 57 | return ret.data 58 | end} 59 | 60 | Dataseries.to_table = argcheck{ 61 | doc=[[ 62 | 63 | ### Dataseries.to_table(@ARGP) 64 | 65 | Returns the values in table format 66 | 67 | @ARGT 68 | 69 | _Return value_: table 70 | ]], 71 | {name="self", type="Dataseries"}, 72 | {name="boolean2string", type="boolean", opt=true, 73 | doc="Convert boolean values to strings since they cause havoc with csvigo"}, 74 | call=function(self, boolean2string) 75 | local ret = {} 76 | for i=1,self:size() do 77 | ret[i] = self:get(i) 78 | end 79 | 80 | if (boolean2string and self:type() == "tds.Vec") then 81 | for i=1,#ret do 82 | if (type(ret[i]) == "boolean") then 83 | ret[i] = tostring(ret[i]) 84 | end 85 | end 86 | end 87 | 88 | return ret 89 | end} 90 | -------------------------------------------------------------------------------- /dataseries/metatable.lua: -------------------------------------------------------------------------------- 1 | local params = {...} 2 | local Dataseries = params[1] 3 | 4 | local argcheck = require "argcheck" 5 | local doc = require "argcheck.doc" 6 | 7 | doc[[ 8 | 9 | ## Metatable functions 10 | 11 | ]] 12 | 13 | doc = [[ 14 | 15 | ### Dataseries.[] 16 | 17 | The `__index__` function is a powerful tool that 
allows quick access to regular functions 18 | 19 | - _Single integer_: it returns the raw element (see `get()`) 20 | - _Df_Array()_: select a set of interest (see `_create_subset()`) 21 | - _"start:stop"_: get a row span using start/stop index, e.g. `"2:5"` (see `sub()`) 22 | 23 | _Return value_: Table or Dataseries 24 | ]] 25 | 26 | function Dataseries:__index__(index) 27 | local thtype = torch.type(index) 28 | -- If this is a number or a Df_Array, let `get()` method handle them both 29 | if (thtype == "number" or 30 | thtype == "Df_Array") then 31 | return self:get(index), true 32 | -- If this is a string matching "start:stop", it should be a query for a subset 33 | elseif (thtype == "string" and index:match("^[0-9]*:[0-9]*$")) then 34 | local start = index:gsub(":.*", "") 35 | start = tonumber(start) 36 | 37 | local stop = index:gsub("[^:]*:", "") 38 | stop = tonumber(stop) 39 | 40 | return self:sub(start, stop), true 41 | end 42 | 43 | return false 44 | end 45 | 46 | 47 | doc = [[ 48 | 49 | ### Dataseries.[] = 50 | 51 | The `__newindex__` allows updating of a single element (uses `set()`) 52 | 53 | ]] 54 | function Dataseries:__newindex__(index, value) 55 | if (torch.type(index) == "number") then 56 | self:set(index, value) 57 | return true 58 | end 59 | 60 | return false 61 | end 62 | 63 | Dataseries.__len__ = argcheck{ 64 | doc = [[ 65 | 66 | ### Dataseries.# 67 | 68 | Returns the number of elements 69 | 70 | _Return value_: integer 71 | ]], 72 | {name="self", type="Dataseries"}, 73 | {name="other", type="Dataseries", opt=true}, 74 | call=function(self, other) 75 | return self:size() 76 | end} 77 | 78 | Dataseries.__tostring__ = argcheck{ 79 | doc=[[ 80 | 81 | ### Dataseries.__tostring__(@ARGP) 82 | 83 | A wrapper for `tostring()` 84 | 85 | @ARGT 86 | 87 | _Return value_: string 88 | ]], 89 | {name="self", type="Dataseries"}, 90 | call=function (self) 91 | return self:tostring() 92 | end} 93 | -------------------------------------------------------------------------------- /doc.lua: -------------------------------------------------------------------------------- 1 | local paths = require 'paths' 2 | 3 | local dataframe_path = paths.thisfile():gsub("doc.lua$", "?.lua") 4 | local dataframe_dir = string.gsub(dataframe_path, "[^/]+$", "") 5 | 6 | -- Custom argument checks 7 | local argcheck_file = string.gsub(dataframe_path,"?", "argcheck") 8 | assert(loadfile(argcheck_file))() 9 | 10 | -- Get the core loader function 11 | local loader_file = string.gsub(dataframe_path,"?", "utils/loader") 12 | assert(loadfile(loader_file))() 13 | 14 | load_dir_files(dataframe_dir .. "utils/doc_helpers/") 15 | 16 | --[[ 17 | The doc.lua loads everything in the same order as the init script. Since 18 | we want to link the scripts together later, the process has three sections: 19 | 20 | 1. Load the scripts and store the full docs in the docs table. The file order is 21 | retained via the files table. 22 | 2. Parse the files in the appropriate order and generate a table of contents for each 23 | file that is written to the doc folder with the same name as the file but with 24 | `md` as file ending. 25 | 3. Merge all the table of contents data into the README so that the docs are 26 | easier to navigate. 27 | ]] 28 | local docs = {} 29 | local files = {} 30 | files.utils, docs.utils = load_dir_files{ 31 | path = dataframe_dir .. "utils/", 32 | docs = true 33 | } 34 | 35 | files.helper_classes, docs.helper_classes = load_dir_files{ 36 | path = dataframe_dir ..
"helper_classes/", 37 | docs = true 38 | } 39 | 40 | files.dataseries, docs.dataseries = load_dir_files{ 41 | path = dataframe_dir .. "dataseries/", 42 | docs = true 43 | } 44 | 45 | files.core, docs.core = load_dir_files{ 46 | path = dataframe_dir .. "dataframe/", 47 | docs = true 48 | } 49 | 50 | files.sub_classes, docs.sub_classes = 51 | -- Load all sub classes 52 | load_dir_files{ 53 | path = dataframe_dir .. "sub_classes/", 54 | params = {Dataframe}, 55 | docs = true 56 | } 57 | 58 | --[[ 59 | !!! Start section 2 !!! 60 | Parse each group, create a directory for that group, parse all files and write an 61 | MD for each file. Then add a Readme for that directory. 62 | ]] 63 | 64 | local parsed_docs = {} 65 | local doc_path = "doc" 66 | if (not paths.dirp(doc_path)) then 67 | paths.mkdir(doc_path) 68 | end 69 | 70 | local rough_toc_tbl = {} 71 | local detailed_toc_tbl = {} 72 | for group_name,group in pairs(docs) do 73 | local sub_doc_path = ("%s/%s/"):format(doc_path,group_name) 74 | if (not paths.dirp(sub_doc_path)) then 75 | paths.mkdir(sub_doc_path) 76 | end 77 | 78 | local grp_rough_toc = "" 79 | local grp_detailed_toc = "" 80 | local gnrl_rough_toc = "" 81 | local gnrl_detailed_toc = "" 82 | 83 | parsed_docs[group_name] = {} 84 | for _,file_name in ipairs(files[group_name]) do 85 | local base_fn = paths.basename(file_name) 86 | local md_path = ("%s%s"):format(sub_doc_path, 87 | base_fn:gsub("%.lua$", ".md")) 88 | 89 | parsed_docs[group_name][base_fn] = parse_doc(group[file_name], base_fn) 90 | local pd = parsed_docs[group_name][base_fn] 91 | write_doc(pd, 92 | md_path) 93 | 94 | grp_rough_toc, grp_detailed_toc = 95 | get_doc_anchors(sub_doc_path, md_path, pd, grp_rough_toc, grp_detailed_toc) 96 | gnrl_rough_toc, gnrl_detailed_toc = 97 | get_doc_anchors(doc_path, md_path, pd, gnrl_rough_toc, gnrl_detailed_toc) 98 | end 99 | 100 | local readmefile = io.open(sub_doc_path .. "README.md", "w") 101 | readmefile:write(([[# Documentation for %s 102 | 103 | This documentation ha been auto-generated from code using the `argcheck` system. 104 | 105 | ## Table of contents (file-level) 106 | 107 | Below follows a more [detailed](#detailed) table of contents with links to 108 | the different functions. Not this list may be incompleted due to failure to 109 | add apropriate anchor tags during documentation. 110 | 111 | %s 112 | 113 | ## Detailed table of contents (file-level + anchors) 114 | 115 | %s]]):format(group_name:gsub("_", " "), grp_rough_toc, grp_detailed_toc)) 116 | 117 | -- Save the group TOCS for the general README 118 | rough_toc_tbl[group_name] = gnrl_rough_toc 119 | detailed_toc_tbl[group_name] = gnrl_detailed_toc 120 | end 121 | 122 | local readmefile = io.open("doc/README.md", "w") 123 | readmefile:write(([[# Documentation for torch-dataframe 124 | 125 | This documentation ha been auto-generated from code using the `argcheck` system. 126 | 127 | Below follows a more [detailed](#detailed) table of contents with links to 128 | the different functions. Not this list may be incompleted due to failure to 129 | add apropriate anchor tags during documentation. 
130 | 131 | ## Dataframe core components 132 | 133 | %s 134 | 135 | ## Dataseries - Dataframe's data storage 136 | 137 | %s 138 | 139 | ## Dataframe sub-classes 140 | 141 | %s 142 | 143 | ## Helper classes 144 | 145 | %s]]):format(rough_toc_tbl["core"], 146 | rough_toc_tbl["dataseries"], 147 | rough_toc_tbl["sub_classes"], 148 | rough_toc_tbl["helper_classes"])) 149 | 150 | detailed_toc = ([[ 151 | 152 | # Detailed table of contents (file-level + anchors) 153 | 154 | ## Dataframe core components 155 | 156 | %s 157 | 158 | ## Dataseries - Dataframe's data storage 159 | 160 | %s 161 | 162 | ## Dataframe sub-classes 163 | 164 | %s 165 | 166 | ## Helper classes 167 | 168 | %s]]):format(detailed_toc_tbl["core"], 169 | detailed_toc_tbl["dataseries"], 170 | detailed_toc_tbl["sub_classes"], 171 | detailed_toc_tbl["helper_classes"]) 172 | 173 | -- Remove these elements from the tables in order to avoid outputting them twice 174 | for _,key in ipairs({"core", "dataseries", "sub_classes", "helper_classes"}) do 175 | rough_toc_tbl[key] = nil 176 | detailed_toc_tbl[key] = nil 177 | end 178 | 179 | for group_name, toc in pairs(rough_toc_tbl) do 180 | local group_title = group_name:sub(1,1):upper() .. group_name:sub(2):gsub("_", " ") 181 | readmefile:write(([[ 182 | 183 | ## %s 184 | 185 | %s]]):format(group_title, toc)) 186 | detailed_toc = ([[%s 187 | 188 | ## %s 189 | 190 | %s]]):format(detailed_toc, group_title, detailed_toc_tbl[group_name]) 191 | end 192 | 193 | readmefile:write(([[ 194 | 195 | %s 196 | ]]):format(detailed_toc)) 197 | 198 | readmefile:close() 199 | -------------------------------------------------------------------------------- /doc/core/README.md: -------------------------------------------------------------------------------- 1 | # Documentation for core 2 | 3 | This documentation has been auto-generated from code using the `argcheck` system. 4 | 5 | ## Table of contents (file-level) 6 | 7 | Below follows a more [detailed](#detailed) table of contents with links to 8 | the different functions. Note that this list may be incomplete due to a failure to 9 | add appropriate anchor tags during documentation.
10 | 11 | 12 | - [Core functions](init.md) 13 | - [Categorical functions](categorical.md) 14 | - [Column functions](column.md) 15 | - [Data save/export functions](export_data.md) 16 | - [Data loader functions](load_data.md) 17 | - [Metatable functions](metatable.md) 18 | - [Missing data functions](missing_data.md) 19 | - [Output functions](output.md) 20 | - [Row functions](row.md) 21 | - [Subsetting and manipulation functions](select_set_update.md) 22 | - [Statistical functions](statistics.md) 23 | - [Subsets and batches](subsets_and_batches.md) 24 | 25 | ## Detailed table of contents (file-level + anchors) 26 | 27 | 28 | - **[Core functions](init.md)** 29 | - [Dataframe.`__init`](init.md#Dataframe.__init) 30 | - [Dataframe.get_schema](init.md#Dataframe.get_schema) 31 | - [Dataframe.shape](init.md#Dataframe.shape) 32 | - [Dataframe.version](init.md#Dataframe.version) 33 | - [Dataframe.set_version](init.md#Dataframe.set_version) 34 | - [Dataframe.upgrade_frame](init.md#Dataframe.upgrade_frame) 35 | - [Dataframe.assert_is_index](init.md#Dataframe.assert_is_index) 36 | - **[Categorical functions](categorical.md)** 37 | - [Dataframe.as_categorical](categorical.md#Dataframe.as_categorical) 38 | - [Dataframe.add_cat_key](categorical.md#Dataframe.add_cat_key) 39 | - [Dataframe.as_string](categorical.md#Dataframe.as_string) 40 | - [Dataframe.clean_categorical](categorical.md#Dataframe.clean_categorical) 41 | - [Dataframe.is_categorical](categorical.md#Dataframe.is_categorical) 42 | - [Dataframe.get_cat_keys](categorical.md#Dataframe.get_cat_keys) 43 | - [Dataframe.to_categorical](categorical.md#Dataframe.to_categorical) 44 | - [Dataframe.from_categorical](categorical.md#Dataframe.from_categorical) 45 | - [Dataframe.boolean2categorical](categorical.md#Dataframe.boolean2categorical) 46 | - **[Column functions](column.md)** 47 | - [Dataframe.is_numerical](column.md#Dataframe.is_numerical) 48 | - [Dataframe.is_string](column.md#Dataframe.is_string) 49 | - [Dataframe.is_boolean](column.md#Dataframe.is_boolean) 50 | - [Dataframe.has_column](column.md#Dataframe.has_column) 51 | - [Dataframe.assert_has_column](column.md#Dataframe.assert_has_column) 52 | - [Dataframe.assert_has_not_column](column.md#Dataframe.assert_has_not_column) 53 | - [Dataframe.drop](column.md#Dataframe.drop) 54 | - [Dataframe.add_column](column.md#Dataframe.add_column) 55 | - [Dataframe.get_column](column.md#Dataframe.get_column) 56 | - [Dataframe.reset_column](column.md#Dataframe.reset_column) 57 | - [Dataframe.rename_column](column.md#Dataframe.rename_column) 58 | - [Dataframe.get_numerical_colnames](column.md#Dataframe.get_numerical_colnames) 59 | - [Dataframe.get_column_order](column.md#Dataframe.get_column_order) 60 | - [Dataframe.swap_column_order](column.md#Dataframe.swap_column_order) 61 | - [Dataframe.pos_column_order](column.md#Dataframe.pos_column_order) 62 | - [Dataframe.boolean2tensor](column.md#Dataframe.boolean2tensor) 63 | - **[Data save/export functions](export_data.md)** 64 | - [Dataframe.to_csv](export_data.md#Dataframe.to_csv) 65 | - [Dataframe.to_tensor](export_data.md#Dataframe.to_tensor) 66 | - [Dataframe.get](export_data.md#Dataframe.get) 67 | - **[Data loader functions](load_data.md)** 68 | - [Dataframe.load_csv](load_data.md#Dataframe.load_csv) 69 | - [Dataframe.bulk_load_csv](load_data.md#Dataframe.bulk_load_csv) 70 | - [Dataframe.load_table](load_data.md#Dataframe.load_table) 71 | - [Dataframe.`_clean_columns`](load_data.md#Dataframe._clean_columns) 72 | - **[Metatable functions](metatable.md)** 73 | - 
[Dataframe.size](metatable.md#Dataframe.size) 74 | - [Dataframe.`__tostring__`](metatable.md#Dataframe.__tostring__) 75 | - [Dataframe.copy](metatable.md#Dataframe.copy) 76 | - [Dataframe.#](metatable.md#Dataframe.#) 77 | - [Dataframe.==](metatable.md#Dataframe.==) 78 | - **[Missing data functions](missing_data.md)** 79 | - [Dataframe.count_na](missing_data.md#Dataframe.count_na) 80 | - [Dataframe.fill_na](missing_data.md#Dataframe.fill_na) 81 | - [Dataframe.fill_na](missing_data.md#Dataframe.fill_na) 82 | - **[Output functions](output.md)** 83 | - [Dataframe.output](output.md#Dataframe.output) 84 | - [Dataframe.show](output.md#Dataframe.show) 85 | - [Dataframe.tostring](output.md#Dataframe.tostring) 86 | - [Dataframe.`_to_html`](output.md#Dataframe._to_html) 87 | - **[Row functions](row.md)** 88 | - [Dataframe.get_row](row.md#Dataframe.get_row) 89 | - [Dataframe.insert](row.md#Dataframe.insert) 90 | - [Dataframe.insert](row.md#Dataframe.insert) 91 | - [Dataframe.append](row.md#Dataframe.append) 92 | - [Dataframe.rbind](row.md#Dataframe.rbind) 93 | - [Dataframe.remove_index](row.md#Dataframe.remove_index) 94 | - **[Subsetting and manipulation functions](select_set_update.md)** 95 | - [Dataframe.sub](select_set_update.md#Dataframe.sub) 96 | - [Dataframe.get_random](select_set_update.md#Dataframe.get_random) 97 | - [Dataframe.head](select_set_update.md#Dataframe.head) 98 | - [Dataframe.tail](select_set_update.md#Dataframe.tail) 99 | - [Dataframe.`_create_subset`](select_set_update.md#Dataframe._create_subset) 100 | - [Dataframe.where](select_set_update.md#Dataframe.where) 101 | - [Dataframe.which](select_set_update.md#Dataframe.which) 102 | - [Dataframe.update](select_set_update.md#Dataframe.update) 103 | - [Dataframe.set](select_set_update.md#Dataframe.set) 104 | - [Dataframe.wide2long](select_set_update.md#Dataframe.wide2long) 105 | - **[Statistical functions](statistics.md)** 106 | - [Dataframe.unique](statistics.md#Dataframe.unique) 107 | - [Dataframe.value_counts](statistics.md#Dataframe.value_counts) 108 | - [Dataframe.which_max](statistics.md#Dataframe.which_max) 109 | - [Dataframe.which_min](statistics.md#Dataframe.which_min) 110 | - [Dataframe.get_mode](statistics.md#Dataframe.get_mode) 111 | - [Dataframe.get_max_value](statistics.md#Dataframe.get_max_value) 112 | - [Dataframe.get_min_value](statistics.md#Dataframe.get_min_value) 113 | - **[Subsets and batches](subsets_and_batches.md)** 114 | - [Dataframe.create_subsets](subsets_and_batches.md#Dataframe.create_subsets) 115 | - [Dataframe.reset_subsets](subsets_and_batches.md#Dataframe.reset_subsets) 116 | - [Dataframe.has_subset](subsets_and_batches.md#Dataframe.has_subset) 117 | - [Dataframe.get_subset](subsets_and_batches.md#Dataframe.get_subset) -------------------------------------------------------------------------------- /doc/core/categorical.md: -------------------------------------------------------------------------------- 1 | # API documentation for [categorical functions](#__Categorical functions__) 2 | - [Dataframe.as_categorical](#Dataframe.as_categorical) 3 | - [Dataframe.add_cat_key](#Dataframe.add_cat_key) 4 | - [Dataframe.as_string](#Dataframe.as_string) 5 | - [Dataframe.clean_categorical](#Dataframe.clean_categorical) 6 | - [Dataframe.is_categorical](#Dataframe.is_categorical) 7 | - [Dataframe.get_cat_keys](#Dataframe.get_cat_keys) 8 | - [Dataframe.to_categorical](#Dataframe.to_categorical) 9 | - [Dataframe.from_categorical](#Dataframe.from_categorical) 10 | - 
[Dataframe.boolean2categorical](#Dataframe.boolean2categorical) 11 | 12 | 13 | ## Categorical functions 14 | 15 | 16 | ### Dataframe.as_categorical(self, column_name[, levels][, labels][, exclude]) 17 | 18 | Set a column to categorical type. 19 | 20 | ``` 21 | ({ 22 | self = Dataframe -- 23 | column_name = string -- The column name to convert 24 | [levels = Df_Array|boolean] -- An optional array of the values that column might have taken. 25 | The default is the unique set of values taken by Dataframe.unique, 26 | sorted into increasing order. If you provide values that aren't present 27 | within the current column the value will still be saved and may be invoked in 28 | the future. [default=false] 29 | [labels = Df_Array|boolean] -- An optional character vector of labels for the levels 30 | (in the same order as levels after removing those in exclude) [default=false] 31 | [exclude = Df_Array|boolean] -- Values to be excluded when forming the set of levels. This should be 32 | of the same type as column, and will be coerced if necessary. [default=false] 33 | }) 34 | ``` 35 | 36 | _Return value_: self 37 | 38 | ``` 39 | ({ 40 | self = Dataframe -- 41 | column_array = Df_Array -- An array with column names 42 | [levels = Df_Array|boolean] -- An optional array of the values that column might have taken. 43 | The default is the unique set of values taken by Dataframe.unique, 44 | sorted into increasing order. If you provide values that aren't present 45 | within the current column the value will still be saved and may be invoked in 46 | the future. [default=false] 47 | [labels = Df_Array|boolean] -- An optional character vector of labels for the levels 48 | (in the same order as levels after removing those in exclude) [default=false] 49 | [exclude = Df_Array|boolean] -- Values to be excluded when forming the set of levels. This should be 50 | of the same type as column, and will be coerced if necessary. [default=false] 51 | }) 52 | ``` 53 | 54 | 55 | ### Dataframe.add_cat_key(self, column_name, key) 56 | 57 | Adds a key to the keyset of a categorical column. Mostly intended for internal use. 58 | 59 | ``` 60 | ({ 61 | self = Dataframe -- 62 | column_name = string -- The column name 63 | key = number|string -- The new key to insert 64 | }) 65 | ``` 66 | 67 | _Return value_: index value for key (integer) 68 | 69 | ### Dataframe.as_string(self, column_name) 70 | 71 | Converts a categorical column to a string column. This can be used to revert 72 | the Dataframe.as_categorical or as a way to convert numericals into strings. 73 | 74 | ``` 75 | ({ 76 | self = Dataframe -- 77 | column_name = string -- The column name 78 | }) 79 | ``` 80 | 81 | _Return value_: self 82 | 83 | ### Dataframe.clean_categorical(self, column_name[, reset_keys]) 84 | 85 | ``` 86 | ({ 87 | self = Dataframe -- 88 | column_name = string -- the name of the column 89 | [reset_keys = boolean] -- if all the keys should be reinitialized [default=false] 90 | }) 91 | ``` 92 | 93 | Removes any categories no longer present from the keys 94 | 95 | _Return value_: self 96 | 97 | ### Dataframe.is_categorical(self, column_name) 98 | 99 | Check if a column is categorical 100 | 101 | ``` 102 | ({ 103 | self = Dataframe -- 104 | column_name = string -- the name of the column 105 | }) 106 | ``` 107 | 108 | _Return value_: boolean 109 | 110 | ### Dataframe.get_cat_keys(self, column_name) 111 | 112 | Get keys from a categorical column.
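For orientation, here is a minimal usage sketch of the categorical workflow documented above, ending with the key lookup whose exact signature follows below. The `my_df` frame, the CSV path and the `species` column are hypothetical:

```
-- Sketch: convert a string column to categorical and inspect its keys
local my_df = Dataframe("my_data.csv") -- hypothetical file with a "species" column
my_df:as_categorical("species")
print(my_df:is_categorical("species")) -- true
-- Keys have the form {["setosa"] = 1, ["versicolor"] = 2, ...}
local keys = my_df:get_cat_keys("species")
-- Revert back to a plain string column
my_df:as_string("species")
```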
113 | 114 | ``` 115 | ({ 116 | self = Dataframe -- 117 | column_name = string -- the name of the column 118 | }) 119 | ``` 120 | 121 | _Return value_: table with `["key"] = number` structure 122 | 123 | ### Dataframe.to_categorical(self, data, column_name) 124 | 125 | Converts values to categorical according to a column's keys 126 | 127 | ``` 128 | ({ 129 | self = Dataframe -- 130 | data = number|torch.*Tensor|Df_Array -- The integer to be converted 131 | column_name = string -- The name of the column whose keys to use 132 | }) 133 | ``` 134 | 135 | _Return value_: string with the value 136 | 137 | ### Dataframe.from_categorical(self, data, column_name[, as_tensor]) 138 | 139 | ``` 140 | ({ 141 | self = Dataframe -- 142 | data = Df_Array -- The data to be converted 143 | column_name = string -- The name of the column 144 | [as_tensor = boolean] -- If the returned value should be a tensor [default=false] 145 | }) 146 | ``` 147 | 148 | Converts categorical to numerical according to a column's keys 149 | 150 | _Return value_: table or tensor 151 | 152 | ``` 153 | ({ 154 | self = Dataframe -- 155 | data = number|string -- The data to be converted 156 | column_name = string -- The name of the column 157 | }) 158 | ``` 159 | 160 | 161 | ### Dataframe.boolean2categorical(self, column_name[, false_str][, true_str]) 162 | 163 | Converts a boolean column into a categorical column 164 | 165 | ``` 166 | ({ 167 | self = Dataframe -- 168 | column_name = string -- The boolean column that you want to convert 169 | [false_str = string] -- The string value for false [default=false] 170 | [true_str = string] -- The string value for true [default=true] 171 | }) 172 | ``` 173 | 174 | _Return value_: self -------------------------------------------------------------------------------- /doc/core/export_data.md: -------------------------------------------------------------------------------- 1 | # API documentation for [Data save/export functions](#__Data save/export functions__) 2 | - [Dataframe.to_csv](#Dataframe.to_csv) 3 | - [Dataframe.to_tensor](#Dataframe.to_tensor) 4 | - [Dataframe.get](#Dataframe.get) 5 | 6 | 7 | ## Data save/export functions 8 | 9 | 10 | ### Dataframe.to_csv(self, path[, separator][, verbose]) 11 | 12 | Saves a Dataframe into a CSV using csvigo as backend 13 | 14 | _Return value_: self (Dataframe) 15 | 16 | ``` 17 | ({ 18 | self = Dataframe -- 19 | path = string -- path to file 20 | [separator = string] -- separator (one character) [default=,] 21 | [verbose = boolean] -- verbose load [default=false] 22 | }) 23 | ``` 24 | 25 | 26 | ### Dataframe.to_tensor(self) 27 | 28 | Convert the numeric section or specified columns of the dataset to a tensor 29 | 30 | ``` 31 | ({ 32 | self = Dataframe -- 33 | }) 34 | ``` 35 | 36 | _Return value_: (1) torch.tensor with self:size(1) rows and self:size(2) columns, 37 | (2) exported column names 38 | 39 | 40 | You can export selected columns using the columns argument: 41 | 42 | ``` 43 | ({ 44 | self = Dataframe -- 45 | columns = Df_Array -- The columns to export 46 | }) 47 | ``` 48 | 49 | If a filename is provided the tensor will be saved (`torch.save`) to that file: 50 | 51 | ``` 52 | ({ 53 | self = Dataframe -- 54 | filename = string -- Filename for tensor.save() 55 | [columns = Df_Array] -- The columns to export [default=false] 56 | }) 57 | ``` 58 | 59 | ### Dataframe.get(self, idx) 60 | 61 | A function for *torchnet* compliance. It subsets a single index and returns the 62 | `to_tensor` on that example.
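As a hedged sketch of how these export functions combine (assuming `my_df` holds numerical columns named `width` and `height`; the names are hypothetical), with the exact argument signature for `get` following below:

```
-- Sketch: export two numerical columns as a single tensor
local data, colnames = my_df:to_tensor(Df_Array("width", "height"))
print(data:size(1)) -- number of rows
print(colnames) -- {"width", "height"}

-- torchnet-style access: a 1-row tensor for the example at index 3
local example = my_df:get(3)
```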
63 | 64 | ``` 65 | ({ 66 | self = Dataframe -- 67 | idx = number -- 68 | }) 69 | ``` 70 | 71 | _Return value_: (1) torch.tensor with 1 row and #numerical columns -------------------------------------------------------------------------------- /doc/core/init.md: -------------------------------------------------------------------------------- 1 | # API documentation for [core functions](#__Core functions__) 2 | - [Dataframe.`__init`](#Dataframe.__init) 3 | - [Dataframe.get_schema](#Dataframe.get_schema) 4 | - [Dataframe.shape](#Dataframe.shape) 5 | - [Dataframe.version](#Dataframe.version) 6 | - [Dataframe.set_version](#Dataframe.set_version) 7 | - [Dataframe.upgrade_frame](#Dataframe.upgrade_frame) 8 | - [Dataframe.assert_is_index](#Dataframe.assert_is_index) 9 | 10 | 11 | ## Core functions 12 | 13 | 14 | ### Dataframe.__init(self) 15 | 16 | Creates and initializes a Dataframe class. Invoked through `local my_dataframe = Dataframe()` 17 | 18 | ``` 19 | ({ 20 | self = Dataframe -- 21 | }) 22 | ``` 23 | 24 | _Return value_: Dataframe 25 | Read in a CSV file 26 | 27 | ``` 28 | ({ 29 | self = Dataframe -- 30 | csv_file = string -- The file path to the CSV 31 | }) 32 | ``` 33 | 34 | Directly input a table 35 | 36 | ``` 37 | ({ 38 | self = Dataframe -- 39 | data = Df_Dict -- The data to read in 40 | [column_order = Df_Array] -- The order of the column (has to be array and _not_ a dictionary) 41 | }) 42 | ``` 43 | 44 | If you enter a column schema* and the number of rows, a table will be initialized. Note 45 | that you can optionally set all non-set values to `nan` values but this may be 46 | time-consuming for big datasets. 47 | 48 | * A schema is a hash table with the column names as keys and the column types 49 | as values. The column types are: 50 | - `boolean` 51 | - `integer` 52 | - `long` 53 | - `double` 54 | - `string` (this is stored as a `tds.Vec` and can be any value) 55 | 56 | ``` 57 | ({ 58 | self = Dataframe -- 59 | schema = Df_Dict -- The schema to use for initialization 60 | no_rows = number -- The number of rows 61 | [column_order = Df_Array] -- The column order 62 | [set_missing = boolean] -- Whether all elements should be set to missing from start [default=false] 63 | }) 64 | ``` 65 | 66 | _Return value_: Dataframe 67 | No updates are performed on already inserted data. The purpose of this method 68 | is to prepare a Dataframe object. 69 | 70 | A schema is a hash table with the column names as keys and the column types 71 | as values. The column types are: 72 | - `boolean` 73 | - `integer` 74 | - `long` 75 | - `double` 76 | - `string` (this is stored as a `tds.Vec` and can be any value) 77 | 78 | ``` 79 | ({ 80 | self = Dataframe -- 81 | schema = Df_Dict -- The schema to use for initialization 82 | column_order = Df_Array -- The column order 83 | }) 84 | ``` 85 | 86 | 87 | ### Dataframe.get_schema(self, column_name) 88 | 89 | Returns the schema, i.e.
column types 90 | 91 | ``` 92 | ({ 93 | self = Dataframe -- 94 | column_name = string -- The column to get schema for 95 | }) 96 | ``` 97 | 98 | _Return value_: string 99 | ``` 100 | ({ 101 | self = Dataframe -- 102 | [columns = Df_Array] -- The columns to get schema for 103 | }) 104 | ``` 105 | 106 | _Return value_: table 107 | 108 | ### Dataframe.shape(self) 109 | 110 | Returns the number of rows and columns in a table 111 | 112 | ``` 113 | ({ 114 | self = Dataframe -- 115 | }) 116 | ``` 117 | 118 | _Return value_: table 119 | 120 | ### Dataframe.version(self) 121 | 122 | Returns the current data-frame version 123 | 124 | ``` 125 | ({ 126 | self = Dataframe -- 127 | }) 128 | ``` 129 | 130 | _Return value_: string 131 | 132 | ### Dataframe.set_version(self) 133 | 134 | Sets the data-frame version 135 | 136 | ``` 137 | ({ 138 | self = Dataframe -- 139 | }) 140 | ``` 141 | 142 | _Return value_: self 143 | 144 | ### Dataframe.upgrade_frame(self[, skip_version][, current_version]) 145 | 146 | Upgrades a dataframe using the old batch loading framework to the new framework 147 | by instantiating the subsets argument, copying the indexes and setting the 148 | samplers to either: 149 | 150 | - linear for test/validate or shuffle = false 151 | - permutation if shuffle = true and none of above names 152 | 153 | ``` 154 | ({ 155 | self = Dataframe -- 156 | [skip_version = boolean] -- Set to true if you want to upgrade your dataframe regardless of the version check 157 | [current_version = number] -- The current version of the dataframe 158 | }) 159 | ``` 160 | 161 | *Note:* Sometimes the version check fails to identify that the Dataframe is of 162 | an old version and you can therefore skip the version check. 163 | 164 | _Return value_: Dataframe 165 | 166 | ### Dataframe.assert_is_index(self, index[, plus_one]) 167 | 168 | Asserts that the number is a valid index. 169 | 170 | ``` 171 | ({ 172 | self = Dataframe -- 173 | index = number -- The index to investigate 174 | [plus_one = boolean] -- Count next non-existing index as good. 
-------------------------------------------------------------------------------- /doc/core/load_data.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Data loader functions](#__Data loader functions__) 
2 | - [Dataframe.load_csv](#Dataframe.load_csv) 
3 | - [Dataframe.bulk_load_csv](#Dataframe.bulk_load_csv) 
4 | - [Dataframe.load_table](#Dataframe.load_table) 
5 | - [Dataframe.`_clean_columns`](#Dataframe._clean_columns) 
6 | 
7 | 
8 | ## Data loader functions 
9 | 
10 | 
11 | ### Dataframe.load_csv(self, path[, header][, schema][, separator][, skip][, verbose][, rows2explore]) 
12 | 
13 | Loads a CSV file into the Dataframe using csvigo as the backend 
14 | 
15 | ``` 
16 | ({ 
17 | self = Dataframe -- 
18 | path = string -- path to file 
19 | [header = boolean] -- whether the file has a header on the first line [default=true] 
20 | [schema = Df_Dict] -- The column schema types with column names as keys 
21 | [separator = string] -- separator (one character) [default=,] 
22 | [skip = number] -- skip this many lines at the start of the file [default=0] 
23 | [verbose = boolean] -- verbose load [default=false] 
24 | [rows2explore = number] -- The maximum number of rows to traverse when trying to identify the schema 
25 | }) 
26 | ``` 
27 | 
28 | _Return value_: self 
29 | 
30 | ### Dataframe.bulk_load_csv(self, path[, header][, schema][, separator][, skip][, verbose][, nthreads]) 
31 | 
32 | Loads a CSV file into the Dataframe using multithreading. 
33 | Warning: this method does not perform the same checks as load_csv would. It doesn't handle formats other than torch.*Tensor and tds.Vec. 
34 | 
35 | ``` 
36 | ({ 
37 | self = Dataframe -- 
38 | path = string -- path to file 
39 | [header = boolean] -- whether the file has a header on the first line (not used at the moment) [default=true] 
40 | [schema = Df_Dict] -- The column schema types with column names as keys 
41 | [separator = string] -- separator (one character) [default=,] 
42 | [skip = number] -- skip this many lines at the start of the file (not used at the moment) [default=0] 
43 | [verbose = boolean] -- verbose load [default=false] 
44 | [nthreads = number] -- Number of threads to use to read the csv file [default=1] 
45 | }) 
46 | ``` 
47 | 
48 | _Return value_: self 
49 | 
50 | ### Dataframe.load_table(self, data[, schema][, column_order]) 
51 | 
52 | ``` 
53 | ({ 
54 | self = Dataframe -- 
55 | data = Df_Dict -- Table (dictionary) to import. Max depth 2. 
56 | [schema = Df_Dict] -- Provide if you want to force column types 
57 | [column_order = Df_Array] -- The order of the columns (has to be an array and _not_ a dictionary) 
58 | }) 
59 | ``` 
60 | 
61 | Imports table data directly into the Dataframe. The columns should all be of equal length 
62 | or just single values. If the table contains one column with 10 rows and 
63 | another column with a single element, that element is duplicated 10 times, i.e. 
64 | the entire column is filled with that single value. 
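For example, the single-value recycling could look like this sketch (the column names and values are hypothetical):

```lua
local df = Dataframe()
df:load_table{data = Df_Dict{
	score  = {1, 2, 3, 4},  -- a full column with four rows
	source = 'file_1'       -- a single value, recycled to fill all four rows
}}
```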
65 | 
66 | 
67 | _Return value_: self 
68 | 
69 | ### Dataframe._clean_columns(self, data[, column_order][, schema]) 
70 | 
71 | ``` 
72 | { 
73 | self = Dataframe -- 
74 | data = table -- 
75 | [column_order = table] -- 
76 | [schema = table] -- 
77 | } 
78 | ``` 
79 | 
80 | Internal function to clean column names 
81 | 
82 | _Return value_: self -------------------------------------------------------------------------------- /doc/core/metatable.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [metatable functions](#__Metatable functions__) 
2 | - [Dataframe.size](#Dataframe.size) 
3 | - [Dataframe.`__tostring__`](#Dataframe.__tostring__) 
4 | - [Dataframe.copy](#Dataframe.copy) 
5 | - [Dataframe.#](#Dataframe.#) 
6 | - [Dataframe.==](#Dataframe.==) 
7 | 
8 | 
9 | ## Metatable functions 
10 | 
11 | 
12 | ### Dataframe.size(self[, dim]) 
13 | 
14 | By providing a dimension you can get only that dimension: row == 1, col == 2. If 
15 | the value is omitted it will return the number of rows in order to comply with the torchnet 
16 | standard. 
17 | 
18 | ``` 
19 | ({ 
20 | self = Dataframe -- 
21 | [dim = number] -- The dimension of interest [default=1] 
22 | }) 
23 | ``` 
24 | 
25 | _Return value_: integer 
26 | 
27 | ### Dataframe.__tostring__(self) 
28 | 
29 | A wrapper for `tostring()` 
30 | 
31 | ``` 
32 | ({ 
33 | self = Dataframe -- 
34 | }) 
35 | ``` 
36 | 
37 | _Return value_: string 
38 | 
39 | ### Dataframe.copy(self) 
40 | 
41 | Copies the table together with all metadata 
42 | 
43 | ``` 
44 | ({ 
45 | self = Dataframe -- 
46 | }) 
47 | ``` 
48 | 
49 | _Return value_: Dataframe 
50 | 
51 | ### Dataframe.# 
52 | 
53 | Returns the number of rows 
54 | 
55 | _Return value_: integer 
56 | 
57 | ### Dataframe.== 
58 | 
59 | Checks if two Dataframes contain the same values 
60 | 
61 | _Return value_: boolean -------------------------------------------------------------------------------- /doc/core/missing_data.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [missing data functions](#__Missing data functions__) 
2 | - [Dataframe.count_na](#Dataframe.count_na) 
3 | - [Dataframe.fill_na](#Dataframe.fill_na) 
4 | - [Dataframe.fill_na](#Dataframe.fill_na) 
5 | 
6 | 
7 | ## Missing data functions 
8 | 
9 | 
10 | ### Dataframe.count_na(self[, columns][, as_dataframe]) 
11 | 
12 | Count missing values in the dataset 
13 | 
14 | ``` 
15 | ({ 
16 | self = Dataframe -- 
17 | [columns = Df_Array] -- The columns to count 
18 | [as_dataframe = boolean] -- Return a dataframe [default=true] 
19 | }) 
20 | ``` 
21 | 
22 | _Return value_: Dataframe or table containing missing values per column, total na 
23 | If you only want to count a single column 
24 | 
25 | ``` 
26 | ({ 
27 | self = Dataframe -- 
28 | column = string -- The column to count 
29 | }) 
30 | ``` 
31 | 
32 | _Return value_: single integer 
33 | 
34 | ### Dataframe.fill_na(self, column_name[, default_value]) 
35 | 
36 | Replace missing values in a specific column 
37 | 
38 | ``` 
39 | ({ 
40 | self = Dataframe -- 
41 | column_name = string -- The column to fill 
42 | [default_value = number|string|boolean] -- The default missing value [default=0] 
43 | }) 
44 | ``` 
45 | 
46 | _Return value_: self 
47 | 
48 | ### Dataframe.fill_na(self[, default_value]) 
49 | 
50 | Replace missing values in all columns 
51 | 
52 | ``` 
53 | ({ 
54 | self = Dataframe -- 
55 | [default_value = number|string|boolean] -- The default missing value [default=0] 
56 | }) 
57 | ``` 
58 | 
59 | _Return value_: self 
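A short, hedged sketch of how the counting and filling functions above combine; the column name, the fill values, and the two-value return are illustrative assumptions based on the signatures:

```lua
-- Count missing values per column as a plain table instead of a Dataframe
local na_per_column, total_na = df:count_na{as_dataframe = false}
print(total_na)

-- Fill a single column, then all remaining columns, with explicit defaults
df:fill_na('score', -1)
df:fill_na(0)
```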
-------------------------------------------------------------------------------- /doc/core/output.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [output functions](#__Output functions__) 
2 | - [Dataframe.output](#Dataframe.output) 
3 | - [Dataframe.show](#Dataframe.show) 
4 | - [Dataframe.tostring](#Dataframe.tostring) 
5 | - [Dataframe.`_to_html`](#Dataframe._to_html) 
6 | 
7 | 
8 | ## Output functions 
9 | 
10 | 
11 | ### Dataframe.output(self[, html][, max_rows][, digits]) 
12 | 
13 | ``` 
14 | ({ 
15 | self = Dataframe -- 
16 | [html = boolean] -- If the output should be in html format [default=false] 
17 | [max_rows = number] -- Limit the maximum number of printed rows [default=20] 
18 | [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false] 
19 | }) 
20 | ``` 
21 | 
22 | Prints the table via itorch.html if running in iTorch and html == true, otherwise prints the table as a string 
23 | 
24 | _Return value_: self 
25 | 
26 | ### Dataframe.show(self[, digits]) 
27 | 
28 | ``` 
29 | ({ 
30 | self = Dataframe -- 
31 | [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false] 
32 | }) 
33 | ``` 
34 | 
35 | Prints the top and bottom sections of the table for a better overview. Uses itorch if available 
36 | 
37 | _Return value_: self 
38 | 
39 | ### Dataframe.tostring(self[, digits][, columns2skip][, no_rows][, min_col_width][, max_table_width]) 
40 | 
41 | Converts the table to a string representation that follows standard markdown syntax. 
42 | The table tries to respect a maximum table width inspired by the `dplyr` table print. 
43 | The core concept is that wide columns are clipped when the table risks becoming wider 
44 | than a certain maximum width. Since columns convey no information when clipped down 
45 | to just a few characters, there is also a minimum column width. 
46 | Columns that then don't fit are noted below the table as skipped columns. 
47 | 
48 | You can also specify columns that you wish to skip by providing the columns2skip 
49 | argument. If columns are skipped on user demand there won't be a ... column to 
50 | the right, but if the table is still too wide then the software may choose to skip 
51 | additional columns and thereby add a ... column. 
52 | 
53 | ``` 
54 | ({ 
55 | self = Dataframe -- 
56 | [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false] 
57 | [columns2skip = Df_Array] -- Columns to skip from the output [default=false] 
58 | [no_rows = number|boolean] -- The number of rows to display. If -1 then shows all. Defaults to setting in Dataframe.tostring_defaults [default=false] 
59 | [min_col_width = number|boolean] -- The minimum column width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false] 
60 | [max_table_width = number|boolean] -- The maximum table width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false] 
61 | }) 
62 | ``` 
63 | 
64 | _Return value_: string 
65 | 
66 | ``` 
67 | ({ 
68 | self = Dataframe -- 
69 | [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false] 
70 | columns2skip = string -- Columns to skip from the output as a regular expression 
71 | [no_rows = number] -- The number of rows to display. If -1 then shows all. Defaults to setting in Dataframe.tostring_defaults [default=false] 
72 | [min_col_width = number] -- The minimum column width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false] 
73 | [max_table_width = number] -- The maximum table width in characters. Defaults to setting in Dataframe.tostring_defaults [default=false] 
74 | }) 
75 | ``` 
76 | 
77 | 
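As a rough usage sketch, where the concrete settings are arbitrary examples:

```lua
-- A clipped markdown rendering, capped at five rows and 80 characters
print(df:tostring{no_rows = 5, max_table_width = 80})

-- Let the Dataframe pick the output channel (itorch html or plain text)
df:output{max_rows = 10}
```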
78 | ### Dataframe._to_html(self[, split_table][, offset][, digits]) 
79 | 
80 | ``` 
81 | ({ 
82 | self = Dataframe -- 
83 | [split_table = string] -- Where the table is split. Valid input is 'none', 'top', 'bottom', 'all'. 
84 | Note that the 'bottom' removes the trailing `</table>` while the 'top' removes 
85 | the initial `<table>`. The 'all' removes both but retains the header while 
86 | the 'top' has no header. 
87 | [default=none] 
88 | [offset = number] -- The line index offset [default=0] 
89 | [digits = number|boolean] -- Set this to an integer >= 0 in order to reduce the number of digits shown [default=false] 
90 | }) 
91 | ``` 
92 | 
93 | Internal function to convert a table to html (only works for a 1D table) 
94 | 
95 | _Return value_: string -------------------------------------------------------------------------------- /doc/core/row.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [row functions](#__Row functions__) 
2 | - [Dataframe.get_row](#Dataframe.get_row) 
3 | - [Dataframe.insert](#Dataframe.insert) 
4 | - [Dataframe.insert](#Dataframe.insert) 
5 | - [Dataframe.append](#Dataframe.append) 
6 | - [Dataframe.rbind](#Dataframe.rbind) 
7 | - [Dataframe.remove_index](#Dataframe.remove_index) 
8 | 
9 | 
10 | ## Row functions 
11 | 
12 | 
13 | ### Dataframe.get_row(self, index) 
14 | 
15 | Gets a single row from the Dataframe 
16 | 
17 | ``` 
18 | ({ 
19 | self = Dataframe -- 
20 | index = number -- The row index to retrieve 
21 | }) 
22 | ``` 
23 | 
24 | _Return value_: A table with the row content 
25 | 
26 | ### Dataframe.insert(self, index, rows) 
27 | 
28 | Inserts a row or multiple rows into the Dataframe at the position of the provided index. 
29 | 
30 | ``` 
31 | ({ 
32 | self = Dataframe -- 
33 | index = number -- The row number where to insert the row(s) 
34 | rows = Df_Dict -- Insert values to the dataset 
35 | }) 
36 | ``` 
37 | 
38 | _Return value_: self 
39 | 
40 | ### Dataframe.insert(self, index, rows, schema) 
41 | 
42 | Inserts a row or multiple rows into the Dataframe at the position of the provided index and 
43 | according to the provided schema. 
44 | 
45 | ``` 
46 | ({ 
47 | self = Dataframe -- 
48 | index = number -- The row number where to insert the row(s) 
49 | rows = Df_Dict -- Insert values to the dataset 
50 | schema = Df_Dict -- Specify a schema to check before insertion 
51 | }) 
52 | ``` 
53 | 
54 | _Return value_: self 
55 | Note: if you provide a Dataframe, the primary Dataframe's meta-information will 
56 | be the one that is kept 
57 | 
58 | ``` 
59 | ({ 
60 | self = Dataframe -- 
61 | index = number -- The row number where to insert the row(s) 
62 | rows = Dataframe -- A Dataframe that you want to insert 
63 | }) 
64 | ``` 
65 | 
66 | 
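A small sketch of the row accessors above; the values and column names are hypothetical:

```lua
-- Insert two rows at position 2
df:insert(2, Df_Dict{score = {99, 98}, label = {'x', 'y'}})

-- Retrieve a single row as a plain table
local row = df:get_row(1)
print(row.score, row.label)
```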
67 | ### Dataframe.append(self, rows[, column_order][, schema]) 
68 | 
69 | Appends the row(s) to the Dataframe. 
70 | 
71 | ``` 
72 | ({ 
73 | self = Dataframe -- 
74 | rows = Df_Dict -- Values to append to the Dataframe 
75 | [column_order = Df_Array] -- The order of the columns (has to be an array and _not_ a dictionary). Only used when the Dataframe is empty 
76 | [schema = Df_Dict] -- The schema for the data - used in case the table is new 
77 | }) 
78 | ``` 
79 | 
80 | _Return value_: self 
81 | Note: if you provide a Dataframe, the primary Dataframe's meta-information will 
82 | be the one that is kept 
83 | 
84 | ``` 
85 | ({ 
86 | self = Dataframe -- 
87 | rows = Dataframe -- A Dataframe that you want to append 
88 | }) 
89 | ``` 
90 | 
91 | 
92 | ### Dataframe.rbind(self, rows) 
93 | 
94 | Alias to Dataframe.append 
95 | 
96 | ``` 
97 | ({ 
98 | self = Dataframe -- 
99 | rows = Df_Dict -- Values to append to the Dataframe 
100 | }) 
101 | ``` 
102 | 
103 | _Return value_: self 
104 | Note: if you provide a Dataframe, the primary Dataframe's meta-information will 
105 | be the one that is kept 
106 | 
107 | ``` 
108 | ({ 
109 | self = Dataframe -- 
110 | rows = Dataframe -- A Dataframe that you want to append 
111 | }) 
112 | ``` 
113 | 
114 | 
115 | ### Dataframe.remove_index(self, index) 
116 | 
117 | Deletes a given row 
118 | 
119 | ``` 
120 | ({ 
121 | self = Dataframe -- 
122 | index = number -- The row index to remove 
123 | }) 
124 | ``` 
125 | 
126 | _Return value_: self -------------------------------------------------------------------------------- /doc/dataseries/README.md: -------------------------------------------------------------------------------- 
1 | # Documentation for dataseries 
2 | 
3 | This documentation has been auto-generated from code using the `argcheck` system. 
4 | 
5 | ## Table of contents (file-level) 
6 | 
7 | Below follows a more [detailed](#detailed) table of contents with links to 
8 | the different functions. Note that this list may be incomplete due to a failure to 
9 | add appropriate anchor tags during documentation. 
10 | 
11 | 
12 | - [Dataseries](init.md) 
13 | - [Categorical functions](categorical.md) 
14 | - [Export functions](export.md) 
15 | - [Metatable functions](metatable.md) 
16 | - [Single element functions](sngl_elmnt_ops.md) 
17 | - [Statistics](statistics.md) 
18 | 
19 | ## Detailed table of contents (file-level + anchors) 
20 | 
21 | 
22 | - **[Dataseries](init.md)** 
23 | - [Dataseries.`__init`](init.md#Dataseries.__init) 
24 | - [Dataseries.load](init.md#Dataseries.load) 
25 | - [Dataseries.new_storage](init.md#Dataseries.new_storage) 
26 | - [Dataseries.copy](init.md#Dataseries.copy) 
27 | - [Dataseries.size](init.md#Dataseries.size) 
28 | - [Dataseries.resize](init.md#Dataseries.resize) 
29 | - [Dataseries.assert_is_index](init.md#Dataseries.assert_is_index) 
30 | - [Dataseries.is_numerical](init.md#Dataseries.is_numerical) 
31 | - [Dataseries.is_numerical](init.md#Dataseries.is_numerical) 
32 | - [Dataseries.is_boolean](init.md#Dataseries.is_boolean) 
33 | - [Dataseries.is_string](init.md#Dataseries.is_string) 
34 | - [Dataseries.type](init.md#Dataseries.type) 
35 | - [Dataseries.get_variable_type](init.md#Dataseries.get_variable_type) 
36 | - [Dataseries.boolean2tensor](init.md#Dataseries.boolean2tensor) 
37 | - [Dataseries.fill](init.md#Dataseries.fill) 
38 | - [Dataseries.fill_na](init.md#Dataseries.fill_na) 
39 | - [Dataseries.tostring](init.md#Dataseries.tostring) 
40 | - [Dataseries.sub](init.md#Dataseries.sub) 
41 | - [Dataseries.eq](init.md#Dataseries.eq) 
42 | - [Dataseries.get_data_mask](init.md#Dataseries.get_data_mask) 
43 | - **[Categorical functions](categorical.md)** 
44 | - [Dataseries.as_categorical](categorical.md#Dataseries.as_categorical) 
45 | - [Dataseries.add_cat_key](categorical.md#Dataseries.add_cat_key) 
46 | - [Dataseries.as_string](categorical.md#Dataseries.as_string) 
47 | - 
[Dataseries.clean_categorical](categorical.md#Dataseries.clean_categorical) 
48 | - [Dataseries.is_categorical](categorical.md#Dataseries.is_categorical) 
49 | - [Dataseries.get_cat_keys](categorical.md#Dataseries.get_cat_keys) 
50 | - [Dataseries.to_categorical](categorical.md#Dataseries.to_categorical) 
51 | - [Dataseries.from_categorical](categorical.md#Dataseries.from_categorical) 
52 | - [Dataseries.boolean2categorical](categorical.md#Dataseries.boolean2categorical) 
53 | - **[Export functions](export.md)** 
54 | - [Dataseries.to_tensor](export.md#Dataseries.to_tensor) 
55 | - [Dataseries.to_table](export.md#Dataseries.to_table) 
56 | - **[Metatable functions](metatable.md)** 
57 | - [Dataseries.#](metatable.md#Dataseries.#) 
58 | - [Dataseries.`__tostring__`](metatable.md#Dataseries.__tostring__) 
59 | - **[Single element functions](sngl_elmnt_ops.md)** 
60 | - [Dataseries.get](sngl_elmnt_ops.md#Dataseries.get) 
61 | - [Dataseries.set](sngl_elmnt_ops.md#Dataseries.set) 
62 | - [Dataseries.mutate](sngl_elmnt_ops.md#Dataseries.mutate) 
63 | - [Dataseries.append](sngl_elmnt_ops.md#Dataseries.append) 
64 | - [Dataseries.remove](sngl_elmnt_ops.md#Dataseries.remove) 
65 | - [Dataseries.insert](sngl_elmnt_ops.md#Dataseries.insert) 
66 | - **[Statistics](statistics.md)** 
67 | - [Dataseries.count_na](statistics.md#Dataseries.count_na) 
68 | - [Dataseries.unique](statistics.md#Dataseries.unique) 
69 | - [Dataseries.value_counts](statistics.md#Dataseries.value_counts) 
70 | - [Dataseries.which_max](statistics.md#Dataseries.which_max) 
71 | - [Dataseries.which_min](statistics.md#Dataseries.which_min) 
72 | - [Dataseries.get_mode](statistics.md#Dataseries.get_mode) 
73 | - [Dataseries.get_max_value](statistics.md#Dataseries.get_max_value) 
74 | - [Dataseries.get_min_value](statistics.md#Dataseries.get_min_value) -------------------------------------------------------------------------------- /doc/dataseries/categorical.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [categorical functions](#__Categorical functions__) 
2 | - [Dataseries.as_categorical](#Dataseries.as_categorical) 
3 | - [Dataseries.add_cat_key](#Dataseries.add_cat_key) 
4 | - [Dataseries.as_string](#Dataseries.as_string) 
5 | - [Dataseries.clean_categorical](#Dataseries.clean_categorical) 
6 | - [Dataseries.is_categorical](#Dataseries.is_categorical) 
7 | - [Dataseries.get_cat_keys](#Dataseries.get_cat_keys) 
8 | - [Dataseries.to_categorical](#Dataseries.to_categorical) 
9 | - [Dataseries.from_categorical](#Dataseries.from_categorical) 
10 | - [Dataseries.boolean2categorical](#Dataseries.boolean2categorical) 
11 | 
12 | 
13 | ## Categorical functions 
14 | 
15 | Here are the functions used for converting to and from the categorical type. The 
16 | categorical series type is a hash table around a torch.IntTensor that maps 
17 | numerical values between integer and string values. The standard numbering is 
18 | from 1 to n unique values. 
19 | 
20 | 
21 | ### Dataseries.as_categorical(self[, levels][, labels][, exclude]) 
22 | 
23 | Sets a series to categorical type. The keys are retrieved from Dataseries.unique. 
24 | 
25 | ``` 
26 | ({ 
27 | self = Dataseries -- 
28 | [levels = Df_Array|boolean] -- An optional array of the values that the series might have taken. 
29 | The default is the unique set of values taken by Dataseries.unique, 
30 | sorted into increasing order. If you provide values that aren't present 
31 | within the current series, the value will still be saved and may be invoked in 
32 | the future. 
33 | [labels = Df_Array|boolean] -- An optional character vector of labels for the levels 
34 | (in the same order as levels after removing those in exclude) 
35 | [exclude = Df_Array|boolean] -- Values to be excluded when forming the set of levels. This should be 
36 | of the same type as the series, and will be coerced if necessary. 
37 | }) 
38 | ``` 
39 | 
40 | _Return value_: self 
41 | 
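A hedged sketch of the categorical round trip; it assumes `df` has a string column named 'label' and uses `Dataframe.get_column` (documented in the column docs) to obtain a Dataseries:

```lua
local series = df:get_column('label')
series:as_categorical()

print(series:get_cat_keys())         -- e.g. {a = 1, b = 2, c = 3}
print(series:to_categorical(1))      -- key index back to its string value
print(series:from_categorical('a'))  -- string value back to its key index
```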
42 | ### Dataseries.add_cat_key(self, key[, key_index]) 
43 | 
44 | Adds a key to the keyset of a categorical series. Mostly intended for internal use. 
45 | 
46 | ``` 
47 | ({ 
48 | self = Dataseries -- 
49 | key = number|string -- The new key to insert 
50 | [key_index = number] -- The key index to use 
51 | }) 
52 | ``` 
53 | 
54 | _Return value_: index value for key (integer) 
55 | 
56 | ### Dataseries.as_string(self) 
57 | 
58 | Converts a categorical Dataseries to a string Dataseries. This can be used to revert 
59 | Dataseries.as_categorical or as a way to convert numericals into strings. 
60 | 
61 | ``` 
62 | ({ 
63 | self = Dataseries -- 
64 | }) 
65 | ``` 
66 | 
67 | _Return value_: self 
68 | 
69 | ### Dataseries.clean_categorical(self[, reset_keys]) 
70 | 
71 | ``` 
72 | ({ 
73 | self = Dataseries -- 
74 | [reset_keys = boolean] -- if all the keys should be reinitialized [default=false] 
75 | }) 
76 | ``` 
77 | 
78 | Removes any categories no longer present from the keys 
79 | 
80 | _Return value_: self 
81 | 
82 | ### Dataseries.is_categorical(self) 
83 | 
84 | Checks if a Dataseries is categorical 
85 | 
86 | ``` 
87 | ({ 
88 | self = Dataseries -- 
89 | }) 
90 | ``` 
91 | 
92 | _Return value_: boolean 
93 | 
94 | ### Dataseries.get_cat_keys(self) 
95 | 
96 | Get keys 
97 | 
98 | ``` 
99 | ({ 
100 | self = Dataseries -- 
101 | }) 
102 | ``` 
103 | 
104 | _Return value_: table with `["key"] = number` structure 
105 | 
106 | ### Dataseries.to_categorical(self, key_index) 
107 | 
108 | Converts values to categorical according to the series' keys 
109 | 
110 | ``` 
111 | ({ 
112 | self = Dataseries -- 
113 | key_index = number -- The integer to be converted 
114 | }) 
115 | ``` 
116 | 
117 | _Return value_: string with the value. If provided `nan` it will also 
118 | return a `nan`. 
It returns `nil` if no key is found 
119 | You can also provide a tensor 
120 | 
121 | ``` 
122 | ({ 
123 | self = Dataseries -- 
124 | data = torch.*Tensor -- The integers to be converted 
125 | }) 
126 | ``` 
127 | 
128 | _Return value_: table with values 
129 | You can also provide an array 
130 | 
131 | ``` 
132 | ({ 
133 | self = Dataseries -- 
134 | data = Df_Array -- The integers to be converted 
135 | }) 
136 | ``` 
137 | 
138 | _Return value_: table with values 
139 | 
140 | ### Dataseries.from_categorical(self, data) 
141 | 
142 | Converts categorical to numerical according to the Dataseries' keys 
143 | 
144 | ``` 
145 | ({ 
146 | self = Dataseries -- 
147 | data = number|string -- The data to be converted 
148 | }) 
149 | ``` 
150 | 
151 | _Return value_: table or tensor 
152 | You can also provide an array with values 
153 | 
154 | ``` 
155 | ({ 
156 | self = Dataseries -- 
157 | data = Df_Array -- The data to be converted 
158 | [as_tensor = boolean] -- If the returned value should be a tensor [default=false] 
159 | }) 
160 | ``` 
161 | 
162 | _Return value_: table or tensor 
163 | Checks if a categorical key exists 
164 | 
165 | ``` 
166 | ({ 
167 | self = Dataseries -- 
168 | value = number|string -- The value that should be present in the categorical hash 
169 | }) 
170 | ``` 
171 | 
172 | _Return value_: boolean 
173 | Checks if a categorical value exists 
174 | 
175 | ``` 
176 | ({ 
177 | self = Dataseries -- 
178 | value = number|string -- The value that should be present in the categorical hash 
179 | }) 
180 | ``` 
181 | 
182 | _Return value_: boolean 
183 | 
184 | ### Dataseries.boolean2categorical(self[, false_str][, true_str]) 
185 | 
186 | Converts a boolean Dataseries into a categorical tensor 
187 | 
188 | ``` 
189 | ({ 
190 | self = Dataseries -- 
191 | [false_str = string] -- The string value for false [default=false] 
192 | [true_str = string] -- The string value for true [default=true] 
193 | }) 
194 | ``` 
195 | 
196 | _Return value_: self, boolean indicating successful conversion -------------------------------------------------------------------------------- /doc/dataseries/export.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [export functions](#__Export functions__) 
2 | - [Dataseries.to_tensor](#Dataseries.to_tensor) 
3 | - [Dataseries.to_table](#Dataseries.to_table) 
4 | 
5 | 
6 | ## Export functions 
7 | 
8 | Here are the functions used for exporting to a different format. Generally, `to_` 
9 | functions should reside here; the only exception is `tostring`. 
10 | 
11 | 
12 | ### Dataseries.to_tensor(self[, missing_value][, copy]) 
13 | 
14 | Returns the values in tensor format. Note that if you don't provide a replacement 
15 | for missing values and there are missing values, the function will throw an error. 
16 | 
17 | *Note*: boolean columns are not tensors and need to be manually converted to a 
18 | tensor. This is because 0 would be the natural value for false, but that can cause issues as 
19 | neurons are labeled 1 to n for classification tasks. See the `Dataframe.update` 
20 | function for details or run `boolean2tensor`. 
21 | 
22 | ``` 
23 | ({ 
24 | self = Dataseries -- 
25 | [missing_value = number] -- Set a value for the missing data 
26 | [copy = boolean] -- Set to false if you want the original data to be returned. [default=true] 
27 | }) 
28 | ``` 
29 | 
30 | _Return value_: `torch.*Tensor` of the current type 
31 | 
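A minimal sketch of the export, assuming a numeric series that may contain missing values:

```lua
-- Missing values must be replaced, otherwise to_tensor throws an error
local t = series:to_tensor{missing_value = 0}
print(t:size(1))
```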
32 | ### Dataseries.to_table(self[, boolean2string]) 
33 | 
34 | Returns the values in table format 
35 | 
36 | ``` 
37 | ({ 
38 | self = Dataseries -- 
39 | [boolean2string = boolean] -- Convert boolean values to strings since they cause havoc with csvigo 
40 | }) 
41 | ``` 
42 | 
43 | _Return value_: table -------------------------------------------------------------------------------- /doc/dataseries/metatable.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [metatable functions](#__Metatable functions__) 
2 | - [Dataseries.#](#Dataseries.#) 
3 | - [Dataseries.`__tostring__`](#Dataseries.__tostring__) 
4 | 
5 | 
6 | ## Metatable functions 
7 | 
8 | 
9 | ### Dataseries.# 
10 | 
11 | Returns the number of elements 
12 | 
13 | _Return value_: integer 
14 | 
15 | ### Dataseries.__tostring__(self) 
16 | 
17 | A wrapper for `tostring()` 
18 | 
19 | ``` 
20 | ({ 
21 | self = Dataseries -- 
22 | }) 
23 | ``` 
24 | 
25 | _Return value_: string -------------------------------------------------------------------------------- /doc/dataseries/sngl_elmnt_ops.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [single element functions](#__Single element functions__) 
2 | - [Dataseries.get](#Dataseries.get) 
3 | - [Dataseries.set](#Dataseries.set) 
4 | - [Dataseries.mutate](#Dataseries.mutate) 
5 | - [Dataseries.append](#Dataseries.append) 
6 | - [Dataseries.remove](#Dataseries.remove) 
7 | - [Dataseries.insert](#Dataseries.insert) 
8 | 
9 | 
10 | ## Single element functions 
11 | 
12 | Here are the functions mainly used for manipulating a single element. 
13 | 
14 | 
15 | ### Dataseries.get(self, index[, as_raw]) 
16 | 
17 | Gets a single or a set of elements. 
18 | 
19 | ``` 
20 | ({ 
21 | self = Dataseries -- 
22 | index = number -- The index of the element to retrieve 
23 | [as_raw = boolean] -- Set to true if you want categorical values to be returned as their raw numeric representation [default=false] 
24 | }) 
25 | ``` 
26 | 
27 | _Return value_: number|string|boolean 
28 | If you provide a Df_Array you get back a Dataseries of elements 
29 | 
30 | ``` 
31 | ({ 
32 | self = Dataseries -- 
33 | index = Df_Array -- Indexes of wanted elements 
34 | }) 
35 | ``` 
36 | 
37 | _Return value_: Dataseries 
38 | 
39 | ### Dataseries.set(self, index, value) 
40 | 
41 | Sets a single element 
42 | 
43 | ``` 
44 | ({ 
45 | self = Dataseries -- 
46 | index = number -- The index to set the value to 
47 | value = * -- The data to set 
48 | }) 
49 | ``` 
50 | 
51 | _Return value_: self 
52 | 
53 | ### Dataseries.mutate(self, mutation[, type]) 
54 | 
55 | Modifies a dataseries. Takes a function that is applied to each element. 
56 | 
57 | ``` 
58 | ({ 
59 | self = Dataseries -- 
60 | mutation = function -- The function to apply to each value 
61 | [type = string] -- The return type of the data if other than the current 
62 | }) 
63 | ``` 
64 | 
65 | _Return value_: self 
66 | 
67 | ### Dataseries.append(self, value) 
68 | 
69 | Appends a single element to the series. This function resizes the tensor to +1 
70 | and then calls the `set` function, so if possible try to size the 
71 | series directly to the appropriate length before setting elements, as this alternative is 
72 | slow and should only be used with a few values at a time. 
73 | 
74 | ``` 
75 | ({ 
76 | self = Dataseries -- 
77 | value = * -- The data to set 
78 | }) 
79 | ``` 
80 | 
81 | _Return value_: self 
82 | 
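A brief sketch of single-element manipulation, with arbitrary example values:

```lua
series:set(1, 3.14)  -- overwrite the first element
series:append(42)    -- resizes by one and then calls set(); slow in loops
print(series:get(1))
```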
83 | ### Dataseries.remove(self, index) 
84 | 
85 | Removes a single element 
86 | 
87 | ``` 
88 | ({ 
89 | self = Dataseries -- 
90 | index = number -- The index to remove 
91 | }) 
92 | ``` 
93 | 
94 | _Return value_: self 
95 | 
96 | ### Dataseries.insert(self, index, value) 
97 | 
98 | Inserts a single element 
99 | 
100 | ``` 
101 | ({ 
102 | self = Dataseries -- 
103 | index = number -- The index to insert at 
104 | value = !table -- The value to insert 
105 | }) 
106 | ``` 
107 | 
108 | _Return value_: self -------------------------------------------------------------------------------- /doc/dataseries/statistics.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [statistics](#__Statistics__) 
2 | - [Dataseries.count_na](#Dataseries.count_na) 
3 | - [Dataseries.unique](#Dataseries.unique) 
4 | - [Dataseries.value_counts](#Dataseries.value_counts) 
5 | - [Dataseries.which_max](#Dataseries.which_max) 
6 | - [Dataseries.which_min](#Dataseries.which_min) 
7 | - [Dataseries.get_mode](#Dataseries.get_mode) 
8 | - [Dataseries.get_max_value](#Dataseries.get_max_value) 
9 | - [Dataseries.get_min_value](#Dataseries.get_min_value) 
10 | 
11 | 
12 | ## Statistics 
13 | 
14 | Here are the functions that gather commonly used descriptive statistics 
15 | 
16 | 
17 | ### Dataseries.count_na(self) 
18 | 
19 | Count missing values 
20 | 
21 | ``` 
22 | ({ 
23 | self = Dataseries -- 
24 | }) 
25 | ``` 
26 | 
27 | _Return value_: number 
28 | 
29 | ### Dataseries.unique(self[, as_keys][, as_raw]) 
30 | 
31 | Get unique elements 
32 | 
33 | ``` 
34 | ({ 
35 | self = Dataseries -- 
36 | [as_keys = boolean] -- return table with unique as keys and a count for frequency [default=false] 
37 | [as_raw = boolean] -- return table with raw data without categorical transformation [default=false] 
38 | }) 
39 | ``` 
40 | 
41 | _Return value_: tds.Vec with unique values or 
42 | tds.Hash if as_keys == true then the unique 
43 | value as key with an incremental integer 
44 | value => {'unique1':1, 'unique2':2, 'unique6':3} 
45 | 
46 | ### Dataseries.value_counts(self[, normalize][, dropna][, as_raw][, as_dataframe]) 
47 | 
48 | Counts the number of occurrences for each unique element (frequency/histogram). 
49 | 
50 | ``` 
51 | ({ 
52 | self = Dataseries -- 
53 | [normalize = boolean] -- If True then the object returned will contain the relative frequencies of 
54 | the unique values. [default=false] 
55 | [dropna = boolean] -- Don’t include counts of NaN (missing values). [default=true] 
56 | [as_raw = boolean] -- Use raw numerical values instead of the category label for categoricals [default=false] 
57 | [as_dataframe = boolean] -- Return a Dataframe with `value` and `count` columns [default=true] 
58 | }) 
59 | ``` 
60 | 
61 | _Return value_: Dataframe|table 
62 | 
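To illustrate, a hedged sketch of the frequency helpers; the return shapes are assumptions based on the signatures above:

```lua
-- Frequency table as a plain Lua table instead of a Dataframe
local counts = series:value_counts{as_dataframe = false}

-- Unique values as keys with an incremental integer, per the unique docs
local uniq = series:unique{as_keys = true}
```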
63 | ### Dataseries.which_max(self) 
64 | 
65 | Retrieves the index for the rows with the highest value. There can be more than one row 
66 | sharing the highest value. 
67 | 
68 | ``` 
69 | ({ 
70 | self = Dataseries -- 
71 | }) 
72 | ``` 
73 | 
74 | _Return value_: table with the highest indexes, max value 
75 | 
76 | ### Dataseries.which_min(self) 
77 | 
78 | Retrieves the index for the rows with the lowest value. There can be more than one row 
79 | sharing the lowest value. 
80 | 
81 | ``` 
82 | ({ 
83 | self = Dataseries -- 
84 | }) 
85 | ``` 
86 | 
87 | _Return value_: table with the lowest indexes, lowest value 
88 | 
89 | ### Dataseries.get_mode(self[, normalize][, dropna][, as_dataframe]) 
90 | 
91 | Gets the mode for a Dataseries. A mode is defined as the most frequent value. 
92 | Note that if two or more values are equally common then there are several modes. 
93 | The mode is useful as it can be viewed as any algorithm's most naive guess where 
94 | it always guesses the same value. 
95 | 
96 | ``` 
97 | ({ 
98 | self = Dataseries -- 
99 | [normalize = boolean] -- If True then the object returned will contain the relative frequencies of 
100 | the unique values. [default=false] 
101 | [dropna = boolean] -- Don’t include counts of NaN (missing values). [default=true] 
102 | [as_dataframe = boolean] -- Return a dataframe [default=true] 
103 | }) 
104 | ``` 
105 | 
106 | _Return value_: Table or Dataframe 
107 | 
108 | ### Dataseries.get_max_value(self) 
109 | 
110 | Gets the maximum value. Similar in function to which_max, but it will also return 
111 | the maximum integer value for categorical values. This can be useful when 
112 | deciding on the number of neurons in the final layer. 
113 | 
114 | ``` 
115 | ({ 
116 | self = Dataseries -- 
117 | }) 
118 | ``` 
119 | 
120 | _Return value_: number 
121 | 
122 | ### Dataseries.get_min_value(self) 
123 | 
124 | Gets the minimum value for a given column. Returns minimum values for all 
125 | numerical columns if none is provided. 
126 | 
127 | ``` 
128 | ({ 
129 | self = Dataseries -- 
130 | }) 
131 | ``` 
132 | 
133 | _Return value_: number -------------------------------------------------------------------------------- /doc/helper_classes/10_iterator.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Df_Iterator and general about Dataframe's iterators](#__Df_Iterator and general about Dataframe's iterators__) 
2 | - [Df_Iterator](#Df_Iterator) 
3 | 
4 | 
5 | ## Df_Iterator and general about Dataframe's iterators 
6 | 
7 | The `torchnet` iterators allow a simple iteration over a dataset. If combined 
8 | with a list function you can make the iterator return a table with 
9 | the two key elements `input` and `target` that `tnt.SGDEngine` and 
10 | `tnt.OptimEngine` require. 
11 | 
12 | The Dataframe approach is to combine everything into a single iterator that directly 
13 | returns the training tensors. This is a complement to the subset `get_batch` 
14 | function and relies on the same core functions. 
15 | 
16 | Iterators implement two methods: 
17 | 
18 | - `run()` which returns a Lua iterator usable in a for loop. 
19 | - `exec(funcname, ...)` which executes a given funcname on the underlying dataset. 
20 | 
21 | Typical usage is achieved with a for loop: 
22 | ```lua 
23 | for sample in iterator:run() do 
24 | 
25 | end 
26 | ``` 
27 | 
28 | Iterators implement the `__call` event, so one might also use the `()` operator: 
29 | ```lua 
30 | for sample in iterator() do 
31 | 
32 | end 
33 | ``` 
34 | 
35 | **Important:** The `tnt.DatasetIterator` does not reset the iterator after running 
36 | to the end. In order to do this you must add a `reset_sampler` call in the endEpoch 
37 | hook for the engine: 
38 | 
39 | ```lua 
40 | engine.hooks.onEndEpoch = function(state) 
41 | state.iterator.dataset:reset_sampler() 
42 | end 
43 | ``` 
44 | 
45 | As torchnet is epoch-centered, all samplers will behave as if there was an underlying 
46 | epoch mechanism. E.g. 
the uniform sampler will never trigger a reset, but the epoch 
47 | hook will still be called as there is a "fake epoch" calculated by 
48 | `math.ceil(dataset:size()/batch_size)`. 
49 | 
50 | **Note**: The transform and filter functions are run before the 
51 | `to_tensor` call, as they are assumed to be more valuable with the raw data. As transformations 
52 | can also be useful after the tensors have been generated, the `target_transform` and `input_transform` 
53 | arguments have been added that allow transforming the two tensor elements in the return table. 
54 | 
55 | 
56 | ##### Df_Iterator(self, dataset, batch_size[, filter][, transform][, input_transform][, target_transform]) 
57 | 
58 | After creating your data split (`create_subsets`) you call `get_subset` and 
59 | get the subset that you need to feed to this method. Remember that you must define 
60 | the data and label retrievers that the `Batchframe` will use when calling 
61 | `to_tensor`. The default retrievers can be set through the `class_args` argument: 
62 | 
63 | ```lua 
64 | my_data:create_subsets{ 
65 | 	class_args = Df_Tbl({ 
66 | 		batch_args = Df_Tbl({ 
67 | 			data = function(row) return image_loader(row.filename) end, 
68 | 			label = Df_Array("Gender") 
69 | 		}) 
70 | 	}) 
71 | } 
72 | ``` 
73 | 
74 | ``` 
75 | ({ 
76 | self = Df_Iterator -- 
77 | dataset = Df_Subset -- 
78 | batch_size = number -- The size of the batches 
79 | [filter = function] -- is a closure which returns `true` if the given sample 
80 | should be considered or `false` if not. Note that filter is called _after_ 
81 | fetching the data in a threaded manner and _before_ the `to_tensor` is called. [has default value] 
82 | [transform = function] -- a function which maps the given sample to a new value. This transformation occurs before filtering. [has default value] 
83 | [input_transform = function] -- Allows transforming the input (data) values after the `Batchframe:to_tensor` call [has default value] 
84 | [target_transform = function] -- Allows transforming the target (label) values after the `Batchframe:to_tensor` call [has default value] 
85 | }) 
86 | ``` -------------------------------------------------------------------------------- /doc/helper_classes/11_paralleliterator.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Df_ParallelIterator](#__Df_ParallelIterator__) 
2 | - [Df_ParallelIterator](#Df_ParallelIterator) 
3 | 
4 | 
5 | ## Df_ParallelIterator 
6 | 
7 | The Df_ParallelIterator allows parallel loading when calling the `to_tensor` 
8 | function. For details see the `Df_Iterator` docs. 
9 | 
10 | 
11 | ##### Df_ParallelIterator(self, dataset, batch_size[, init], nthread[, filter][, transform][, input_transform][, target_transform][, ordered]) 
12 | ``` 
13 | ({ 
14 | self = Df_ParallelIterator -- 
15 | dataset = Df_Subset -- The Dataframe subset to use for the iterator 
16 | batch_size = number -- The size of the batches 
17 | [init = function] -- `init(threadid)` (where threadid=1..nthread) is a closure which may 
18 | initialize the specified thread as needed. It loads 
19 | the libraries 'torch' and 'Dataframe' by default. [has default value] 
20 | nthread = number -- The number of threads used to parallelize is specified by `nthread`. 
21 | [filter = function] -- is a closure which returns `true` if the given sample 
22 | should be considered or `false` if not. Note that filter is called _after_ 
23 | fetching the data in a threaded manner and _before_ the `to_tensor` is called. 
[has default value] 
24 | [transform = function] -- a function which maps the given sample to a new value. This transformation occurs before filtering. [has default value] 
25 | [input_transform = function] -- Allows transforming the input (data) values after the `Batchframe:to_tensor` call [has default value] 
26 | [target_transform = function] -- Allows transforming the target (label) values after the `Batchframe:to_tensor` call [has default value] 
27 | [ordered = boolean] -- This option is particularly useful for repeatable experiments. 
28 | By default `ordered` is false, which means that order is not guaranteed by 
29 | `run()` (though often the ordering is similar in practice). 
30 | }) 
31 | ``` 
32 | 
33 | Allows iterating over a dataset in a threaded 
34 | manner. `Df_ParallelIterator:run()` guarantees that all samples 
35 | will be seen, but does not guarantee the order unless `ordered` is set to true. 
36 | 
37 | The purpose of this class is to have a minimal pre-processing cost. 
38 | The current implementation calls the `get_batch` inside the scope of the 
39 | main process while all the loaders, transformers etc. are moved into the threads. 
40 | When reading datasets on the fly from 
41 | disk (not loading them fully in memory), or performing complex 
42 | pre-processing, this can be of interest. 
43 | 
44 | A common error raised by this dataset is when `closure()` is not 
45 | serializable. Make sure that all [upvalues](http://www.lua.org/pil/27.3.3.html) of `closure()` are 
46 | serializable. It is recommended to avoid [upvalues](http://www.lua.org/pil/27.3.3.html) at all cost, 
47 | and to make sure you require all the appropriate torch packages needed to (de-)serialize 
48 | `closure()` in the `init()` function. 
49 | 
50 | For more information, check out the [threads package](https://github.com/torch/threads), 
51 | on which `Df_ParallelIterator` relies. -------------------------------------------------------------------------------- /doc/helper_classes/20_tbl.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Df_Tbl](#__Df_Tbl__) 
2 | - [Df_Tbl.`__init`](#Df_Tbl.__init) 
3 | - [Df_Tbl.#](#Df_Tbl.#) 
4 | 
5 | 
6 | ## Df_Tbl 
7 | 
8 | The Df_Tbl is a class that is used to wrap a table. In contrast with Df_Array 
9 | and Df_Dict it does not check any input data. 
10 | 
11 | 
12 | ### Df_Tbl.__init(table) 
13 | 
14 | This is the fastest table wrapper, as it doesn't copy the original data. It should be used sparingly. 
15 | 
16 | 
17 | ### Df_Tbl.# 
18 | 
19 | Returns the number of elements -------------------------------------------------------------------------------- /doc/helper_classes/21_dict.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Df_Dict](#__Df_Dict__) 
2 | - [Df_Dict.`__init`](#Df_Dict.__init) 
3 | - [Df_Dict.check_lengths()](#Df_Dict.check_lengths) 
4 | - [Df_Dict.set_keys](#Df_Dict.set_keys) 
5 | - [Df_Dict.[]](#Df_Dict.[]) 
6 | - [Df_Dict.#](#Df_Dict.#) 
7 | 
8 | 
9 | ## Df_Dict 
10 | 
11 | The Df_Dict is a class that is used to wrap a dictionary table. A dictionary table 
12 | has a string name corresponding to each key and an array as values, i.e. it may 
13 | not contain any tables. 
14 | 
15 | It is possible to access the Df_Dict's keys with the property `keys`. 
16 | The following properties are available: 
17 | - `Df_Dict.keys`: list of the keys 
18 | - `Df_Dict.length`: content size for each key 
19 | 
20 | ### Df_Dict.__init(table_data) 
21 | 
22 | Create a Df_Dict object given a table 
23 | 
24 | 
25 | ### Df_Dict.check_lengths() 
26 | 
27 | Ensure every column has the same size 
28 | 
29 | _Return value_: boolean 
30 | 
31 | ### Df_Dict.set_keys(table_data) 
32 | 
33 | Replace all the keys by the given values 
34 | 
35 | `table_data` must be a table and have the same item length as the keys 
36 | 
37 | 
38 | ### Df_Dict.[] 
39 | 
40 | Returns the value with the given key 
41 | - _Single integer_: returns the corresponding value 
42 | - _"$column_name"_: get a column by prepending the name with `$`, e.g. `"$a column name"` 
43 | 
44 | _Return value_: Table or single value 
45 | 
46 | 
47 | ### Df_Dict.# 
48 | 
49 | Returns the number of elements -------------------------------------------------------------------------------- /doc/helper_classes/22_array.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [Df_Array](#__Df_Array__) 
2 | - [Df_Array.`__init`](#Df_Array.__init) 
3 | - [Df_Array.[]](#Df_Array.[]) 
4 | - [Df_Array.#](#Df_Array.#) 
5 | 
6 | 
7 | ## Df_Array 
8 | 
9 | The Df_Array is a class that is used to wrap an array table. An array table 
10 | has no key names, it only uses numbers for indexing, and each element has to be 
11 | an atomic element, i.e. it may not contain any tables. 
12 | 
13 | 
14 | ### Df_Array.__init(...) 
15 | 
16 | Df_Array accepts 5 types of init values: 
17 | - single value (string, integer, float, etc) 
18 | - table 
19 | - torch.*Tensor 
20 | - Dataseries 
21 | - arguments list (e.g. Df_Array(1,2,3,4,5) ) 
22 | 
23 | 
24 | ### Df_Array.[] 
25 | 
26 | Returns the value at the given index 
27 | 
28 | 
29 | ### Df_Array.# 
30 | 
31 | Returns the number of elements 
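A compact sketch of how the three wrapper classes are typically used to disambiguate plain Lua tables in argcheck calls; the `$`-indexing follows the Df_Dict docs above:

```lua
local arr  = Df_Array(1, 2, 3)                -- positional values
local dict = Df_Dict{a = {1, 2}, b = {3, 4}}  -- column name -> values
local tbl  = Df_Tbl({anything = 'goes'})      -- no validation, no copy

print(#arr)        -- number of elements
print(dict['$a'])  -- access the column 'a'
```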
-------------------------------------------------------------------------------- /doc/helper_classes/README.md: -------------------------------------------------------------------------------- 
1 | # Documentation for helper classes 
2 | 
3 | This documentation has been auto-generated from code using the `argcheck` system. 
4 | 
5 | ## Table of contents (file-level) 
6 | 
7 | Below follows a more [detailed](#detailed) table of contents with links to 
8 | the different functions. Note that this list may be incomplete due to a failure to 
9 | add appropriate anchor tags during documentation. 
10 | 
11 | 
12 | - [Df_Iterator and general about Dataframe's iterators](10_iterator.md) 
13 | - [Df_ParallelIterator](11_paralleliterator.md) 
14 | - [Df_Tbl](20_tbl.md) 
15 | - [Df_Dict](21_dict.md) 
16 | - [Df_Array](22_array.md) 
17 | 
18 | ## Detailed table of contents (file-level + anchors) 
19 | 
20 | 
21 | - **[Df_Iterator and general about Dataframe's iterators](10_iterator.md)** 
22 | - [Df_Iterator](10_iterator.md#Df_Iterator) 
23 | - **[Df_ParallelIterator](11_paralleliterator.md)** 
24 | - [Df_ParallelIterator](11_paralleliterator.md#Df_ParallelIterator) 
25 | - **[Df_Tbl](20_tbl.md)** 
26 | - [Df_Tbl.`__init`](20_tbl.md#Df_Tbl.__init) 
27 | - [Df_Tbl.#](20_tbl.md#Df_Tbl.#) 
28 | - **[Df_Dict](21_dict.md)** 
29 | - [Df_Dict.`__init`](21_dict.md#Df_Dict.__init) 
30 | - [Df_Dict.check_lengths()](21_dict.md#Df_Dict.check_lengths) 
31 | - [Df_Dict.set_keys](21_dict.md#Df_Dict.set_keys) 
32 | - [Df_Dict.[]](21_dict.md#Df_Dict.[]) 
33 | - [Df_Dict.#](21_dict.md#Df_Dict.#) 
34 | - **[Df_Array](22_array.md)** 
35 | - [Df_Array.`__init`](22_array.md#Df_Array.__init) 
36 | - [Df_Array.[]](22_array.md#Df_Array.[]) 
37 | - [Df_Array.#](22_array.md#Df_Array.#) -------------------------------------------------------------------------------- /doc/sub_classes/README.md: -------------------------------------------------------------------------------- 
1 | # Documentation for sub classes 
2 | 
3 | This documentation has been auto-generated from code using the `argcheck` system. 
4 | 
5 | ## Table of contents (file-level) 
6 | 
7 | Below follows a more [detailed](#detailed) table of contents with links to 
8 | the different functions. Note that this list may be incomplete due to a failure to 
9 | add appropriate anchor tags during documentation. 
10 | 
11 | 
12 | - [Df_Subset](01_subset.md) 
13 | - [Batchframe](10_batchframe.md) 
14 | 
15 | ## Detailed table of contents (file-level + anchors) 
16 | 
17 | 
18 | - **[Df_Subset](01_subset.md)** 
19 | - [Df_Subset.`__init`](01_subset.md#Df_Subset.__init) 
20 | - [Df_Subset.`_clean`](01_subset.md#Df_Subset._clean) 
21 | - [Df_Subset.set_idxs](01_subset.md#Df_Subset.set_idxs) 
22 | - [Df_Subset.get_idx](01_subset.md#Df_Subset.get_idx) 
23 | - [Df_Subset.set_labels](01_subset.md#Df_Subset.set_labels) 
24 | - [Df_Subset.set_sampler](01_subset.md#Df_Subset.set_sampler) 
25 | - [Df_Subset.get_sampler](01_subset.md#Df_Subset.get_sampler) 
26 | - [Sampler: linear - Df_Subset.get_sampler_linear](01_subset.md#Df_Subset.get_sampler_linear) 
27 | - [Sampler: ordered - Df_Subset.get_sampler_ordered](01_subset.md#Df_Subset.get_sampler_ordered) 
28 | - [Sampler: uniform - Df_Subset.get_sampler_uniform](01_subset.md#Df_Subset.get_sampler_uniform) 
29 | - [Sampler: permutation - Df_Subset.get_sampler_permutation](01_subset.md#Df_Subset.get_sampler_permutation) 
30 | - [Sampler: label-uniform - Df_Subset.get_sampler_label_uniform](01_subset.md#Df_Subset.get_sampler_label_uniform) 
31 | - [Sampler: label-distribution - Df_Subset.get_sampler_label_distribution](01_subset.md#Df_Subset.get_sampler_label_distribution) 
32 | - [Sampler: label-permutation - Df_Subset.get_sampler_label_permutation](01_subset.md#Df_Subset.get_sampler_label_permutation) 
33 | - [Df_Subset.get_batch](01_subset.md#Df_Subset.get_batch) 
34 | - [Df_Subset.reset_sampler](01_subset.md#Df_Subset.reset_sampler) 
35 | - [Df_Subset.get_iterator](01_subset.md#Df_Subset.get_iterator) 
36 | - [Df_Subset.get_parallel_iterator](01_subset.md#Df_Subset.get_parallel_iterator) 
37 | - [Df_Subset.`__tostring__`](01_subset.md#Df_Subset.__tostring__) 
38 | - 
[Df_Subset.set_data_retriever](01_subset.md#Df_Subset.set_data_retriever) 
39 | - [Df_Subset.set_label_retriever](01_subset.md#Df_Subset.set_label_retriever) 
40 | - [Df_Subset.set_label_shape](01_subset.md#Df_Subset.set_label_shape) 
41 | - **[Batchframe](10_batchframe.md)** 
42 | - [Batchframe.`__init`](10_batchframe.md#Batchframe.__init) 
43 | - [Batchframe.set_data_retriever](10_batchframe.md#Batchframe.set_data_retriever) 
44 | - [Batchframe.get_data_retriever](10_batchframe.md#Batchframe.get_data_retriever) 
45 | - [Batchframe.set_label_retriever](10_batchframe.md#Batchframe.set_label_retriever) 
46 | - [Batchframe.get_label_retriever](10_batchframe.md#Batchframe.get_label_retriever) 
47 | - [Batchframe.set_label_shape](10_batchframe.md#Batchframe.set_label_shape) 
48 | - [Batchframe.to_tensor](10_batchframe.md#Batchframe.to_tensor) -------------------------------------------------------------------------------- /doc/utils/README.md: -------------------------------------------------------------------------------- 
1 | # Documentation for utils 
2 | 
3 | This documentation has been auto-generated from code using the `argcheck` system. 
4 | 
5 | ## Table of contents (file-level) 
6 | 
7 | Below follows a more [detailed](#detailed) table of contents with links to 
8 | the different functions. Note that this list may be incomplete due to a failure to 
9 | add appropriate anchor tags during documentation. 
10 | 
11 | 
12 | - [Utility functions](utils.md) 
13 | 
14 | ## Detailed table of contents (file-level + anchors) 
15 | 
16 | 
17 | - **[Utility functions](utils.md)** 
18 | - [trim](utils.md#trim) 
19 | - [trim_table_strings](utils.md#trim_table_strings) 
20 | - [table.array2hash](utils.md#table.array2hash) 
21 | - [get_variable_type](utils.md#get_variable_type) 
22 | - [warning](utils.md#warning) 
23 | - [convert_table_2_dataframe](utils.md#convert_table_2_dataframe) -------------------------------------------------------------------------------- /doc/utils/utils.md: -------------------------------------------------------------------------------- 
1 | # API documentation for [utility functions](#__Utility functions__) 
2 | - [trim](#trim) 
3 | - [trim_table_strings](#trim_table_strings) 
4 | - [table.array2hash](#table.array2hash) 
5 | - [get_variable_type](#get_variable_type) 
6 | - [warning](#warning) 
7 | - [convert_table_2_dataframe](#convert_table_2_dataframe) 
8 | 
9 | 
10 | ## Utility functions 
11 | 
12 | Here are utility functions that are not specific to the dataframe but add general 
13 | Lua functionality. 
14 | 
15 | 
16 | ### trim(s[, ignore]) 
17 | 
18 | Trims a string from whitespace chars 
19 | 
20 | ``` 
21 | ({ 
22 | s = string -- The string to trim 
23 | [ignore = number] -- Useful when the string is given directly by the gsub function; gsub also returns a number, which can be ignored through this argument [default=false] 
24 | }) 
25 | ``` 
26 | 
27 | _Return value_: string 
28 | 
29 | ### trim_table_strings(t) 
30 | 
31 | Trims the strings of a table from whitespace chars 
32 | 
33 | ``` 
34 | ({ 
35 | t = table -- The table with strings to trim 
36 | }) 
37 | ``` 
38 | 
39 | _Return value_: string 
40 | 
41 | ### table.array2hash(array) 
42 | 
43 | Converts an array to a hash table with numbers corresponding to the original 
44 | element's position in the array. Intended for use with arrays where all 
45 | values are unique. 
46 | 
47 | ``` 
48 | ({ 
49 | array = table -- An array of elements 
50 | }) 
51 | ``` 
52 | 
53 | _Return value_: table with string keys 
54 | 
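For instance, a small sketch of the two helpers above:

```lua
print(trim('  hello  '))  -- 'hello'

local hash = table.array2hash({'a', 'b', 'c'})
-- hash is expected to be {a = 1, b = 2, c = 3}
```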
55 | ### get_variable_type(value[, prev_type]) 
56 | 
57 | Checks the variable type for a string/numeric/boolean variable. Missing values 
58 | `nan` or "" are ignored. If a previous value is provided then the new variable 
59 | type will be in relation to the previous. I.e. if you provide an integer after 
60 | previously having seen a double, then the type will still be double. 
61 | 
62 | ``` 
63 | ({ 
64 | value = !table -- The value to type-check 
65 | [prev_type = string] -- The previous value type 
66 | }) 
67 | ``` 
68 | 
69 | _Return value_: string of type: 'boolean', 'integer', 'long', 'double', or 'string' 
70 | 
71 | ### warning(ARGP) 
72 | 
73 | A function for printing warnings, i.e. events that shouldn't occur but are not 
74 | serious enough to throw an error. If you want to suppress the warnings then set 
75 | `no_warnings = true` in the global environment. 
76 | 
77 | @ARPT 
78 | 
79 | ### convert_table_2_dataframe(tbl[, value_name][, key_name]) 
80 | 
81 | Converts a table to a Dataframe 
82 | 
83 | ``` 
84 | ({ 
85 | tbl = Df_Tbl -- 
86 | [value_name = string] -- The name of the value column [default=value] 
87 | [key_name = string] -- The name of the key column [default=key] 
88 | }) 
89 | ``` 
90 | 
91 | _Return value_: Dataframe -------------------------------------------------------------------------------- /examples/Facebook license/LICENSE: -------------------------------------------------------------------------------- 
1 | BSD License 
2 | 
3 | For Torchnet software 
4 | 
5 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 
6 | 
7 | Redistribution and use in source and binary forms, with or without modification, 
8 | are permitted provided that the following conditions are met: 
9 | 
10 | * Redistributions of source code must retain the above copyright notice, this 
11 | list of conditions and the following disclaimer. 
12 | 
13 | * Redistributions in binary form must reproduce the above copyright notice, 
14 | this list of conditions and the following disclaimer in the documentation 
15 | and/or other materials provided with the distribution. 
16 | 
17 | * Neither the name Facebook nor the names of its contributors may be used to 
18 | endorse or promote products derived from this software without specific 
19 | prior written permission. 
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /examples/Facebook license/PATENTS: -------------------------------------------------------------------------------- 
1 | Additional Grant of Patent Rights Version 2 
2 | 
3 | "Software" means the Torchnet software distributed by Facebook, Inc. 
4 | 
5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software 
6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable 
7 | (subject to the termination provision below) license under any Necessary 
8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise 
9 | transfer the Software. For avoidance of doubt, no license is granted under 
10 | Facebook’s rights in any patent claims that are infringed by (i) modifications 
11 | to the Software made by you or any third party or (ii) the Software in 
12 | combination with any software or other technology. 
13 | 
14 | The license granted hereunder will terminate, automatically and without notice, 
15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate 
16 | directly or indirectly, or take a direct financial interest in, any Patent 
17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate 
18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or 
19 | in part from any software, technology, product or service of Facebook or any of 
20 | its subsidiaries or corporate affiliates, or (iii) against any party relating 
21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its 
22 | subsidiaries or corporate affiliates files a lawsuit alleging patent 
23 | infringement against you in the first instance, and you respond by filing a 
24 | patent infringement counterclaim in that lawsuit against that party that is 
25 | unrelated to the Software, the license granted hereunder will not terminate 
26 | under section (i) of this paragraph due to such counterclaim. 
27 | 
28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is 
29 | necessarily infringed by the Software standing alone. 
30 | 
31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, 
32 | or contributory infringement or inducement to infringe any patent, including a 
33 | cross-claim or counterclaim. 
34 | -------------------------------------------------------------------------------- /examples/mnist_example.lua: -------------------------------------------------------------------------------- 
1 | --[[ 
2 | Copyright (c) 2016-present, Facebook, Inc. 
3 | All rights reserved. 
4 | This source code is licensed under the BSD-style license found in the 
5 | LICENSE file in the Facebook license in the same directory as this file. An 
6 | additional grant of patent rights can be found in the PATENTS file in the 
7 | same directory. 
8 | ]]-- 
9 | -- load torchnet: 
10 | local tnt = require 'torchnet' 
11 | 
12 | require 'Dataframe' 
13 | 
14 | -- use GPU or not: 
15 | local cmd = torch.CmdLine() 
16 | cmd:option('-usegpu', false, 'use gpu for training') 
17 | cmd:option('-parallel', false, 'use multithreaded loading for training') 
18 | 
19 | local config = cmd:parse(arg) 
20 | print(string.format('running on %s', config.usegpu and 'GPU' or 'CPU')) 
21 | print(string.format('using %s execution', config.parallel and 'parallel' or 'single thread')) 
22 | 
23 | -- function that sets up the dataset iterator: 
24 | local function getIterator(mode) 
25 | 	-- load MNIST dataset: 
26 | 	local mnist = require 'mnist' 
27 | 	local mnist_dataset = mnist[mode .. 'dataset']() 
28 | 
29 | 	-- Create a Dataframe with the label. 
The actual images will be loaded 
30 | -- as an external resource 
31 | local df = Dataframe( 
32 | Df_Dict{ 
33 | label = mnist_dataset.label:totable(), 
34 | row_id = torch.range(1, mnist_dataset.data:size(1)):totable() 
35 | }) 
36 | 
37 | -- Since the mnist package has already taken care of the data 
38 | -- splitting, we create a single subset 
39 | df:create_subsets{ 
40 | subsets = Df_Dict{core = 1}, 
41 | data_retriever = function(row) 
42 | return ext_resource[row.row_id] 
43 | end, 
44 | label_retriever = Df_Array("label") 
45 | } 
46 | 
47 | local subset = df["/core"] 
48 | if (config.parallel) then 
49 | return Df_ParallelIterator{ 
50 | dataset = subset, 
51 | batch_size = 128, 
52 | init = function(idx) 
53 | -- Load the libraries needed 
54 | require 'torch' 
55 | require 'Dataframe' 
56 | 
57 | -- Load the dataset's external resource 
58 | local mnist = require 'mnist' 
59 | local mnist_dataset = mnist[mode .. 'dataset']() 
60 | ext_resource = mnist_dataset.data:reshape(mnist_dataset.data:size(1), 
61 | mnist_dataset.data:size(2) * mnist_dataset.data:size(3)):double() 
62 | end, 
63 | nthread = 2, 
64 | target_transform = function(val) 
65 | return val + 1 
66 | end 
67 | } 
68 | else 
69 | ext_resource = mnist_dataset.data:reshape(mnist_dataset.data:size(1), 
70 | mnist_dataset.data:size(2) * mnist_dataset.data:size(3)):double() 
71 | 
72 | return Df_Iterator{ 
73 | dataset = subset, 
74 | batch_size = 128, 
75 | target_transform = function(val) 
76 | return val + 1 
77 | end 
78 | } 
79 | end 
80 | end 
81 | 
82 | -- set up logistic regressor: 
83 | local net = nn.Sequential():add(nn.Linear(784,10)) 
84 | local criterion = nn.CrossEntropyCriterion() 
85 | 
86 | -- set up training engine: 
87 | local engine = tnt.SGDEngine() 
88 | local meter = tnt.AverageValueMeter() 
89 | local clerr = tnt.ClassErrorMeter{topk = {1}} 
90 | engine.hooks.onStartEpoch = function(state) 
91 | meter:reset() 
92 | clerr:reset() 
93 | end 
94 | engine.hooks.onForwardCriterion = function(state) 
95 | meter:add(state.criterion.output) 
96 | clerr:add(state.network.output, state.sample.target) 
97 | if state.training then 
98 | print(string.format('avg. loss: %2.2f; avg. error: %2.2f', 
99 | meter:value(), clerr:value{k = 1})) 
100 | end 
101 | end 
102 | -- After each epoch we need to invoke the sampler reset (only needed for some samplers) 
103 | engine.hooks.onEndEpoch = function(state) 
104 | print("End epoch no " .. 
state.epoch) 
105 | state.iterator.dataset:reset_sampler() 
106 | end 
107 | 
108 | -- set up GPU training: 
109 | if config.usegpu then 
110 | 
111 | -- copy model to GPU: 
112 | require 'cunn' 
113 | net = net:cuda() 
114 | criterion = criterion:cuda() 
115 | 
116 | -- copy sample to GPU buffer: 
117 | local igpu, tgpu = torch.CudaTensor(), torch.CudaTensor() 
118 | engine.hooks.onSample = function(state) 
119 | igpu:resize(state.sample.input:size() ):copy(state.sample.input) 
120 | tgpu:resize(state.sample.target:size()):copy(state.sample.target) 
121 | state.sample.input = igpu 
122 | state.sample.target = tgpu 
123 | end -- alternatively, this logic can be implemented via a TransformDataset 
124 | end 
125 | 
126 | -- train the model: 
127 | engine:train{ 
128 | network = net, 
129 | iterator = getIterator('train'), 
130 | criterion = criterion, 
131 | lr = 0.2, 
132 | maxepoch = 3, 
133 | } 
134 | 
135 | -- measure test loss and error: 
136 | meter:reset() 
137 | clerr:reset() 
138 | engine:test{ 
139 | network = net, 
140 | iterator = getIterator('test'), 
141 | criterion = criterion, 
142 | } 
143 | print("\n ***** Done *****") 
144 | print(string.format('test loss: %2.2f; test error: %2.2f', 
145 | meter:value(), clerr:value{k = 1})) 
146 | 
-------------------------------------------------------------------------------- /helper_classes/10_iterator.lua: -------------------------------------------------------------------------------- 
1 | -- Skip if the Df_Iterator has already been loaded via paralleliterator 
2 | if (Df_Iterator) then 
3 | return true 
4 | end 
5 | 
6 | local argcheck = require 'argcheck' 
7 | local doc = require 'argcheck.doc' 
8 | local torchnet 
9 | if (doc.__record) then 
10 | doc.stop() 
11 | torchnet = require "torchnet" 
12 | doc.record() 
13 | else 
14 | torchnet = require "torchnet" 
15 | end 
16 | 
17 | 
18 | doc[[ 
19 | ## Df_Iterator and general notes on Dataframe's iterators 
20 | 
21 | The `torchnet` iterators allow simple iteration over a dataset. Combined 
22 | with a list function, you can make the iterator return a table with 
23 | the two key elements `input` and `target` that `tnt.SGDEngine` and 
24 | `tnt.OptimEngine` require. 
25 | 
26 | The Dataframe approach is to combine everything into a single iterator that 
27 | returns the training tensors. This complements the subset's `get_batch` 
28 | function and relies on the same core functions. 
29 | 
30 | Iterators implement two methods: 
31 | 
32 | - `run()` which returns a Lua iterator usable in a for loop. 
33 | - `exec(funcname, ...)` which executes a given funcname on the underlying dataset. 
34 | 
35 | Typical usage is achieved with a for loop: 
36 | ```lua 
37 | for sample in iterator:run() do 
38 | 
39 | end 
40 | ``` 
41 | 
42 | Iterators implement the `__call` event, so one might also use the `()` operator: 
43 | ```lua 
44 | for sample in iterator() do 
45 | 
46 | end 
47 | ``` 
48 | 
49 | **Important:** The `tnt.DatasetIterator` does not reset the iterator after running 
50 | to the end. In order to do this you must add a `reset_sampler` call in the `onEndEpoch` 
51 | hook for the engine: 
52 | 
53 | ```lua 
54 | engine.hooks.onEndEpoch = function(state) 
55 | state.iterator.dataset:reset_sampler() 
56 | end 
57 | ``` 
58 | 
59 | As torchnet is epoch-centered, all samplers behave as if there were an underlying 
60 | epoch mechanism. E.g. the uniform sampler never triggers a reset, but the epoch 
61 | hook is still called as there is a "fake epoch" calculated by 
62 | `math.ceil(dataset:size()/batch_size)`. 
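
For example, a minimal sketch of the fake-epoch arithmetic (the 60,000-row size is only an illustrative assumption, matching the standard MNIST training set):

```lua
local n_rows = 60000     -- assumed value of dataset:size()
local batch_size = 128
-- one "fake epoch" is the number of batches needed to cover the subset once
print(math.ceil(n_rows / batch_size)) --> 469
```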
63 | 
64 | **Note**: The transform and filter functions are run before `to_tensor`, 
65 | as they are assumed to be more valuable with the raw data. As transformations 
66 | can also be useful after the tensors have been generated, the `target_transform` and `input_transform` 
67 | arguments have been added; they allow transforming the two tensor elements in the return table. 
68 | 
69 | ]] 
70 | 
71 | local Df_Iterator, parent_class = torch.class('Df_Iterator', 'tnt.DatasetIterator') 
72 | 
73 | -- iterate over a dataset 
74 | Df_Iterator.__init = argcheck{ 
75 | doc = [[ 
76 | 
77 | ##### Df_Iterator(@ARGP) 
78 | 
79 | After creating your data split (`create_subsets`) you call `get_subset` to 
80 | get the subset that you need to feed to this method. Remember that you must define 
81 | the data and label retrievers that the `Batchframe` will use when calling 
82 | `to_tensor`. The default retrievers can be set through the `class_args` argument: 
83 | 
84 | ```lua 
85 | my_data:create_subsets{ 
86 | class_args = Df_Tbl({ 
87 | batch_args = Df_Tbl({ 
88 | data = function(row) return image_loader(row.filename) end, 
89 | label = Df_Array("Gender") 
90 | }) 
91 | }) 
92 | } 
93 | ``` 
94 | 
95 | @ARGT 
96 | 
97 | ]], 
98 | {name='self', type='Df_Iterator'}, 
99 | {name='dataset', type='Df_Subset'}, 
100 | {name="batch_size", type="number", doc="The size of the batches"}, 
101 | {name='filter', type='function', default=function(sample) return true end, 
102 | doc=[[is a closure which returns `true` if the given sample 
103 | should be considered or `false` if not. Note that filter is called _after_ 
104 | fetching the data in a threaded manner and _before_ `to_tensor` is called.]]}, 
105 | {name='transform', type='function', default=function(sample) return sample end, 
106 | doc='a function which maps the given sample to a new value. This transformation occurs before filtering.'}, 
107 | {name='input_transform', type='function', default=function(val) return val end, 
108 | doc="Allows transforming the input (data) values after the `Batchframe:to_tensor` call"}, 
109 | {name='target_transform', type='function', default=function(val) return val end, 
110 | doc="Allows transforming the target (label) values after the `Batchframe:to_tensor` call"}, 
111 | call = function(self, dataset, batch_size, filter, transform, input_transform, target_transform) 
112 | assert(dataset.batch_args, 
113 | "If you want to use the iterator you must prespecify the batch data/label loaders") 
114 | assert(isint(batch_size) and batch_size > 0, "The batch size must be a positive integer") 
115 | 
116 | self.dataset = dataset 
117 | 
118 | function self.run() 
119 | local size = math.ceil(self:exec("size")/batch_size) 
120 | local idx = 1 -- TODO: Should the idx be skipped since the Dataframe implementation doesn't need it? 
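-- The closure returned below is the actual Lua iterator consumed by the
-- for loop; each call fetches one batch until the fake epoch is exhausted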
121 | return function() 
122 | while idx <= size do 
123 | local sample, reset = self:exec("get_batch", batch_size) 
124 | 
125 | if (reset) then 
126 | idx = size + 1 
127 | else 
128 | idx = idx + 1 
129 | end 
130 | 
131 | -- The samplers may return a nil value if a reset is needed 
132 | if (sample) then 
133 | sample = transform(sample) 
134 | 
135 | -- Only return samples that pass the filter 
136 | if (filter(sample)) then 
137 | local input, target = sample:to_tensor() 
138 | return { 
139 | input = input_transform(input), 
140 | target = target_transform(target) 
141 | } 
142 | end 
143 | end 
144 | end -- End while 
145 | 
146 | end 
147 | end 
148 | end} 
149 | 
-------------------------------------------------------------------------------- /helper_classes/20_tbl.lua: -------------------------------------------------------------------------------- 
1 | require 'torch' 
2 | 
3 | local argcheck = require "argcheck" 
4 | local doc = require "argcheck.doc" 
5 | 
6 | doc[[ 
7 | 
8 | ## Df_Tbl 
9 | 
10 | The Df_Tbl is a class that is used to wrap a table. In contrast to Df_Array 
11 | and Df_Dict, it does not check any input data. 
12 | 
13 | ]] 
14 | 
15 | -- create class object 
16 | local dtbl = torch.class('Df_Tbl') 
17 | 
18 | doc[[ 
19 | 
20 | ### Df_Tbl.__init(table) 
21 | 
22 | This is the fastest table wrapper since it does not copy the original data. It should be used sparingly. 
23 | 
24 | ]] 
25 | function dtbl:__init(table_data) 
26 | self.data = table_data 
27 | end 
28 | 
29 | doc[[ 
30 | 
31 | ### Df_Tbl.# 
32 | 
33 | Returns the number of elements 
34 | 
35 | ]] 
36 | dtbl.__len__ = argcheck{ 
37 | {name="self", type="Df_Tbl"}, 
38 | {name="other", type="Df_Tbl"}, 
39 | call=function(self) 
40 | return table.exact_length(self.data) 
41 | end} 
42 | 
43 | return dtbl 
44 | 
-------------------------------------------------------------------------------- /helper_classes/21_dict.lua: -------------------------------------------------------------------------------- 
1 | require 'torch' 
2 | 
3 | local argcheck = require "argcheck" 
4 | local doc = require "argcheck.doc" 
5 | 
6 | doc[[ 
7 | 
8 | ## Df_Dict 
9 | 
10 | The Df_Dict is a class that is used to wrap a dictionary table. A dictionary table 
11 | has a string name corresponding to each key and an array (or a single value) as value, 
12 | i.e. the values may not contain any nested tables. 
13 | 
14 | The following properties are available: 
15 | 
16 | - `Df_Dict.keys`: list of the keys 
17 | - `Df_Dict.length`: content length for each key 
18 | ]] 
19 | 
20 | -- create class object 
21 | local dict = torch.class('Df_Dict') 
22 | 
23 | doc[[ 
24 | 
25 | ### Df_Dict.__init(table_data) 
26 | 
27 | Create a Df_Dict object given a table 
28 | 
29 | ]] 
30 | function dict:__init(table_data) 
31 | local dict_data = {} 
32 | local dict_lengths = {} -- lengths of each key's value 
33 | local dict_keys = {} 
34 | 
35 | 
36 | assert(torch.type(table_data) == "table", "Argument must be a table") 
37 | 
38 | for k,v in pairs(table_data) do 
39 | dict_lengths[k] = 0 
40 | 
41 | -- Check dimension 
42 | if (torch.type(v) == "table") then 
43 | for i=1,#v do 
44 | assert(type(v[i]) ~= "table", 
45 | ("For key '%s' at position %d the value is a table, which isn't allowed"):format(k, i)) 
46 | dict_lengths[k] = dict_lengths[k] + 1 
47 | end 
48 | else 
49 | dict_lengths[k] = 1 
50 | end 
51 | 
52 | -- store the key in another table for future access 
53 | table.insert(dict_keys,k) 
54 | 
55 | dict_data[k] = v 
56 | end 
57 | 
58 | self.keys = dict_keys 
59 | self.data = dict_data 
60 | self.length = dict_lengths 
61 | end 
62 | 
63 | doc[[ 
64 | 
65 | ### Df_Dict.check_lengths() 
66 | 
67 | Ensures every column has the same size 
68 | 
69 | _Return value_: boolean 
70 | ]] 
71 | function dict:check_lengths() 
72 | local previous_length = self.length[self.keys[1]] 
73 | 
74 | for key,value in pairs(self.length) do 
75 | if previous_length ~= value then 
76 | return false 
77 | end 
78 | 
79 | previous_length = self.length[key] 
80 | end 
81 | 
82 | return true 
83 | end 
84 | 
85 | doc[[ 
86 | 
87 | ### Df_Dict.set_keys(table_data) 
88 | 
89 | Replaces all the keys with the given values 
90 | 
91 | `table_data` must be a table with the same number of items as the current keys 
92 | 
93 | ]] 
94 | function dict:set_keys(table_data) 
95 | assert(torch.type(table_data) == "table", "You must provide a table as argument") 
96 | assert(#table_data == #self.keys, 
97 | ("The keys you provided (%d items) do not match the number of current keys (%d items)") 
98 | :format(#table_data,#self.keys)) 
99 | 
100 | local temp_data = {} 
101 | 
102 | for i=1,#self.keys do 
103 | local old_key = self.keys[i] 
104 | local new_key = table_data[i] 
105 | 
106 | temp_data[new_key] = self.data[old_key] 
107 | end 
108 | 
109 | self.keys = table_data 
110 | self.data = temp_data 
111 | end 
112 | 
113 | doc[[ 
114 | 
115 | ### Df_Dict.[] 
116 | 
117 | Returns the value with the given key 
118 | - _Single integer_: returns the corresponding value 
119 | - _"$column_name"_: gets a column by prepending the name with `$`, e.g. 
`"$a column name"` 
120 | 
121 | _Return value_: Table or single value 
122 | 
123 | ]] 
124 | function dict:__index__(key) 
125 | if (torch.type(key) == "number") then 
126 | return self.data[key], true 
127 | -- Index a column using a $ at the beginning of a string 
128 | elseif (torch.type(key) == "string" and key:match("^[$]")) then 
129 | local key_name = key:gsub("^[$]", "") 
130 | return self.data[key_name], true 
131 | end 
132 | 
133 | return false 
134 | end 
135 | 
136 | function dict:__newindex__(index) 
137 | return false 
138 | end 
139 | 
140 | doc[[ 
141 | 
142 | ### Df_Dict.# 
143 | 
144 | Returns the number of elements 
145 | 
146 | ]] 
147 | dict.__len__ = argcheck{ 
148 | {name="self", type="Df_Dict"}, 
149 | {name="other", type="Df_Dict"},-- used by lua when invoking #myDict 
150 | call=function(self) 
151 | return table.exact_length(self.data) 
152 | end} 
153 | 
154 | return dict 
155 | 
-------------------------------------------------------------------------------- /helper_classes/22_array.lua: -------------------------------------------------------------------------------- 
1 | require 'torch' 
2 | 
3 | 
4 | local argcheck = require "argcheck" 
5 | local doc = require "argcheck.doc" 
6 | 
7 | doc[[ 
8 | 
9 | ## Df_Array 
10 | 
11 | The Df_Array is a class that is used to wrap an array table. An array table 
12 | has no key names; it only uses numbers for indexing, and each element has to be 
13 | an atomic element, i.e. it may not contain any tables. 
14 | 
15 | ]] 
16 | 
17 | -- create class object 
18 | local da = torch.class('Df_Array') 
19 | 
20 | 
21 | doc[[ 
22 | 
23 | ### Df_Array.__init(...) 
24 | 
25 | Df_Array accepts five types of init values: 
26 | - single value (string, integer, float, etc) 
27 | - table 
28 | - torch.*Tensor 
29 | - Dataseries 
30 | - argument list (e.g. Df_Array(1,2,3,4,5)) 
31 | 
32 | ]] 
33 | -- (...) allows calling Df_Array with any number of arguments 
34 | function da:__init(...) 
35 | local arg = {...} 
36 | 
37 | -- If there is only one value, which can be 
38 | -- a simple type (string, number, etc), a table or a tensor 
39 | if (#arg == 1 and 
40 | (torch.type(arg[1]) == 'table' or 
41 | torch.isTensor(arg[1]) or 
42 | torch.type(arg[1]) == "Dataseries")) then 
43 | -- If this is the case, the arg var is set to its single value 
44 | arg = arg[1] 
45 | end 
46 | 
47 | local array_data = {} 
48 | if (torch.isTensor(arg)) then 
49 | -- If Df_Array is initialized with a tensor, 
50 | -- it is simply converted into a table and set 
51 | array_data = arg:totable() 
52 | elseif (torch.type(arg) == "Dataseries") then 
53 | -- The same applies to a Dataseries 
54 | array_data = arg:to_table() 
55 | else 
56 | -- If there are multiple arguments or 
57 | -- a table (thanks to the #arg == 1 condition above), 
58 | -- the values are copied element by element; 
59 | -- in the case of a table, this discards any 
60 | -- non-numerical keys and keeps only numerical indexes 
61 | for i=1,#arg do 
62 | assert(type(arg[i]) ~= "table", 
63 | ("The Dataframe array cannot contain tables - see position %d in your input"):format(i)) 
64 | array_data[i] = arg[i] 
65 | end 
66 | end 
67 | 
68 | self.data = array_data 
69 | end 
70 | 
71 | 
72 | doc[[ 
73 | 
74 | ### Df_Array.[] 
75 | 
76 | Returns the value at the given index 
77 | 
78 | ]] 
79 | function da:__index__(index) 
80 | if (torch.type(index) == "number") then 
81 | return self.data[index], true 
82 | end 
83 | 
84 | return false 
85 | end 
86 | 
87 | function da:__newindex__(index) 
88 | return false 
89 | end 
90 | 
91 | 
92 | doc[[ 
93 | 
94 | ### Df_Array.# 
95 | 
96 | Returns the number of elements 
97 | 
98 | ]] 
99 | da.__len__ = argcheck{ 
100 | {name="self", type="Df_Array"}, 
101 | {name="other", type="Df_Array"},-- used by lua when invoking #myArray 
102 | call=function(self) 
103 | return #self.data 
104 | end} 
105 | 
106 | return da 
107 | 
-------------------------------------------------------------------------------- /helper_classes/Facebok license: -------------------------------------------------------------------------------- 
1 | ../examples/Facebook license/ 
-------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 
1 | local paths = require 'paths' 
2 | local dataframe_dir = string.gsub(paths.thisfile(), "[^/]+$", "") 
3 | 
4 | -- Custom argument checks 
5 | local argcheck_file = dataframe_dir .. "argcheck.lua" 
6 | assert(loadfile(argcheck_file))() 
7 | -- Custom busted assertions, only needed for running tests 
8 | local assert_file = dataframe_dir .. "custom_assertions.lua" 
9 | if (paths.filep(assert_file)) then 
10 | assert(loadfile(assert_file))() 
11 | end 
12 | 
13 | -- Get the loader function and start by making utils available to all 
14 | local loader_file = dataframe_dir .. "utils/loader.lua" 
15 | assert(loadfile(loader_file))() 
16 | load_dir_files(dataframe_dir .. "utils/") 
17 | 
18 | -- Load all the classes 
19 | load_dir_files(dataframe_dir .. "helper_classes/") 
20 | 
21 | load_dir_files(dataframe_dir .. "dataseries/") 
22 | 
23 | load_dir_files(dataframe_dir .. "dataframe/") 
24 | 
25 | load_dir_files(dataframe_dir .. "sub_classes/") 
26 | 
27 | return Dataframe 
28 | 
-------------------------------------------------------------------------------- /rocks/torch-dataframe-1.0-0.rockspec: -------------------------------------------------------------------------------- 
1 | package = "torch-dataframe" 
2 | version = "1.0-0" 
3 | source = { 
4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.0-0.tar.gz", 
5 | dir = "torch-dataframe-1.0-0" 
6 | } 
7 | description = { 
8 | summary = "A Dataframe class for Torch", 
9 | detailed = [[ 
10 | Dataframe is a Torch7 class to load and manipulate 
11 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 
12 | ]], 13 | homepage = "https://github.com/alexmili/torch-dataframe", 14 | license = "MIT/X11", 15 | maintainer = "AlexMili" 16 | } 17 | dependencies = { 18 | "lua ~> 5.1", 19 | "torch >= 7.0", 20 | "luafilesystem >= 1.6.3" 21 | } 22 | build = { 23 | type = 'builtin', 24 | modules = { 25 | ["Dataframe.init"] = 'init.lua', 26 | ["Dataframe.utils"] = 'utils.lua', 27 | ["Dataframe.main"] = 'main.lua', 28 | ["Dataframe.Extensions.categorical"] = 'Extensions/categorical.lua', 29 | ["Dataframe.Extensions.load_batch"] = 'Extensions/load_batch.lua', 30 | ["Dataframe.Extensions.load_data"] = 'Extensions/load_data.lua', 31 | ["Dataframe.Extensions.missing_data"] = 'Extensions/missing_data.lua', 32 | ["Dataframe.Extensions.output"] = 'Extensions/output.lua', 33 | ["Dataframe.Extensions.save_data"] = 'Extensions/save_data.lua', 34 | ["Dataframe.Extensions.select_set_update"] = 'Extensions/select_set_update.lua', 35 | ["Dataframe.Extensions.statistics"] = 'Extensions/statistics.lua' 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-1.1-0.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "1.1-0" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.1-0.tar.gz", 5 | dir = "torch-dataframe-1.1-0" 6 | } 7 | description = { 8 | summary = "A Dataframe class for Torch", 9 | detailed = [[ 10 | Dataframe is a Torch7 class to load and manipulate 11 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 12 | ]], 13 | homepage = "https://github.com/alexmili/torch-dataframe", 14 | license = "MIT/X11", 15 | maintainer = "AlexMili" 16 | } 17 | dependencies = { 18 | "lua ~> 5.1", 19 | "torch >= 7.0", 20 | "argcheck >= 2.0", 21 | "luafilesystem >= 1.6.3" 22 | } 23 | build = { 24 | type = 'builtin', 25 | modules = { 26 | ["Dataframe.init"] = 'init.lua', 27 | ["Dataframe.utils"] = 'utils.lua', 28 | ["Dataframe.argcheck"] = 'argcheck.lua', 29 | ["Dataframe.main"] = 'main.lua', 30 | ["Dataframe.Extensions.categorical"] = 'Extensions/categorical.lua', 31 | ["Dataframe.Extensions.column"] = 'Extensions/column.lua', 32 | ["Dataframe.Extensions.load_batch"] = 'Extensions/load_batch.lua', 33 | ["Dataframe.Extensions.load_data"] = 'Extensions/load_data.lua', 34 | ["Dataframe.Extensions.missing_data"] = 'Extensions/missing_data.lua', 35 | ["Dataframe.Extensions.output"] = 'Extensions/output.lua', 36 | ["Dataframe.Extensions.export_data"] = 'Extensions/export_data.lua', 37 | ["Dataframe.Extensions.select_set_update"] = 'Extensions/select_set_update.lua', 38 | ["Dataframe.Extensions.statistics"] = 'Extensions/statistics.lua', 39 | ["Dataframe.helper_classes.array"] = 'helper_classes/array.lua', 40 | ["Dataframe.helper_classes.dict"] = 'helper_classes/dict.lua', 41 | ["Dataframe.helper_classes.tbl"] = 'helper_classes/tbl.lua' 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-1.5-0.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "1.5-0" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.5-0.tar.gz", 5 | dir = "torch-dataframe-1.5-0" 6 | } 7 | 8 | description = { 9 | summary = "A Dataframe class for Torch", 10 | detailed = [[ 11 | Dataframe is a Torch7 class to load and manipulate 12 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 
13 | Compatible with torchnet. 14 | ]], 15 | homepage = "https://github.com/alexmili/torch-dataframe", 16 | license = "MIT/X11", 17 | maintainer = "AlexMili" 18 | } 19 | dependencies = { 20 | "lua >= 5.1", 21 | "torch >= 7.0", 22 | "argcheck >= 2.0", 23 | "luafilesystem >= 1.6.3", 24 | "paths", 25 | "torchnet >= 1.0", 26 | "threads >= 1.0", 27 | "nn" 28 | } 29 | build = { 30 | type = 'builtin', 31 | modules = { 32 | ["Dataframe.init"] = 'init.lua', 33 | ["Dataframe.utils"] = 'utils.lua', 34 | ["Dataframe.argcheck"] = 'argcheck.lua', 35 | ["Dataframe.main"] = 'main.lua', 36 | ["Dataframe.extensions.metatable"] = 'extensions/metatable.lua', 37 | ["Dataframe.extensions.categorical"] = 'extensions/categorical.lua', 38 | ["Dataframe.extensions.column"] = 'extensions/column.lua', 39 | ["Dataframe.extensions.row"] = 'extensions/row.lua', 40 | ["Dataframe.extensions.subsets_and_batches"] = 'extensions/subsets_and_batches.lua', 41 | ["Dataframe.extensions.load_data"] = 'extensions/load_data.lua', 42 | ["Dataframe.extensions.missing_data"] = 'extensions/missing_data.lua', 43 | ["Dataframe.extensions.output"] = 'extensions/output.lua', 44 | ["Dataframe.extensions.export_data"] = 'extensions/export_data.lua', 45 | ["Dataframe.extensions.select_set_update"] = 'extensions/select_set_update.lua', 46 | ["Dataframe.extensions.statistics"] = 'extensions/statistics.lua', 47 | 48 | ["Dataframe.sub_classes.01_subset"] = 'sub_classes/01_subset.lua', 49 | ["Dataframe.sub_classes.10_batchframe"] = 'sub_classes/10_batchframe.lua', 50 | ["Dataframe.sub_classes.subset_extensions.samplers"] = 'sub_classes/subset_extensions/samplers.lua', 51 | 52 | ["Dataframe.helper_classes.01_iterator"] = 'helper_classes/01_iterator.lua', 53 | ["Dataframe.helper_classes.02_paralleliterator"] = 'helper_classes/02_paralleliterator.lua', 54 | ["Dataframe.helper_classes.10_array"] = 'helper_classes/10_array.lua', 55 | ["Dataframe.helper_classes.11_dict"] = 'helper_classes/11_dict.lua', 56 | ["Dataframe.helper_classes.12_tbl"] = 'helper_classes/12_tbl.lua' 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-1.6-0.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "1.6-0" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.6-0.tar.gz", 5 | dir = "torch-dataframe-1.6-0" 6 | } 7 | 8 | description = { 9 | summary = "A Dataframe class for Torch", 10 | detailed = [[ 11 | Dataframe is a Torch7 class to load and manipulate 12 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 13 | Compatible with torchnet. 
14 | ]], 15 | homepage = "https://github.com/alexmili/torch-dataframe", 16 | license = "MIT/X11", 17 | maintainer = "AlexMili" 18 | } 19 | dependencies = { 20 | "lua >= 5.1", 21 | "torch >= 7.0", 22 | "argcheck >= 2.0", 23 | "luafilesystem >= 1.6.3", 24 | "paths", 25 | "torchnet >= 1.0", 26 | "threads >= 1.0", 27 | "tds", 28 | "nn" 29 | } 30 | build = { 31 | type = "cmake", 32 | variables = { 33 | CMAKE_BUILD_TYPE="Release", 34 | LUA_PATH="$(LUADIR)", 35 | LUA_CPATH="$(LIBDIR)" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-1.6-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "1.6-1" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.6-1.tar.gz", 5 | dir = "torch-dataframe-1.6-1" 6 | } 7 | 8 | description = { 9 | summary = "A Dataframe class for Torch", 10 | detailed = [[ 11 | Dataframe is a Torch7 class to load and manipulate 12 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 13 | Compatible with torchnet. 14 | ]], 15 | homepage = "https://github.com/alexmili/torch-dataframe", 16 | license = "MIT/X11", 17 | maintainer = "AlexMili" 18 | } 19 | dependencies = { 20 | "lua >= 5.1", 21 | "torch >= 7.0", 22 | "argcheck >= 2.0", 23 | "luafilesystem >= 1.6.3", 24 | "paths", 25 | "torchnet >= 1.0", 26 | "threads >= 1.0", 27 | "tds", 28 | "nn" 29 | } 30 | build = { 31 | type = "cmake", 32 | variables = { 33 | CMAKE_BUILD_TYPE="Release", 34 | LUA_PATH="$(LUADIR)", 35 | LUA_CPATH="$(LIBDIR)" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-1.7-0.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "1.7-0" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/v1.7-0.tar.gz", 5 | dir = "torch-dataframe-1.7-0" 6 | } 7 | 8 | description = { 9 | summary = "A Dataframe class for Torch", 10 | detailed = [[ 11 | Dataframe is a Torch7 class to load and manipulate 12 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 13 | Compatible with torchnet. 14 | ]], 15 | homepage = "https://github.com/alexmili/torch-dataframe", 16 | license = "MIT/X11", 17 | maintainer = "AlexMili" 18 | } 19 | dependencies = { 20 | "lua >= 5.1", 21 | "torch >= 7.0", 22 | "argcheck >= 2.0", 23 | "luafilesystem >= 1.6.3", 24 | "paths", 25 | "torchnet >= 1.0", 26 | "threads >= 1.0", 27 | "tds", 28 | "nn" 29 | } 30 | build = { 31 | type = "cmake", 32 | variables = { 33 | CMAKE_BUILD_TYPE="Release", 34 | LUA_PATH="$(LUADIR)", 35 | LUA_CPATH="$(LIBDIR)" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /rocks/torch-dataframe-scm-1.rockspec: -------------------------------------------------------------------------------- 1 | package = "torch-dataframe" 2 | version = "scm-1" 3 | source = { 4 | url = "https://github.com/alexmili/torch-dataframe/archive/develop.tar.gz", 5 | dir = "torch-dataframe-develop" 6 | } 7 | description = { 8 | summary = "A Dataframe class for Torch", 9 | detailed = [[ 10 | Dataframe is a Torch7 class to load and manipulate 11 | Kaggle-style CSVs inspired from R's and pandas' Dataframes. 12 | Compatible with torchnet. 
13 | ]], 14 | homepage = "https://github.com/alexmili/torch-dataframe", 15 | license = "MIT/X11", 16 | maintainer = "AlexMili" 17 | } 18 | dependencies = { 19 | "lua >= 5.1", 20 | "torch >= 7.0", 21 | "argcheck >= 2.0", 22 | "luafilesystem >= 1.6.3", 23 | "paths", 24 | "torchnet >= 1.0", 25 | "threads >= 1.0", 26 | "tds", 27 | "nn" 28 | } 29 | build = { 30 | type = "cmake", 31 | variables = { 32 | CMAKE_BUILD_TYPE="Release", 33 | LUA_PATH="$(LUADIR)", 34 | LUA_CPATH="$(LIBDIR)" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /specs/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | RUN_TESTS=true 4 | VERBOSE=false 5 | while [[ $# -gt 0 ]] 6 | do 7 | key="$1" 8 | 9 | case $key in 10 | -v|--verbose) 11 | VERBOSE=true 12 | ;; 13 | -g|--generate) 14 | RUN_TESTS=false 15 | ;; 16 | *) 17 | # unknown option 18 | ;; 19 | esac 20 | shift # past argument or value 21 | done 22 | 23 | echo -e "================="; 24 | echo -e "= Code coverage ="; 25 | echo -e "================="; 26 | echo ""; 27 | 28 | if [ "$RUN_TESTS" = true ]; then 29 | ./run_all.sh --coverage 30 | fi 31 | 32 | mv luacov.stats.out ../luacov.stats.out 33 | 34 | cd .. 35 | 36 | luacov -c .luacov 37 | 38 | if [ "$RUN_TESTS" = true ]; then 39 | 40 | mv -f luacov.stats.out specs/luacov.stats.out 41 | mv -f luacov.report.out specs/luacov.report.out 42 | 43 | cd specs 44 | 45 | if [ "$VERBOSE" = true ]; then 46 | cat luacov.report.out 47 | fi 48 | fi 49 | -------------------------------------------------------------------------------- /specs/data/advanced_short.csv: -------------------------------------------------------------------------------- 1 | Col A,Col B,Col C 2 | 1,A,8 3 | 2,B, 4 | 3,B,9 5 | -------------------------------------------------------------------------------- /specs/data/full.csv: -------------------------------------------------------------------------------- 1 | Col A , Col B,Col C, Col D 2 | 1,0.2,0.1,A 3 | 2,0.3,,B 4 | 3,0.4,9999999999, 5 | 4,0.5,-222,D -------------------------------------------------------------------------------- /specs/data/iris-label.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 
4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 
6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /specs/data/iris-no-header.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,Iris-setosa 2 | 4.9,3.0,1.4,0.2,Iris-setosa 3 | 4.7,3.2,1.3,0.2,Iris-setosa 4 | 4.6,3.1,1.5,0.2,Iris-setosa 5 | 5.0,3.6,1.4,0.2,Iris-setosa 6 | 5.4,3.9,1.7,0.4,Iris-setosa 7 | 4.6,3.4,1.4,0.3,Iris-setosa 8 | 5.0,3.4,1.5,0.2,Iris-setosa 9 | 4.4,2.9,1.4,0.2,Iris-setosa 10 | 4.9,3.1,1.5,0.1,Iris-setosa 11 | 5.4,3.7,1.5,0.2,Iris-setosa 12 | 4.8,3.4,1.6,0.2,Iris-setosa 13 | 4.8,3.0,1.4,0.1,Iris-setosa 14 | 4.3,3.0,1.1,0.1,Iris-setosa 15 | 5.8,4.0,1.2,0.2,Iris-setosa 16 | 5.7,4.4,1.5,0.4,Iris-setosa 17 | 5.4,3.9,1.3,0.4,Iris-setosa 18 | 5.1,3.5,1.4,0.3,Iris-setosa 19 | 5.7,3.8,1.7,0.3,Iris-setosa 20 | 5.1,3.8,1.5,0.3,Iris-setosa 21 | 5.4,3.4,1.7,0.2,Iris-setosa 22 | 5.1,3.7,1.5,0.4,Iris-setosa 23 | 4.6,3.6,1.0,0.2,Iris-setosa 24 | 5.1,3.3,1.7,0.5,Iris-setosa 25 | 4.8,3.4,1.9,0.2,Iris-setosa 26 | 5.0,3.0,1.6,0.2,Iris-setosa 27 | 5.0,3.4,1.6,0.4,Iris-setosa 28 | 5.2,3.5,1.5,0.2,Iris-setosa 29 | 5.2,3.4,1.4,0.2,Iris-setosa 30 | 4.7,3.2,1.6,0.2,Iris-setosa 31 | 4.8,3.1,1.6,0.2,Iris-setosa 32 | 5.4,3.4,1.5,0.4,Iris-setosa 33 | 5.2,4.1,1.5,0.1,Iris-setosa 34 | 5.5,4.2,1.4,0.2,Iris-setosa 35 | 4.9,3.1,1.5,0.1,Iris-setosa 36 | 5.0,3.2,1.2,0.2,Iris-setosa 37 | 5.5,3.5,1.3,0.2,Iris-setosa 38 | 4.9,3.1,1.5,0.1,Iris-setosa 39 | 4.4,3.0,1.3,0.2,Iris-setosa 40 | 5.1,3.4,1.5,0.2,Iris-setosa 41 | 5.0,3.5,1.3,0.3,Iris-setosa 42 | 4.5,2.3,1.3,0.3,Iris-setosa 43 | 4.4,3.2,1.3,0.2,Iris-setosa 44 | 5.0,3.5,1.6,0.6,Iris-setosa 45 | 5.1,3.8,1.9,0.4,Iris-setosa 46 | 4.8,3.0,1.4,0.3,Iris-setosa 47 | 5.1,3.8,1.6,0.2,Iris-setosa 48 | 4.6,3.2,1.4,0.2,Iris-setosa 49 | 5.3,3.7,1.5,0.2,Iris-setosa 50 | 5.0,3.3,1.4,0.2,Iris-setosa 51 | 7.0,3.2,4.7,1.4,Iris-versicolor 52 | 6.4,3.2,4.5,1.5,Iris-versicolor 53 | 6.9,3.1,4.9,1.5,Iris-versicolor 54 | 5.5,2.3,4.0,1.3,Iris-versicolor 55 | 6.5,2.8,4.6,1.5,Iris-versicolor 56 | 5.7,2.8,4.5,1.3,Iris-versicolor 57 | 6.3,3.3,4.7,1.6,Iris-versicolor 58 | 4.9,2.4,3.3,1.0,Iris-versicolor 59 | 6.6,2.9,4.6,1.3,Iris-versicolor 60 | 5.2,2.7,3.9,1.4,Iris-versicolor 61 | 5.0,2.0,3.5,1.0,Iris-versicolor 62 | 5.9,3.0,4.2,1.5,Iris-versicolor 63 | 6.0,2.2,4.0,1.0,Iris-versicolor 64 | 6.1,2.9,4.7,1.4,Iris-versicolor 65 | 5.6,2.9,3.6,1.3,Iris-versicolor 66 | 6.7,3.1,4.4,1.4,Iris-versicolor 67 | 5.6,3.0,4.5,1.5,Iris-versicolor 68 | 5.8,2.7,4.1,1.0,Iris-versicolor 69 | 6.2,2.2,4.5,1.5,Iris-versicolor 70 | 5.6,2.5,3.9,1.1,Iris-versicolor 71 | 5.9,3.2,4.8,1.8,Iris-versicolor 72 | 6.1,2.8,4.0,1.3,Iris-versicolor 73 | 6.3,2.5,4.9,1.5,Iris-versicolor 74 | 6.1,2.8,4.7,1.2,Iris-versicolor 75 | 6.4,2.9,4.3,1.3,Iris-versicolor 76 | 6.6,3.0,4.4,1.4,Iris-versicolor 77 | 6.8,2.8,4.8,1.4,Iris-versicolor 78 | 6.7,3.0,5.0,1.7,Iris-versicolor 79 | 6.0,2.9,4.5,1.5,Iris-versicolor 80 | 5.7,2.6,3.5,1.0,Iris-versicolor 81 | 
5.5,2.4,3.8,1.1,Iris-versicolor 82 | 5.5,2.4,3.7,1.0,Iris-versicolor 83 | 5.8,2.7,3.9,1.2,Iris-versicolor 84 | 6.0,2.7,5.1,1.6,Iris-versicolor 85 | 5.4,3.0,4.5,1.5,Iris-versicolor 86 | 6.0,3.4,4.5,1.6,Iris-versicolor 87 | 6.7,3.1,4.7,1.5,Iris-versicolor 88 | 6.3,2.3,4.4,1.3,Iris-versicolor 89 | 5.6,3.0,4.1,1.3,Iris-versicolor 90 | 5.5,2.5,4.0,1.3,Iris-versicolor 91 | 5.5,2.6,4.4,1.2,Iris-versicolor 92 | 6.1,3.0,4.6,1.4,Iris-versicolor 93 | 5.8,2.6,4.0,1.2,Iris-versicolor 94 | 5.0,2.3,3.3,1.0,Iris-versicolor 95 | 5.6,2.7,4.2,1.3,Iris-versicolor 96 | 5.7,3.0,4.2,1.2,Iris-versicolor 97 | 5.7,2.9,4.2,1.3,Iris-versicolor 98 | 6.2,2.9,4.3,1.3,Iris-versicolor 99 | 5.1,2.5,3.0,1.1,Iris-versicolor 100 | 5.7,2.8,4.1,1.3,Iris-versicolor 101 | 6.3,3.3,6.0,2.5,Iris-virginica 102 | 5.8,2.7,5.1,1.9,Iris-virginica 103 | 7.1,3.0,5.9,2.1,Iris-virginica 104 | 6.3,2.9,5.6,1.8,Iris-virginica 105 | 6.5,3.0,5.8,2.2,Iris-virginica 106 | 7.6,3.0,6.6,2.1,Iris-virginica 107 | 4.9,2.5,4.5,1.7,Iris-virginica 108 | 7.3,2.9,6.3,1.8,Iris-virginica 109 | 6.7,2.5,5.8,1.8,Iris-virginica 110 | 7.2,3.6,6.1,2.5,Iris-virginica 111 | 6.5,3.2,5.1,2.0,Iris-virginica 112 | 6.4,2.7,5.3,1.9,Iris-virginica 113 | 6.8,3.0,5.5,2.1,Iris-virginica 114 | 5.7,2.5,5.0,2.0,Iris-virginica 115 | 5.8,2.8,5.1,2.4,Iris-virginica 116 | 6.4,3.2,5.3,2.3,Iris-virginica 117 | 6.5,3.0,5.5,1.8,Iris-virginica 118 | 7.7,3.8,6.7,2.2,Iris-virginica 119 | 7.7,2.6,6.9,2.3,Iris-virginica 120 | 6.0,2.2,5.0,1.5,Iris-virginica 121 | 6.9,3.2,5.7,2.3,Iris-virginica 122 | 5.6,2.8,4.9,2.0,Iris-virginica 123 | 7.7,2.8,6.7,2.0,Iris-virginica 124 | 6.3,2.7,4.9,1.8,Iris-virginica 125 | 6.7,3.3,5.7,2.1,Iris-virginica 126 | 7.2,3.2,6.0,1.8,Iris-virginica 127 | 6.2,2.8,4.8,1.8,Iris-virginica 128 | 6.1,3.0,4.9,1.8,Iris-virginica 129 | 6.4,2.8,5.6,2.1,Iris-virginica 130 | 7.2,3.0,5.8,1.6,Iris-virginica 131 | 7.4,2.8,6.1,1.9,Iris-virginica 132 | 7.9,3.8,6.4,2.0,Iris-virginica 133 | 6.4,2.8,5.6,2.2,Iris-virginica 134 | 6.3,2.8,5.1,1.5,Iris-virginica 135 | 6.1,2.6,5.6,1.4,Iris-virginica 136 | 7.7,3.0,6.1,2.3,Iris-virginica 137 | 6.3,3.4,5.6,2.4,Iris-virginica 138 | 6.4,3.1,5.5,1.8,Iris-virginica 139 | 6.0,3.0,4.8,1.8,Iris-virginica 140 | 6.9,3.1,5.4,2.1,Iris-virginica 141 | 6.7,3.1,5.6,2.4,Iris-virginica 142 | 6.9,3.1,5.1,2.3,Iris-virginica 143 | 5.8,2.7,5.1,1.9,Iris-virginica 144 | 6.8,3.2,5.9,2.3,Iris-virginica 145 | 6.7,3.3,5.7,2.5,Iris-virginica 146 | 6.7,3.0,5.2,2.3,Iris-virginica 147 | 6.3,2.5,5.0,1.9,Iris-virginica 148 | 6.5,3.0,5.2,2.0,Iris-virginica 149 | 6.2,3.4,5.4,2.3,Iris-virginica 150 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /specs/data/iris-no-label.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width 2 | 5.1,3.5,1.4,0.2 3 | 4.9,3.0,1.4,0.2 4 | 4.7,3.2,1.3,0.2 5 | 4.6,3.1,1.5,0.2 6 | 5.0,3.6,1.4,0.2 7 | 5.4,3.9,1.7,0.4 8 | 4.6,3.4,1.4,0.3 9 | 5.0,3.4,1.5,0.2 10 | 4.4,2.9,1.4,0.2 11 | 4.9,3.1,1.5,0.1 12 | 5.4,3.7,1.5,0.2 13 | 4.8,3.4,1.6,0.2 14 | 4.8,3.0,1.4,0.1 15 | 4.3,3.0,1.1,0.1 16 | 5.8,4.0,1.2,0.2 17 | 5.7,4.4,1.5,0.4 18 | 5.4,3.9,1.3,0.4 19 | 5.1,3.5,1.4,0.3 20 | 5.7,3.8,1.7,0.3 21 | 5.1,3.8,1.5,0.3 22 | 5.4,3.4,1.7,0.2 23 | 5.1,3.7,1.5,0.4 24 | 4.6,3.6,1.0,0.2 25 | 5.1,3.3,1.7,0.5 26 | 4.8,3.4,1.9,0.2 27 | 5.0,3.0,1.6,0.2 28 | 5.0,3.4,1.6,0.4 29 | 5.2,3.5,1.5,0.2 30 | 5.2,3.4,1.4,0.2 31 | 4.7,3.2,1.6,0.2 32 | 4.8,3.1,1.6,0.2 33 | 5.4,3.4,1.5,0.4 34 | 5.2,4.1,1.5,0.1 35 | 5.5,4.2,1.4,0.2 36 | 
4.9,3.1,1.5,0.1 37 | 5.0,3.2,1.2,0.2 38 | 5.5,3.5,1.3,0.2 39 | 4.9,3.1,1.5,0.1 40 | 4.4,3.0,1.3,0.2 41 | 5.1,3.4,1.5,0.2 42 | 5.0,3.5,1.3,0.3 43 | 4.5,2.3,1.3,0.3 44 | 4.4,3.2,1.3,0.2 45 | 5.0,3.5,1.6,0.6 46 | 5.1,3.8,1.9,0.4 47 | 4.8,3.0,1.4,0.3 48 | 5.1,3.8,1.6,0.2 49 | 4.6,3.2,1.4,0.2 50 | 5.3,3.7,1.5,0.2 51 | 5.0,3.3,1.4,0.2 52 | 7.0,3.2,4.7,1.4 53 | 6.4,3.2,4.5,1.5 54 | 6.9,3.1,4.9,1.5 55 | 5.5,2.3,4.0,1.3 56 | 6.5,2.8,4.6,1.5 57 | 5.7,2.8,4.5,1.3 58 | 6.3,3.3,4.7,1.6 59 | 4.9,2.4,3.3,1.0 60 | 6.6,2.9,4.6,1.3 61 | 5.2,2.7,3.9,1.4 62 | 5.0,2.0,3.5,1.0 63 | 5.9,3.0,4.2,1.5 64 | 6.0,2.2,4.0,1.0 65 | 6.1,2.9,4.7,1.4 66 | 5.6,2.9,3.6,1.3 67 | 6.7,3.1,4.4,1.4 68 | 5.6,3.0,4.5,1.5 69 | 5.8,2.7,4.1,1.0 70 | 6.2,2.2,4.5,1.5 71 | 5.6,2.5,3.9,1.1 72 | 5.9,3.2,4.8,1.8 73 | 6.1,2.8,4.0,1.3 74 | 6.3,2.5,4.9,1.5 75 | 6.1,2.8,4.7,1.2 76 | 6.4,2.9,4.3,1.3 77 | 6.6,3.0,4.4,1.4 78 | 6.8,2.8,4.8,1.4 79 | 6.7,3.0,5.0,1.7 80 | 6.0,2.9,4.5,1.5 81 | 5.7,2.6,3.5,1.0 82 | 5.5,2.4,3.8,1.1 83 | 5.5,2.4,3.7,1.0 84 | 5.8,2.7,3.9,1.2 85 | 6.0,2.7,5.1,1.6 86 | 5.4,3.0,4.5,1.5 87 | 6.0,3.4,4.5,1.6 88 | 6.7,3.1,4.7,1.5 89 | 6.3,2.3,4.4,1.3 90 | 5.6,3.0,4.1,1.3 91 | 5.5,2.5,4.0,1.3 92 | 5.5,2.6,4.4,1.2 93 | 6.1,3.0,4.6,1.4 94 | 5.8,2.6,4.0,1.2 95 | 5.0,2.3,3.3,1.0 96 | 5.6,2.7,4.2,1.3 97 | 5.7,3.0,4.2,1.2 98 | 5.7,2.9,4.2,1.3 99 | 6.2,2.9,4.3,1.3 100 | 5.1,2.5,3.0,1.1 101 | 5.7,2.8,4.1,1.3 102 | 6.3,3.3,6.0,2.5 103 | 5.8,2.7,5.1,1.9 104 | 7.1,3.0,5.9,2.1 105 | 6.3,2.9,5.6,1.8 106 | 6.5,3.0,5.8,2.2 107 | 7.6,3.0,6.6,2.1 108 | 4.9,2.5,4.5,1.7 109 | 7.3,2.9,6.3,1.8 110 | 6.7,2.5,5.8,1.8 111 | 7.2,3.6,6.1,2.5 112 | 6.5,3.2,5.1,2.0 113 | 6.4,2.7,5.3,1.9 114 | 6.8,3.0,5.5,2.1 115 | 5.7,2.5,5.0,2.0 116 | 5.8,2.8,5.1,2.4 117 | 6.4,3.2,5.3,2.3 118 | 6.5,3.0,5.5,1.8 119 | 7.7,3.8,6.7,2.2 120 | 7.7,2.6,6.9,2.3 121 | 6.0,2.2,5.0,1.5 122 | 6.9,3.2,5.7,2.3 123 | 5.6,2.8,4.9,2.0 124 | 7.7,2.8,6.7,2.0 125 | 6.3,2.7,4.9,1.8 126 | 6.7,3.3,5.7,2.1 127 | 7.2,3.2,6.0,1.8 128 | 6.2,2.8,4.8,1.8 129 | 6.1,3.0,4.9,1.8 130 | 6.4,2.8,5.6,2.1 131 | 7.2,3.0,5.8,1.6 132 | 7.4,2.8,6.1,1.9 133 | 7.9,3.8,6.4,2.0 134 | 6.4,2.8,5.6,2.2 135 | 6.3,2.8,5.1,1.5 136 | 6.1,2.6,5.6,1.4 137 | 7.7,3.0,6.1,2.3 138 | 6.3,3.4,5.6,2.4 139 | 6.4,3.1,5.5,1.8 140 | 6.0,3.0,4.8,1.8 141 | 6.9,3.1,5.4,2.1 142 | 6.7,3.1,5.6,2.4 143 | 6.9,3.1,5.1,2.3 144 | 5.8,2.7,5.1,1.9 145 | 6.8,3.2,5.9,2.3 146 | 6.7,3.3,5.7,2.5 147 | 6.7,3.0,5.2,2.3 148 | 6.3,2.5,5.0,1.9 149 | 6.5,3.0,5.2,2.0 150 | 6.2,3.4,5.4,2.3 151 | 5.9,3.0,5.1,1.8 -------------------------------------------------------------------------------- /specs/data/realistic_29_row_data.csv: -------------------------------------------------------------------------------- 1 | Filename,Gender,Weight,Comments 2 | /home/test/wow.png,Male,55.5, 3 | /home/test/wow2.png,Female,77,"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud ex" 4 | /home/test/wow3.png,Female,66, 5 | /home/test/wow4.png,Female,90, 6 | /home/test/wow5.png,Male,78, 7 | /home/test/wow2.png,Male,55, 8 | /home/test/wow3.png,Male,66, 9 | /home/test/wow4.png,Male,89,"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud ex" 10 | /home/test/wow5.png,Female,87, 11 | /home/test/wow2.png,Female,67, 12 | /home/test/wow3.png,Female,88, 13 | /home/test/wow4.png,Male,66,"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor" 14 | /home/test/wow5.png,Male,54, 15 | /home/test/wow2.png,Male,66, 16 | /home/test/wow3.png,Male,87, 17 | /home/test/wow4.png,Female,87,Lorem ipsum 18 | /home/test/wow5.png,Female,57, 19 | /home/test/wow2.png,Female,67,"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud ex" 20 | /home/test/wow3.png,Male,55, 21 | /home/test/wow4.png,Male,76, 22 | /home/test/wow5.png,Male,88, 23 | /home/test/wow2.png,Male,99, 24 | /home/test/wow3.png,Female,111,"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud ex" 25 | /home/test/wow4.png,Female,44, 26 | /home/test/wow5.png,Female,56, 27 | /home/test/wow2.png,Male,88, 28 | /home/test/wow3.png,Male,99, 29 | /home/test/wow4.png,Male,99, 30 | -------------------------------------------------------------------------------- /specs/data/sampler_csv_files/index.csv: -------------------------------------------------------------------------------- 1 | filename,label1,label2,label3 2 | 1,A,, 3 | 2,A,, 4 | 3,B,A, 5 | 4,B,, 6 | 5,A,, 7 | 6,A,, 8 | 7,B,, 9 | 8,B,, 10 | 9,A,B,C 11 | 10,A,, 12 | 11,A,, 13 | 12,A,, 14 | 13,B,A, 15 | 14,A,B, 16 | 15,B,, 17 | 16,A,, 18 | 17,B,, 19 | 18,A,, 20 | 19,A,, 21 | 20,A,, 22 | -------------------------------------------------------------------------------- /specs/data/sampler_csv_files/index3.csv: -------------------------------------------------------------------------------- 1 | filename,label 2 | 1,A 3 | 2,A 4 | 3,B 5 | 4,B 6 | 5,A 7 | 6,A 8 | 7,B 9 | 8,B 10 | 9,A 11 | 10,A 12 | 11,A 13 | 12,A 14 | 13,B 15 | 14,A 16 | 15,B 17 | 16,A 18 | 17,B 19 | 18,A 20 | 19,A 21 | 20,A 22 | -------------------------------------------------------------------------------- /specs/data/simple_short.csv: -------------------------------------------------------------------------------- 1 | Col A,Col B,Col C 2 | 1,0.2,1000 3 | 2,0.3,0.1 4 | 3,0.4,9999999999 5 | 4,0.5,-222 6 | -------------------------------------------------------------------------------- /specs/dataframe/column_order_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Column order functionality", function() 23 | 24 | it("Keeps the right order when loading a CSV",function() 25 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 26 | assert.are.same(a.column_order, 27 | {[1] = "Col A", 28 | [2] = "Col B", 29 | [3] = "Col C"}) 30 | end) 31 | 32 | it("Keeps the right order when loading a table",function() 33 | local a = Dataframe() 34 | local first = {1,2,3} 35 | local second = 
{"2","1","3"} 
36 | local third = {"2","a","3"} 
37 | local column_order = {[1] = 'firstColumn', 
38 | [2] = 'secondColumn', 
39 | [3] = 'thirdColumn'} 
40 | local data = {['firstColumn']=first, 
41 | ['secondColumn']=second, 
42 | ['thirdColumn']=third} 
43 | 
44 | a:load_table{data=Df_Dict(data), column_order = Df_Array(column_order)} 
45 | 
46 | assert.are.same(a.column_order, column_order) 
47 | 
48 | column_order[2] = nil 
49 | assert.is.error(function() a:load_table{data=Df_Dict(data), column_order = column_order} end) 
50 | end) 
51 | 
52 | it("Keeps the right order when saving to CSV",function() 
53 | local a = Dataframe() 
54 | local first = {1,2,3} 
55 | local second = {"Wow it's tricky","1,2","323."} 
56 | local third = {"\"","a\"a","3"} 
57 | 
58 | local data = {['firstColumn']=first, 
59 | ['secondColumn']=second, 
60 | ['thirdColumn']=third} 
61 | 
62 | c_order = {[1] = "firstColumn", 
63 | [4] = "secondColumn", 
64 | [3] = "thirdColumn"} 
65 | 
66 | assert.is.error(function() a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} end) 
67 | 
68 | c_order = {[1] = "firstColumn", 
69 | [3] = "thirdColumn"} 
70 | 
71 | assert.is.error(function() a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} end) 
72 | 
73 | c_order = {[1] = "firstColumn", 
74 | [2] = "secondColumn", 
75 | [3] = "thirdColumn"} 
76 | 
77 | a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} 
78 | a:to_csv{path = "tricky_csv.csv"} 
79 | a:load_csv{path = "tricky_csv.csv", verbose = false} 
80 | 
81 | for cn,cols in pairs(a.dataset) do 
82 | assert.are.same(cols, data[cn]) 
83 | end 
84 | assert.are.same(a.column_order, c_order) 
85 | 
86 | os.remove("tricky_csv.csv") 
87 | end) 
88 | 
89 | it("Keeps the right order when saving to Tensor",function() 
90 | local a = Dataframe() 
91 | local first = {1,2,3} 
92 | local second = {"A","B","323."} 
93 | local third = 2.2 
94 | 
95 | data = {['1st']=first, 
96 | ['2nd']=second, 
97 | ['3rd']=third} 
98 | 
99 | c_order = {[1] = "1st", 
100 | [2] = "2nd", 
101 | [3] = "3rd"} 
102 | 
103 | a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} 
104 | tnsr = a:to_tensor() 
105 | 
106 | assert.is.equal(tnsr:size(1),a:shape()["rows"]) 
107 | assert.is.equal(tnsr:size(2),a:shape()["cols"] - 1) 
108 | 
109 | sum = 0 
110 | col_no = a:get_column_order{column_name='1st', as_tensor = true} 
111 | 
112 | for i=1,tnsr:size(1) do 
113 | sum = math.abs(tnsr[i][col_no] - a:get_column('1st')[i]) 
114 | end 
115 | 
116 | assert.is_true(sum < 10^-5) 
117 | 
118 | sum = 0 
119 | col_no = a:get_column_order{column_name='3rd', as_tensor = true} 
120 | 
121 | for i=1,tnsr:size(1) do 
122 | sum = math.abs(tnsr[i][col_no] - a:get_column('3rd')[i]) 
123 | end 
124 | 
125 | assert.is_true(sum < 10^-5) 
126 | 
127 | assert.is.equal(a:get_column_order{column_name = '2nd', as_tensor = true}, nil) 
128 | end) 
129 | 
130 | 
131 | it("Check that orders can be swapped",function() 
132 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 
133 | a:swap_column_order("Col A", "Col B") 
134 | assert.are.same(a.column_order, 
135 | {[1] = "Col B", 
136 | [2] = "Col A", 
137 | [3] = "Col C"}) 
138 | end) 
139 | 
140 | it("Check that orders can be set using pos_column_order",function() 
141 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 
142 | a:pos_column_order("Col B", 2) 
143 | assert.are.same(a.column_order, 
144 | {[1] = "Col A", 
145 | [2] = "Col B", 
146 | [3] = "Col C"}) 
147 | 
148 | a:pos_column_order("Col B", 1) 
149 | assert.are.same(a.column_order, 
150 | {[1] = "Col B", 
151 | [2] = "Col A", 
152 | [3] = "Col C"}) 
153 | 
154 | 
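-- The asserts below also cover out-of-range positions: -1 and 100 are
-- expected to be clamped to the first and last valid position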
a:pos_column_order("Col C", 1) 
155 | assert.are.same(a.column_order, 
156 | {[1] = "Col C", 
157 | [2] = "Col B", 
158 | [3] = "Col A"}) 
159 | 
160 | 
161 | a:pos_column_order("Col C", -1) 
162 | assert.are.same(a.column_order, 
163 | {[1] = "Col C", 
164 | [2] = "Col B", 
165 | [3] = "Col A"}) 
166 | 
167 | a:pos_column_order("Col C", 100) 
168 | assert.are.same(a.column_order, 
169 | {[1] = "Col B", 
170 | [2] = "Col A", 
171 | [3] = "Col C"}) 
172 | end) 
173 | end) 
174 | 
-------------------------------------------------------------------------------- /specs/dataframe/export_data_spec.lua: -------------------------------------------------------------------------------- 
1 | require 'lfs' 
2 | 
3 | -- Ensure the test is launched within the specs/ folder 
4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 
5 | 
6 | local initial_dir = lfs.currentdir() 
7 | 
8 | -- Go to specs folder 
9 | while (not string.match(lfs.currentdir(), "/specs$")) do 
10 | lfs.chdir("..") 
11 | end 
12 | 
13 | local specs_dir = lfs.currentdir() 
14 | lfs.chdir("..")-- one more directory and it is lib root 
15 | 
16 | -- Include Dataframe lib 
17 | dofile("init.lua") 
18 | 
19 | -- Go back into initial dir 
20 | lfs.chdir(initial_dir) 
21 | 
22 | describe("Exporting data process", function() 
23 | 
24 | describe("for CSV files",function() 
25 | it("Exports the Dataframe to a CSV file",function() 
26 | local a = Dataframe(specs_dir.."/data/full.csv") 
27 | 
28 | local file_name = specs_dir.."/data/copy_of_full.csv" 
29 | a:to_csv(file_name) 
30 | local b = Dataframe(file_name) 
31 | 
32 | for k,v in pairs(a.dataset) do 
33 | -- Avoid errors on NaN values 
34 | a:fill_na(k,8) 
35 | b:fill_na(k,8) 
36 | 
37 | assert.are.same(a:get_column(k), 
38 | b:get_column(k)) 
39 | end 
40 | 
41 | os.remove(file_name) 
42 | end) 
43 | 
44 | describe("Column order functionality",function() 
45 | local a = Dataframe() 
46 | local data = { 
47 | ['firstColumn']={1,2,3}, 
48 | ['secondColumn']={"Wow it's tricky","1,2","323."}, 
49 | ['thirdColumn']={"\"","a\"a","3"} 
50 | } 
51 | 
52 | it("Raises an error if the provided column order has non-contiguous indexes",function() 
53 | c_order = { 
54 | [1] = "firstColumn", 
55 | [4] = "secondColumn", 
56 | [3] = "thirdColumn" 
57 | } 
58 | 
59 | assert.has.error(function() a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} end) 
60 | 
61 | c_order = { 
62 | [1] = "firstColumn", 
63 | [3] = "thirdColumn" 
64 | } 
65 | 
66 | assert.has.error(function() a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} end) 
67 | end) 
68 | 
69 | it("Keeps the column order when exporting",function() 
70 | c_order = { 
71 | [1] = "firstColumn", 
72 | [2] = "secondColumn", 
73 | [3] = "thirdColumn" 
74 | } 
75 | 
76 | a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} 
77 | a:to_csv(specs_dir.."/data/tricky_csv.csv") 
78 | a:load_csv(specs_dir.."/data/tricky_csv.csv") 
79 | 
80 | assert.are.same(a.column_order, c_order) 
81 | 
82 | os.remove(specs_dir.."/data/tricky_csv.csv") 
83 | end) 
84 | end) 
85 | end) 
86 | 
87 | describe("for torch tensors",function() 
88 | 
89 | it("Exports the Dataframe to a tensor",function() 
90 | local a = Dataframe(specs_dir.."/data/advanced_short.csv") 
91 | -- Avoid NaN comparison (which is always false) 
92 | a:fill_all_na(2) 
93 | a:to_tensor{filename=specs_dir.."/data/tensor_test.th7"} 
94 | 
95 | tnsr = a:to_tensor() 
96 | tnsr2 = torch.load('./data/tensor_test.th7') 
97 | 
98 | assert.is_true(torch.all(tnsr:eq(tnsr2))) 
99 | 
100 | assert.is.equal(tnsr:size(1),a:shape()["rows"]) 
101 | 
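-- to_tensor only exports numerical columns, hence the comparison against
-- get_numerical_colnames() rather than the dataframe's full column count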
assert.is.equal(tnsr:size(2),table.exact_length(a:get_numerical_colnames())) 102 | 103 | sum = 0 104 | col_no = a:get_column_order('Col A') 105 | 106 | for i=1,tnsr:size(1) do 107 | sum = sum + math.abs(tnsr[i][col_no] - a:get_column('Col A')[i]) 108 | end 109 | 110 | assert.near(0, sum, 10^-5) 111 | os.remove(specs_dir.."/data/tensor_test.th7") 112 | end) 113 | 114 | it("Keeps the right order when saving to Tensor",function() 115 | local a = Dataframe() 116 | 117 | data = { 118 | ['1st']={1,2,3}, 119 | ['2nd']={"A","B","323."}, 120 | ['3rd']=2.2 121 | } 122 | 123 | c_order = { 124 | [1] = "1st", 125 | [2] = "2nd", 126 | [3] = "3rd" 127 | } 128 | 129 | a:load_table{data=Df_Dict(data), column_order=Df_Array(c_order)} 130 | tnsr = a:to_tensor() 131 | 132 | assert.is.equal(tnsr:size(1),a:shape()["rows"]) 133 | assert.is.equal(tnsr:size(2),a:shape()["cols"] - 1) 134 | 135 | sum = 0 136 | col_no = a:get_column_order{column_name='1st', as_tensor = true} 137 | for i=1,tnsr:size(1) do 138 | sum = sum + math.abs(tnsr[i][col_no] - a:get_column('1st')[i]) 139 | end 140 | 141 | assert.near(0, sum, 10^-5) 142 | 143 | sum = 0 144 | col_no = a:get_column_order{column_name='3rd', as_tensor = true} 145 | for i=1,tnsr:size(1) do 146 | sum = sum + math.abs(tnsr[i][col_no] - a:get_column('3rd')[i]) 147 | end 148 | 149 | assert.near(0, sum, 10^-5) 150 | end) 151 | end) 152 | 153 | describe("torchnet get compatibility",function() 154 | it("The get should retrieve a single row in tensor format",function() 155 | local a = Dataframe(specs_dir.."/data/advanced_short.csv") 156 | 157 | tnsr = a:get(1) 158 | 159 | assert.is.equal(tnsr:size(1),1) 160 | assert.is.equal(tnsr:size(2),table.exact_length(a:get_numerical_colnames())) 161 | end) 162 | end) 163 | 164 | describe("to_csv with boolean values", function() 165 | -- Do not use advanced_short since it has nan (0/0) values, and nan ~= nan is always true 166 | local df = Dataframe() 167 | 168 | df:load_table{ 169 | data = Df_Dict{ 170 | A = {1,2,3}, 171 | B = {"A", "B", 'true'}, 172 | C = {true, false, false} 173 | } 174 | } 175 | 176 | it("Saves with a boolean", function() 177 | df:to_csv("test.csv") 178 | local df2 = Dataframe("test.csv") 179 | 180 | os.remove("test.csv") 181 | 182 | assert.are.same(df.column_order, df2.column_order) 183 | for _,cn in ipairs(df.column_order) do 184 | assert.are.same(df:get_column(cn), df2:get_column(cn)) 185 | end 186 | end) 187 | end) 188 | 189 | end) 190 | -------------------------------------------------------------------------------- /specs/dataframe/metatable_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Indexing the dataframe", function() 23 | 24 | describe("Retrieving index",function() 25 | local df = Dataframe(specs_dir.."/data/simple_short.csv") 26 | assert.are.same(df["$Col A"], df:get_column('Col A')) 27 | assert.are.same(df["$Col C"], df:get_column('Col C')) 28 | end) 29 | 30 | describe("Retrieving rows",function() 31 | 
local df = Dataframe(specs_dir.."/data/simple_short.csv") 32 | -- Wait until https://github.com/torch/torch7/issues/693 is resolved 33 | it("Retrieves a single row",function() 34 | local subset = df[1] 35 | assert.is.truthy(subset, "Fails to subset row") 36 | assert.are.same(subset["Col A"], 1) 37 | assert.are.same(subset["Col C"], 1000) 38 | end) 39 | 40 | it("Retrieves several rows",function() 41 | local subset = df[Df_Array(1, 3)] 42 | assert.is.truthy(subset, "Fails to subset rows") 43 | assert.are.same(subset:size(1), 2) 44 | assert.are.same(subset:size(2), df:size(2)) 45 | end) 46 | 47 | it("Retrieves a continuous set of rows",function() 48 | local subset = df["1:4"] 49 | assert.is.truthy(subset, "Fails to subset rows with continuous syntax") 50 | assert.are.same(subset:size(1), 4) 51 | assert.are.same(subset:size(2), df:size(2)) 52 | end) 53 | end) 54 | 55 | describe("Set row via the newindex",function() 56 | local df = Dataframe(specs_dir.."/data/simple_short.csv") 57 | 58 | it("Set a single row",function() 59 | df[1] = {["Col A"] = 3231} 60 | assert.are.same(df[1]["Col A"], 3231) 61 | end) 62 | end) 63 | 64 | describe("Create a copy of the table",function() 65 | local df = Dataframe(Df_Dict({a={1,2,3}})) 66 | 67 | it("Check that it's a true copy and not a reference",function() 68 | local new_df = df:copy() 69 | new_df[1] = {a=2} 70 | assert.are.same(new_df:size(1), df:size(1)) 71 | assert.are.same(new_df:size(2), df:size(2)) 72 | assert.is_false(new_df[1].a == df[1].a) 73 | 74 | -- Check that this also matches the shape 75 | assert.are.same(new_df:shape(), df:shape()) 76 | end) 77 | end) 78 | 79 | it("Returns the size of the Dataframe",function() 80 | local a = Dataframe(Df_Dict({test = {1,nil,3, 4}, test2 = {5, 9, 99, 88}})) 81 | 82 | assert.are.same(a:size(1), 4) 83 | assert.are.same(a:size(2), 2) 84 | end) 85 | 86 | describe("Gets the version number",function() 87 | local df = Dataframe() 88 | 89 | it("The torch.version goes to version()",function() 90 | assert.are.same(torch.version(df), df:version()) 91 | end) 92 | end) 93 | 94 | describe("Check the __len__",function() 95 | local df = Dataframe(Df_Dict{a={1,2,3,4,5}}) 96 | 97 | it("__len__ should return the n_rows",function() 98 | assert.are.same(df:__len__(), df.n_rows) 99 | end) 100 | 101 | it("# should return the n_rows #skip_version_LUA51",function() 102 | assert.are.same(#df, df.n_rows) 103 | end) 104 | end) 105 | 106 | describe("Check the __eq__",function() 107 | it("Should be equal",function() 108 | local a = Dataframe(Df_Dict{a={1,2,3,4,5}}) 109 | local b = Dataframe(Df_Dict{a={1,2,3,4,5}}) 110 | 111 | assert.is_true(a == b) 112 | assert.is_false(a ~= b) 113 | 114 | a:set(2, Df_Dict{a=0/0}) 115 | b:set(2, Df_Dict{a=0/0}) 116 | assert.is_true(a == b, "Fails with nan values") 117 | assert.is_false(a ~= b, "Fails with nan values") 118 | end) 119 | 120 | it("Should not be equal",function() 121 | local a = Dataframe(Df_Dict{a={1,2,3,4,5}}) 122 | local b = Dataframe(Df_Dict{a={1,3,4,5}}) 123 | local c = Dataframe(Df_Dict{a={1,2,3,4,6}}) 124 | local d = Dataframe(Df_Dict{a={1,2,3,0/0,6}}) 125 | local e = Dataframe(Df_Dict{b={1,2,3,4,5}}) 126 | local f = Dataframe(Df_Dict{a={1,2,3,4,5}, 127 | b={1,2,3,4,5}}) 128 | 129 | assert.is_true(a ~= b, "Fails to detect differing row lengths") 130 | assert.is_true(a ~= c, "Fails to detect differing values") 131 | assert.is_true(a ~= d, "Fails to detect a nan difference") 132 | assert.is_true(a ~= e, "Fails to detect differing column names") 133 | assert.is_true(a ~= f, "Fails to detect a differing number of columns") 134 | end) 135 | end) 136 | 
137 | end) 138 | -------------------------------------------------------------------------------- /specs/dataframe/missing_data_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Dataframe class", function() 23 | 24 | it("Counts missing values", function() 25 | local a = Dataframe(specs_dir.."/data/full.csv") 26 | 27 | assert.are.same(a:count_na{as_dataframe = false}, {["Col A"]= 0, ["Col B"]= 0, ["Col C"]=1, ["Col D"]=1}) 28 | end) 29 | 30 | it("Fills missing value(s) for a given column(s)",function() 31 | local a = Dataframe(specs_dir.."/data/advanced_short.csv") 32 | 33 | assert.has.error(function() a:fill_na("Random column") end) 34 | 35 | a:fill_na("Col A", 1) 36 | assert.are.same(a:count_na{as_dataframe = false}, 37 | {["Col A"]= 0, ["Col B"]= 0, ["Col C"]=1}) 38 | 39 | a:fill_na("Col C", 1) 40 | assert.are.same(a:count_na{as_dataframe = false}, {["Col A"]= 0, ["Col B"]= 0, ["Col C"]=0}) 41 | 42 | assert.are.same(a:get_column("Col C"), {8, 1, 9}) 43 | end) 44 | 45 | it("Fills all Dataframe's missing values", function() 46 | local a = Dataframe(specs_dir.."/data/advanced_short.csv") 47 | 48 | a.dataset['Col A'][3] = nil 49 | 50 | local cnt, tot = a:count_na{as_dataframe = false} 51 | assert.are.same(cnt, {["Col A"]= 1, ["Col B"]= 0, ["Col C"]=1}) 52 | assert.are.same(tot, 2) 53 | 54 | 55 | a:fill_all_na(-1) 56 | 57 | assert.are.same(a:count_na{as_dataframe = false}, {["Col A"]= 0, ["Col B"]= 0, ["Col C"]=0}) 58 | assert.are.same(a:get_column('Col A'), {1,2,-1}) 59 | end) 60 | 61 | it("The count_na should #1 return a Dataframe by default", function() 62 | local a = Dataframe(specs_dir.."/data/advanced_short.csv") 63 | 64 | local ret = a:count_na() 65 | 66 | assert.are.same(torch.type(ret), "Dataframe") 67 | 68 | assert.are.same(ret:size(), 3, "3 columns should render 3 rows") 69 | end) 70 | 71 | end) 72 | -------------------------------------------------------------------------------- /specs/dataframe/row_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Row functions", function() 23 | it("Appends new data",function() 24 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 25 | 26 | a:append(Df_Dict({['Col A']={15},['Col B']={25},['Col C']={35}})) 27 | assert.are.same(a:shape(), {rows=5, cols=3})-- "The simple_short.csv is 4x3 
after insert should be 5x3" 28 | end) 29 | 30 | it("Appends new columns together with new data",function() 31 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 32 | 33 | a:append(Df_Dict({['Col A']={15},['Col D']={25},['Col C']={35}})) 34 | assert.are.same(a:shape(), {rows=5, cols=4})-- "The simple_short.csv is 4x3 after insert should be 5x3" 35 | end) 36 | 37 | it("Appends dataframe",function() 38 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 39 | 40 | b = Dataframe() 41 | b:load_table{data = Df_Dict({['Col A']={15},['Col B']={25},['Col C']={35}}), 42 | column_order = Df_Array('Col B', 'Col C', 'Col A')} 43 | a:append(b) 44 | assert.are.same(a:shape(), {rows=5, cols=3})-- "The simple_short.csv is 4x3 after insert should be 5x3" 45 | end) 46 | 47 | it("Appends dataframe to empty dataset should copy the original including specs", 48 | function() 49 | local a = Dataframe() 50 | 51 | b = Dataframe() 52 | b:load_table{data = Df_Dict({['Col A']={15},['Col B']={25},['Col C']={35}}), 53 | column_order = Df_Array('Col B', 'Col C', 'Col A')} 54 | a:append(b) 55 | assert.are.same(a:shape(), {rows=1, cols=3})-- "The simple_short.csv is 4x3 after insert should be 5x3" 56 | assert.are.same(a.column_order, b.column_order) 57 | end) 58 | 59 | it("Check rbind new columns together with new data",function() 60 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 61 | 62 | a:rbind(Df_Dict({['Col A']={15},['Col D']={25},['Col C']={35}})) 63 | assert.are.same(a:shape(), {rows=5, cols=4})-- "The simple_short.csv is 4x3 after insert should be 5x3" 64 | end) 65 | 66 | it("Check rbind with dataframe",function() 67 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 68 | 69 | b = Dataframe() 70 | b:load_table{data = Df_Dict({['Col A']={15},['Col B']={25},['Col C']={35}})} 71 | a:rbind(b) 72 | assert.are.same(a:shape(), {rows=5, cols=3})-- "The simple_short.csv is 4x3 after insert should be 5x3" 73 | end) 74 | 75 | it("Inserts a row", function() 76 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 77 | 78 | a:insert(2, Df_Dict({['Col A']={15},['Col E']={25},['Col C']={35}})) 79 | assert.are.same(a:shape(), {rows=5, cols=4}) 80 | assert.are.same(a:get_column('Col A'), {1, 15, 2, 3, 4}) 81 | assert.are.same(a:get_column('Col B'), {0.2, 0/0, 0.3, 0.4, 0.5}) 82 | end) 83 | 84 | it("Inserts three rows", function() 85 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 86 | a:insert(2, Df_Dict({['Col A']={15, 16, 17}})) 87 | assert.are.same(a:shape(), {rows=7, cols=3}) 88 | assert.are.same(a:get_column('Col A'), {1, 15, 16, 17, 2, 3, 4}) 89 | assert.are.same(a:get_column('Col B'), {.2, 0/0, 0/0, 0/0, .3, .4, .5}) 90 | end) 91 | 92 | it("Removes a row given an index",function() 93 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 94 | 95 | a:remove_index(1) 96 | assert.are.same(a:shape(), {rows=3, cols=3})-- "The simple_short.csv is 4x3" 97 | assert.are.same(a:get_column('Col A'), {2,3,4}) 98 | 99 | a:remove_index(1) 100 | a:remove_index(1) 101 | a:remove_index(1) 102 | assert.are.same(a:shape(), {rows=0, cols=3}) 103 | end) 104 | 105 | it("Check that append calls load_table", function() 106 | local a = Dataframe() 107 | a:append(Df_Dict{b=1, a=2}) 108 | 109 | a:assert_has_column('a') 110 | a:assert_has_column('b') 111 | 112 | assert.are.same(a:get_column('a')[1], 2) 113 | end) 114 | 115 | it("Check that append calls load_table with column order", function() 116 | local a = Dataframe() 117 | a:append(Df_Dict{b=1, a=2}, Df_Array("b", "a")) 118 | 119 | local b = 
Dataframe() 120 | b:append(Df_Dict{b=1, a=2}, Df_Array("a", "b")) 121 | 122 | assert.are.not_equal(a.column_order, b.column_order) 123 | assert.are.same(a:get_column('a'), b:get_column('a')) 124 | assert.are.same(a:get_column('b'), b:get_column('b')) 125 | end) 126 | end) 127 | -------------------------------------------------------------------------------- /specs/dataframe/select_set_update_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Data manipulation incl. where, update etc.", function() 23 | 24 | it("Retrieves a value in a column #where",function() 25 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 26 | 27 | local ret_val = a:where('Col A', 2) 28 | assert.are.same(ret_val:get_column("Col A"), {2}) 29 | assert.are.same(ret_val:get_column("Col C"), {.1}) 30 | assert.is.equal(torch.type(ret_val), "Dataframe") 31 | assert.are.same(ret_val:shape(), {rows = 1, cols = 3}) 32 | 33 | local ret_val = a:where('Col A', 222222222) 34 | assert.are.same(ret_val:shape(), {rows = 0, cols = 3}) 35 | 36 | a:__init() 37 | a:load_csv{path = specs_dir.."/data/advanced_short.csv", 38 | verbose = false} 39 | ret_val = a:where('Col B', 'B') 40 | assert.are.same(ret_val:shape(), {rows = 2, cols = 3}) 41 | col_c = ret_val:get_column('Col C') 42 | assert.is_true(isnan(col_c[1])) 43 | assert.is.equal(col_c[2], 9) 44 | assert.are.same(ret_val:get_column('Col A'), {2, 3}) 45 | end) 46 | 47 | it("Updates multiple rows according to a custom condition", function() 48 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 49 | 50 | local start_val = a:get_column('Col B') 51 | start_val[1] = start_val[1] * 2 52 | 53 | a:update( 54 | function(s_row) return s_row['Col A'] == 1 end, 55 | function(upd_row) upd_row['Col B'] = upd_row['Col B'] * 2 return upd_row end 56 | ) 57 | assert.are.same(a:get_column('Col B'), start_val) 58 | 59 | -- Check a double match 60 | local b = Dataframe(specs_dir.."/data/advanced_short.csv") 61 | 62 | start_val = b:get_column('Col A') 63 | start_val[2] = start_val[2] * 2 64 | start_val[3] = start_val[3] * 2 65 | b:update( 66 | function(s_row) return s_row['Col B'] == 1 end, 67 | function(upd_row) upd_row['Col A'] = upd_row['Col A'] * 2 return upd_row end 68 | ) 69 | 70 | assert.are.same(b:get_column('Col A'), start_val) 71 | end) 72 | 73 | it("Updates a single cell given a column name and a value #set",function() 74 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 75 | 76 | a:set(1000, 'Col C', Df_Dict({['Col A']=99})) 77 | assert.is.equal(a:get_column('Col A')[1], 99) 78 | end) 79 | 80 | it("Updates all matching cells when using #set",function() 81 | local a = Dataframe(Df_Dict{a = {1,2,3}, b = {1,1,2}}) 82 | 83 | a:set(1, 'b', Df_Dict({['a']=4})) 84 | assert.are.same(a:get_column('a'), {4,4,3}) 85 | end) 86 | 87 | it("Updates a single cell given an index",function() 88 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 89 | 
90 | a:set(2, Df_Dict({['Col A']=99})) 91 | assert.is.equal(a:get_column('Col A')[2], 99) 92 | end) 93 | 94 | it("Updates a unique row given an index",function() 95 | local a = Dataframe(specs_dir.."/data/simple_short.csv") 96 | 97 | new = { 98 | ['Col A']=4, 99 | ['Col B']=4, 100 | ['Col C']=4 101 | } 102 | a:_update_single_row(1, Df_Tbl(new), Df_Tbl(a:get_row(1))) 103 | assert.are.same(a:get_row(1), new) 104 | end) 105 | 106 | describe("Check #wide2long", function() 107 | local df = Dataframe(Df_Dict({a = {1,2,3}, b={4,nil,5}, c={[3] = 6}})) 108 | a = df:wide2long(Df_Array("c", "b"), "id", "value") 109 | 110 | it("Check that the number of rows is correct", function() 111 | assert.are.same(a:where('a', 1):size(1), 1) 112 | assert.are.same(a:where('a', 2):size(1), 1) 113 | assert.are.same(a:where('a', 3):size(1), 2) 114 | end) 115 | 116 | it("Check that the value is correct when having one value", function() 117 | local row = a:where('a', 1):get_row(1) 118 | assert.are.same(row['id'], 'b') 119 | assert.are.same(row['value'], 4) 120 | end) 121 | 122 | 123 | it("Check that the value is correct when having no value", function() 124 | local row = a:where('a', 2):get_row(1) 125 | assert.is_true(isnan(row['id'])) 126 | assert.is_true(isnan(row['value'])) 127 | end) 128 | 129 | it("Check that the order is correct when having multiple values", function() 130 | local row = a:where('a', 3): 131 | where('id', 'b'): 132 | get_row(1) 133 | assert.are.same(row['id'], 'b') 134 | assert.are.same(row['value'], 5) 135 | 136 | local row = a:where('a', 3): 137 | where('id', 'c'): 138 | get_row(1) 139 | assert.are.same(row['id'], 'c') 140 | assert.are.same(row['value'], 6) 141 | end) 142 | 143 | local df = Dataframe(Df_Dict({a = {1,2,3}, b={4,nil,5}, c={[3] = 6}})) 144 | b = df:wide2long("[bc]", "id", "value") 145 | it("Check that this works the same with regular expressions", function() 146 | assert.are.same(b:where('a', 1):size(1), 1) 147 | assert.are.same(b:where('a', 2):size(1), 1) 148 | assert.are.same(b:where('a', 3):size(1), 2) 149 | 150 | local row = b:where('a', 3): 151 | where('id', 'b'): 152 | get_row(1) 153 | assert.are.same(row['id'], 'b') 154 | assert.are.same(row['value'], 5) 155 | 156 | local row = b:where('a', 3): 157 | where('id', 'c'): 158 | get_row(1) 159 | assert.are.same(row['id'], 'c') 160 | assert.are.same(row['value'], 6) 161 | end) 162 | 163 | c = df:wide2long("c", "id", "value") 164 | it("Check that different columns result in a different result", function() 165 | assert.is_false(a == c) 166 | end) 167 | end) 168 | end) 169 | -------------------------------------------------------------------------------- /specs/dataframe/serialization_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Serialization", function() 23 | -- Do not use advanced_short since it has nan (0/0) values, and nan ~= nan is always true 24 | local df = Dataframe() 25 | 26 | it("Deserializes a simple 
Dataframe object",function() 27 | df:load_csv{path = specs_dir.."/data/simple_short.csv", verbose = false} 28 | 29 | b = torch.serialize(df) 30 | c = torch.deserialize(b) 31 | 32 | assert.is.equal(torch.typename(c), "Dataframe") 33 | 34 | --tester:eq(df, c) 35 | end) 36 | 37 | it("Saves then load a Dataframe object",function() 38 | torch.save("test.t7", df) 39 | c = torch.load("test.t7") 40 | 41 | os.remove("test.t7") 42 | 43 | assert.is.equal(torch.typename(c), "Dataframe") 44 | 45 | --tester:eq(df, c) 46 | end) 47 | 48 | it("Saves with init",function() 49 | local a = Dataframe(specs_dir.."/data/realistic_29_row_data.csv") 50 | 51 | a:create_subsets() 52 | a:fill_all_na() 53 | 54 | torch.save("test.t7", a) 55 | c = torch.load("test.t7") 56 | 57 | os.remove("test.t7") 58 | 59 | assert.is.equal(torch.typename(c), "Dataframe") 60 | 61 | --tester:eq(a, c) 62 | end) 63 | end) 64 | -------------------------------------------------------------------------------- /specs/helper_classes/df_array_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Df_Array", function() 23 | local tableData = {1,2,3,4} 24 | 25 | it("can be init with a table",function() 26 | local array = Df_Array(tableData) 27 | 28 | assert.are.same(tableData,array.data) 29 | end) 30 | 31 | it("can be init with a Dataseries",function() 32 | local series = Dataseries(Df_Array(tableData)) 33 | local array = Df_Array(series) 34 | 35 | assert.are.same(tableData,array.data) 36 | end) 37 | 38 | it("can be init with a tensor",function() 39 | local tensor = torch.IntTensor(tableData) 40 | local array = Df_Array(tensor) 41 | 42 | assert.are.same(tableData,array.data) 43 | end) 44 | 45 | it("can be init with 'infinite' arguments",function() 46 | local array = Df_Array(1,2,3,4) 47 | 48 | assert.are.same(tableData,array.data) 49 | end) 50 | 51 | it("returns asked index with brackets",function() 52 | local array = Df_Array(tableData) 53 | 54 | assert.are.same(array[3],3) 55 | end) 56 | 57 | it("returns nil if index does not exists or it is not a number",function() 58 | local array = Df_Array(tableData) 59 | 60 | assert.are.same(array[42],nil) 61 | end) 62 | 63 | it("# returns its length",function() 64 | local array = Df_Array(tableData) 65 | 66 | assert.are.same(#array,4) 67 | end) 68 | end) -------------------------------------------------------------------------------- /specs/helper_classes/df_dict_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it 
is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Df_Dict",function() 23 | local simpleTable = {1,2,3,4} 24 | local simpleTableData = {["col1"]=1,["col2"]=2,["col3"]=3,["col4"]=4} 25 | local dimTableData = {["col1"]=1,["col2"]=2,["col3"]=3,["col4"]={4,5,6}} 26 | 27 | it("can be init with a simple table without keys",function() 28 | local dic = Df_Dict(simpleTable) 29 | assert.are.same(dic.data,simpleTable) 30 | assert.are.same(dic.keys,simpleTable) 31 | end) 32 | 33 | it("can be init with a simple table with keys",function() 34 | local dic = Df_Dict(simpleTableData) 35 | assert.are.same(dic.data,simpleTableData) 36 | end) 37 | 38 | it("can be init with a multi-dimensional table",function() 39 | local dic = Df_Dict(dimTableData) 40 | assert.are.same(dic.data,dimTableData) 41 | end) 42 | 43 | it("can check if all columns are the same size",function() 44 | local dic = Df_Dict(simpleTable) 45 | assert.is_true(dic:check_lengths()) 46 | 47 | dic = Df_Dict(simpleTableData) 48 | assert.is_true(dic:check_lengths()) 49 | 50 | dic = Df_Dict(dimTableData) 51 | assert.is_false(dic:check_lengths()) 52 | end) 53 | 54 | it("returns asked key's value with brackets",function() 55 | local dic = Df_Dict(simpleTable) 56 | assert.are.same(dic[3],3) 57 | 58 | dic = Df_Dict(simpleTableData) 59 | assert.are.same(dic["$col3"],3) 60 | 61 | dic = Df_Dict(dimTableData) 62 | assert.are.same(dic["$col4"],{4,5,6}) 63 | end) 64 | 65 | it("returns nil if index does not exist or it is not a number",function() 66 | local dic = Df_Dict(simpleTable) 67 | assert.are.same(dic[42],nil) 68 | end) 69 | 70 | it("# returns its length",function() 71 | local dic = Df_Dict(simpleTable) 72 | assert.are.same(#dic,4) 73 | 74 | dic = Df_Dict(simpleTableData) 75 | assert.are.same(#dic,4) 76 | 77 | dic = Df_Dict(dimTableData) 78 | assert.are.same(#dic,4) 79 | end) 80 | end) 81 | 82 | describe("Df_Tbl",function() 83 | local simpleTable = {1,2,3,4} 84 | 85 | it("can be init with a table",function() 86 | local tbl = Df_Tbl(simpleTable) 87 | assert.are.same(tbl.data,simpleTable) 88 | end) 89 | 90 | it("# returns its length",function() 91 | local tbl = Df_Tbl(simpleTable) 92 | assert.are.same(#tbl,4) 93 | end) 94 | end) 95 | -------------------------------------------------------------------------------- /specs/helper_classes/df_tbl_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("Df_Tbl",function() 23 | local simpleTable = {1,2,3,4} 24 | 25 | it("can be init with a table",function() 26 | local tbl = Df_Tbl(simpleTable) 27 | assert.are.same(tbl.data,simpleTable) 28 | end) 29 | 30 | it("# returns its length",function() 31 | local tbl = Df_Tbl(simpleTable) 32 | assert.are.same(#tbl,4) 33 | end) 34 | end) 35 | -------------------------------------------------------------------------------- /specs/linter.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo -e "**********"; 3 | echo -e "* Linter *"; 4 | echo -e "**********"; 5 | echo ""; 6 | 7 | luacheck ../ --no-global --no-self --exclude-files ../specs/* -------------------------------------------------------------------------------- /specs/output/Wiki-templates/Readme.md: -------------------------------------------------------------------------------- 1 | Much of the Wiki requires examples with tables. It is therefore useful to use iTorch and export to markdown, which is then cleaned of the scripts and entered into the Wiki. 2 | -------------------------------------------------------------------------------- /specs/output/Wiki-templates/Where_update_and_set.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "iTorch", 5 | "language": "lua", 6 | "name": "itorch" 7 | }, 8 | "language_info": { 9 | "name": "lua", 10 | "version": "5.1" 11 | }, 12 | "name": "" 13 | }, 14 | "nbformat": 3, 15 | "nbformat_minor": 0, 16 | "worksheets": [ 17 | { 18 | "cells": [ 19 | { 20 | "cell_type": "heading", 21 | "level": 1, 22 | "metadata": {}, 23 | "source": [ 24 | "Load the packages" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "require 'torch'\n", 32 | "require 'Dataframe'" 33 | ], 34 | "language": "python", 35 | "metadata": {}, 36 | "outputs": [] 37 | }, 38 | { 39 | "cell_type": "heading", 40 | "level": 1, 41 | "metadata": {}, 42 | "source": [ 43 | "Load the data" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "collapsed": false, 49 | "input": [ 50 | "my_data = Dataframe('../../data/realistic_29_row_data.csv')" 51 | ], 52 | "language": "python", 53 | "metadata": {}, 54 | "outputs": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "# Checkout the first couple of rows\n", 61 | "\n", 62 | "The simplest way to have a quick look at the data is to use `output` together with `head`/`tail` - the simplest form of subsetting" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "collapsed": false, 68 | "input": [ 69 | "my_data:head(2):output()\n", 70 | "my_data:tail(2):output()" 71 | ], 72 | "language": "python", 73 | "metadata": {}, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# Searching the dataframe\n", 81 | "\n", 82 | "The where can be convenient when you want to find a particular subset" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "collapsed": false, 88 | "input": [ 89 | "my_data:where('Gender', 'Male'):head(2):output()" 90 | ], 91 | "language": "python", 92 | "metadata": {}, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "More flexible searching is allowed through custom search functions" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "collapsed": false, 105 | "input": [ 106 | "my_data:where(function(row) return row.Gender == \"Male\" and row.Weight > 70 end):output()" 107 | ], 108 | "language": "python", 109 | "metadata": {}, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "# Update\n", 117 | "\n", 118 | "We can easily update the table using an update function" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "collapsed": false, 124 | "input": [ 125 | "my_data:\n", 126 | " update(\n", 
127 | " function(row) return row.Weight > 88 end,\n", 128 | " function(row)\n", 129 | " row.Weight = 88\n", 130 | " return row\n", 131 | " end)\n", 132 | "\n", 133 | "my_data:\n", 134 | " where(function(row) return row.Gender == \"Male\" and row.Weight > 70 end):\n", 135 | " output()" 136 | ], 137 | "language": "python", 138 | "metadata": {}, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "# The set function\n", 146 | "\n", 147 | "Closely related to the update is the simpler set function" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "collapsed": false, 153 | "input": [ 154 | "my_data:\n", 155 | " set{item_to_find = 55.5, \n", 156 | " column_name = 'Weight', \n", 157 | " new_value = Df_Dict({Gender = \"Female\"})}\n", 158 | "\n", 159 | "my_data:\n", 160 | " where(function(row) return row.Gender == \"Female\" and row.Weight < 60 end):\n", 161 | " output()" 162 | ], 163 | "language": "python", 164 | "metadata": {}, 165 | "outputs": [] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "collapsed": true, 170 | "input": [], 171 | "language": "python", 172 | "metadata": {}, 173 | "outputs": [] 174 | } 175 | ], 176 | "metadata": {} 177 | } 178 | ] 179 | } -------------------------------------------------------------------------------- /specs/output/cli_output.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | 3 | -- Make sure that directory structure is always the same 4 | require('lfs') 5 | if (string.match(lfs.currentdir(), "/specs/output$")) then 6 | lfs.chdir("../..") 7 | end 8 | paths.dofile('init.lua') 9 | 10 | -- Go into tests so that the loading of CSV:s is the same as always 11 | lfs.chdir("specs/output") 12 | 13 | -- A quick way to get a feeling for how the __tostring method works 14 | local a = Dataframe() 15 | a:load_csv{path = "../data/simple_short.csv", 16 | verbose = false} 17 | a:add_column('boolean', true) 18 | a:set(2, Df_Dict{boolean = false}) 19 | a:set(3, Df_Dict{boolean = 0/0}) 20 | 21 | print("-- Simple table with boolean column --") 22 | print(a) 23 | 24 | a:output() 25 | 26 | print("-- Advanced table --") 27 | a:load_csv{path = "../data/advanced_short.csv", 28 | verbose = false} 29 | print(a) 30 | 31 | print(" - check digits") 32 | 33 | a:output{digits = 2} 34 | 35 | print("-- Long table --") 36 | a:load_csv{path = "../data/realistic_29_row_data.csv", 37 | verbose = false} 38 | a.tostring_defaults.no_rows = 5 39 | print(a) 40 | 41 | a.tostring_defaults.no_rows = 20 42 | print(a) 43 | 44 | a:as_categorical('Gender') 45 | a.tostring_defaults.no_rows = 5 46 | print(a) 47 | 48 | females = a:where('Gender', 'Female') 49 | print(females) 50 | 51 | math.randomseed(10) 52 | left_right = {} 53 | for i = 1,a:shape()["rows"] do 54 | if (math.random() > 0.5) then 55 | table.insert(left_right, "left") 56 | else 57 | table.insert(left_right, "right") 58 | end 59 | end 60 | a:add_column("Side", Dataseries(Df_Array(left_right))) 61 | print(a:head(4):tostring(Df_Array("Weight"))) 62 | 63 | a:as_categorical("Side") 64 | print(a:head(4):tostring("Comm")) 65 | 66 | tbl = { 67 | no = {}, 68 | one = {}, 69 | two = {}, 70 | three = {}, 71 | four = {}, 72 | five = {}, 73 | six = {}, 74 | seven = {}, 75 | eight = {}, 76 | nine = {} 77 | } 78 | 79 | local long_txt = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud ex" 80 | for k,v in pairs(tbl) do 81 | for i=1,4 do 82 | if (k == "no") then 83 | v[#v + 1] = i 84 | else 85 | v[#v + 1] = long_txt 86 | end 87 | end 88 | end 89 | 90 | a = Dataframe{data=Df_Dict(tbl), 91 | column_order=Df_Array("no", "one", "two", "three", "four", "five", 92 | "six", "seven", "eight", "nine")} 93 | a:output() 94 | 95 | a = Dataframe(Df_Dict{ 96 | Filename = 11, 97 | fracture = 11, 98 | Side = 11, 99 | Exam_view = 11, 100 | osteoarthritis = 11, 101 | styloid = 11, 102 | prev_fracture = 11, 103 | Exam_body_part = 11 104 | }) 105 | 106 | print(a) 107 | -------------------------------------------------------------------------------- /specs/output/itorch_notebook_df_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "iTorch", 5 | "language": "lua", 6 | "name": "itorch" 7 | }, 8 | "language_info": { 9 | "name": "lua", 10 | "version": "5.1" 11 | }, 12 | "name": "" 13 | }, 14 | "nbformat": 3, 15 | "nbformat_minor": 0, 16 | "worksheets": [ 17 | { 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "collapsed": false, 22 | "input": [ 23 | "require 'torch'\n", 24 | "require 'lfs'\n", 25 | "\n", 26 | "-- Make sure that directory structure is always the same\n", 27 | "if (string.match(lfs.currentdir(), \"/specs/output$\")) then\n", 28 | " lfs.chdir(\"../..\")\n", 29 | "end\n", 30 | "\n", 31 | "paths.dofile(lfs.currentdir() .. '/init.lua')\n", 32 | "\n", 33 | "-- Go into tests so that the loading of CSV:s is the same as always\n", 34 | "lfs.chdir(\"./specs/\")" 35 | ], 36 | "language": "python", 37 | "metadata": {}, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "collapsed": false, 43 | "input": [ 44 | "itorch ~= nil" 45 | ], 46 | "language": "python", 47 | "metadata": {}, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "collapsed": false, 53 | "input": [ 54 | "-- A quick way to get a feeling for how the __tostring method works\n", 55 | "a = Dataframe('./data/simple_short.csv')" 56 | ], 57 | "language": "python", 58 | "metadata": {}, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "collapsed": false, 64 | "input": [ 65 | "print(\"-- Regular print with a Dataframe --\")\n", 66 | "print(a)" 67 | ], 68 | "language": "python", 69 | "metadata": {}, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "collapsed": false, 75 | "input": [ 76 | "print(\" -- Check regular table -- \")\n", 77 | "print({1, 2, 3, {1,2,3, {4,5,6}}})" 78 | ], 79 | "language": "python", 80 | "metadata": {}, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "collapsed": false, 86 | "input": [ 87 | "print(\"-- Long table --\")\n", 88 | "local a = Dataframe()\n", 89 | "a:load_csv{path = \"data/realistic_29_row_data.csv\",\n", 90 | " verbose = false}\n", 91 | "\n", 92 | "math.randomseed(10)\n", 93 | "left_right = {}\n", 94 | "for i = 1,a:shape()[\"rows\"] do\n", 95 | " if (math.random() > 0.5) then\n", 96 | " table.insert(left_right, \"left\")\n", 97 | " else\n", 98 | " table.insert(left_right, \"right\")\n", 99 | " end\n", 100 | "end\n", 101 | "a:add_column(\"Side\", Df_Array(left_right))\n", 102 | "a:output()" 103 | ], 104 | "language": "python", 105 | "metadata": {}, 106 | "outputs": [] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "collapsed": false, 111 | "input": [ 112 | "local a = Dataframe()\n", 113 | "a:load_csv{path = \"data/realistic_29_row_data.csv\",\n", 114 | " verbose = false}\n", 
115 | "a:as_categorical(\"Gender\")\n", 116 | "print(\"With set number of digits\")\n", 117 | "a:output{digits = 1}" 118 | ], 119 | "language": "python", 120 | "metadata": {}, 121 | "outputs": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "collapsed": true, 126 | "input": [], 127 | "language": "python", 128 | "metadata": {}, 129 | "outputs": [] 130 | } 131 | ], 132 | "metadata": {} 133 | } 134 | ] 135 | } -------------------------------------------------------------------------------- /specs/run_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo -e "\x1B[32m+++++++++++++++++++++++++++++++\x1B[0m"; 3 | echo -e "\x1B[32m+\x1B[0m Start torch-dataframe specs \x1B[32m+\x1B[0m"; 4 | echo -e "\x1B[32m+++++++++++++++++++++++++++++++\x1B[0m"; 5 | echo ""; 6 | 7 | VERSION="any" 8 | COVERAGE=false 9 | while [[ $# -gt 0 ]] 10 | do 11 | key="$1" 12 | 13 | case $key in 14 | -v|--version) 15 | VERSION="$2" 16 | shift # past argument 17 | ;; 18 | -c|--coverage) 19 | COVERAGE=true 20 | ;; 21 | *) 22 | # unknown option 23 | ;; 24 | esac 25 | shift # past argument or value 26 | done 27 | 28 | var=0 29 | count=0 30 | failed_scripts=() 31 | exclude_tags="skip_version_$VERSION" 32 | for f in `find . -name "*_spec*"`; do 33 | echo ""; 34 | echo "********************************************"; 35 | echo "Running specs in $f"; 36 | 37 | if [ "$COVERAGE" = true ]; then 38 | busted -v --coverage --exclude-tags=$exclude_tags,skip_all $f; 39 | else 40 | busted -v --exclude-tags=$exclude_tags,skip_all $f; 41 | fi 42 | 43 | fail=$? 44 | var=$(($var+$fail)) 45 | count=$(($count+1)) 46 | if [ $fail -ne 0 ] ; then 47 | failed_scripts+=($f) 48 | fi 49 | echo "End $f"; 50 | echo "********************************************"; 51 | done 52 | 53 | echo "" 54 | echo -e "\x1B[93m==============================================\x1B[0m" 55 | if [ $var -gt 0 ] 56 | then 57 | echo -e "Number of scripts failed: \x1B[31m$var\x1B[0m (total scripts: $count)" 58 | echo "Script(s) that failed:" 59 | for i in "${failed_scripts[@]}"; do 60 | echo " -!- $i"; 61 | done 62 | else 63 | echo "Number of scripts failed: $var (total scripts: $count)" 64 | fi 65 | echo " - exclude-tags used: $exclude_tags" 66 | echo -e "\x1B[93m==============================================\x1B[0m" 67 | 68 | exit $var 69 | -------------------------------------------------------------------------------- /specs/utils/test.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Make sure that directory structure is always the same 4 | if (string.match(lfs.currentdir(), "/specs$")) then 5 | lfs.chdir("..") 6 | end 7 | 8 | -- Include Dataframe lib 9 | dofile('init.lua') 10 | 11 | a = Dataframe() 12 | a:load_csv{ 13 | path = "/media/max/Ext_Enc_Rack/Extracted/dataset_4_torch_lda.csv", 14 | verbose = true, 15 | rows2explore = 1e4 16 | } 17 | -------------------------------------------------------------------------------- /specs/utils/utils_spec.lua: -------------------------------------------------------------------------------- 1 | require 'lfs' 2 | 3 | -- Ensure the test is launched within the specs/ folder 4 | assert(string.match(lfs.currentdir(), "specs")~=nil, "You must run this test in specs folder") 5 | 6 | local initial_dir = lfs.currentdir() 7 | 8 | -- Go to specs folder 9 | while (not string.match(lfs.currentdir(), "/specs$")) do 10 | lfs.chdir("..") 11 | end 12 | 13 | local specs_dir = lfs.currentdir() 14 | lfs.chdir("..")-- one more 
directory and it is lib root 15 | 16 | -- Include Dataframe lib 17 | dofile("init.lua") 18 | 19 | -- Go back into initial dir 20 | lfs.chdir(initial_dir) 21 | 22 | describe("#get_variable_type tests", function() 23 | describe("check integer rules", function() 24 | it("Single integer should give integer as result", function() 25 | local type = get_variable_type("1") 26 | assert.are.same(type, "integer") 27 | type = get_variable_type(23213) 28 | assert.are.same(type, "integer") 29 | end) 30 | 31 | it("previous double should give double", function() 32 | local type = get_variable_type("1", "double") 33 | assert.are.same(type, "double") 34 | type = get_variable_type(23213, "double") 35 | assert.are.same(type, "double") 36 | end) 37 | 38 | it("previous boolean should give string", function() 39 | local type = get_variable_type("1", "boolean") 40 | assert.are.same(type, "string") 41 | type = get_variable_type(23213, "boolean") 42 | assert.are.same(type, "string") 43 | end) 44 | end) 45 | 46 | describe("check double rules", function() 47 | it("Single double should give double as result", function() 48 | local type = get_variable_type("1.2") 49 | assert.are.same(type, "double") 50 | type = get_variable_type(23213.2) 51 | assert.are.same(type, "double") 52 | end) 53 | 54 | it("previous integer should give double", function() 55 | local type = get_variable_type("1.1", "integer") 56 | assert.are.same(type, "double") 57 | type = get_variable_type(23213.2, "integer") 58 | assert.are.same(type, "double") 59 | end) 60 | 61 | it("previous boolean should give string", function() 62 | local type = get_variable_type("1.2", "boolean") 63 | assert.are.same(type, "string") 64 | type = get_variable_type(23213.2, "boolean") 65 | assert.are.same(type, "string") 66 | end) 67 | end) 68 | 69 | describe("check boolean rules", function() 70 | it("Single boolean should give boolean as result", function() 71 | local type = get_variable_type("true") 72 | assert.are.same(type, "boolean") 73 | type = get_variable_type(true) 74 | assert.are.same(type, "boolean") 75 | end) 76 | 77 | it("previous integer should give string", function() 78 | local type = get_variable_type("true", "integer") 79 | assert.are.same(type, "string") 80 | type = get_variable_type(false, "integer") 81 | assert.are.same(type, "string") 82 | end) 83 | 84 | it("previous boolean should give boolean", function() 85 | local type = get_variable_type("false", "boolean") 86 | assert.are.same(type, "boolean") 87 | type = get_variable_type(true, "boolean") 88 | assert.are.same(type, "boolean") 89 | end) 90 | 91 | it("True/false should be case independent", function() 92 | for _,spelling in pairs({"tRue", "fAlse", "FALSE", "TRUE", "True", "False"}) do 93 | local type = get_variable_type(spelling) 94 | assert.are.same(type, "boolean") 95 | end 96 | end) 97 | end) 98 | end) 99 | -------------------------------------------------------------------------------- /utils/doc_helpers/get_anchors.lua: -------------------------------------------------------------------------------- 1 | function get_anchor_link(title, md_path, tag, indent) 2 | indent = indent or " " 3 | md_path = md_path or "" 4 | title = trim(title) 5 | title = title:gsub("(.+)%([^)]+%)", "%1") 6 | title = title:gsub("([^ `]+)%.__([^_()]+)__([^_`]*)", "%1.`__%2__`%3") 7 | title = title:gsub("%.__([^_()`]+)$", ".`__%1`") 8 | title = title:gsub("%._(.+)$", ".`_%1`") 9 | tag = trim(tag) 10 | 11 | return ("\n%s- [%s](%s#%s)"): 12 | format(indent, title, md_path, tag) 13 | end 14 | 15 | function 
get_doc_anchors(base_path, md_path, pd, rough_toc, detailed_toc) 16 | if (not base_path:match("/$")) then 17 | base_path = base_path .. "/" 18 | end 19 | local rel_md_path = md_path:gsub((base_path):quote(), "") 20 | rough_toc = rough_toc .. "\n- [".. pd.title .."]("..rel_md_path..")" 21 | detailed_toc = detailed_toc .. "\n- **[".. pd.title .."]("..rel_md_path..")**" 22 | for i=1,#pd.anchors.titles do 23 | detailed_toc = detailed_toc .. get_anchor_link(pd.anchors.titles[i], rel_md_path, pd.anchors.tags[i]) 24 | end 25 | return rough_toc, detailed_toc 26 | end 27 | -------------------------------------------------------------------------------- /utils/doc_helpers/parse_file.lua: -------------------------------------------------------------------------------- 1 | 2 | function parse_doc(raw_docs, file_name) 3 | -- Get documentation 4 | local doc_tbl = { 5 | content = trim(raw_docs), 6 | anchors = { 7 | tags = {}, 8 | titles = {} 9 | }, 10 | title = nil, 11 | title_rno = 0 12 | } 13 | 14 | local rows = doc_tbl.content:split("\n") 15 | for row_no,row in ipairs(rows) do 16 | if (row:match("^#")) then 17 | doc_tbl.title = trim(row:gsub("#", "")) 18 | doc_tbl.title_rno = row_no 19 | break 20 | end 21 | end 22 | 23 | -- If title not found use the file name 24 | if (not doc_tbl.title) then 25 | doc_tbl.title = "File: " .. file_name 26 | end 27 | 28 | if (doc_tbl.content:len() > 0) then 29 | rows = doc_tbl.content:split("\n") 30 | 31 | -- Remove empty rows and initial rows that are part of the title 32 | local tmp = {} 33 | for row_no,row in ipairs(rows) do 34 | if (row_no > doc_tbl.title_rno) then 35 | if(trim(row):len() > 0) then 36 | tmp[#tmp + 1] = row 37 | end 38 | end 39 | end 40 | rows = tmp 41 | 42 | -- Find all the anchors (i.e. <a name="..."> tags) in the text 43 | for idx,row in ipairs(rows) do 44 | if (row:match("<a name=\"[^\"]+\">")) then 45 | local subanchor_tag = row:gsub(".*<a name=\"([^\"]+)\">.*", "%1") 46 | local subtitle = subanchor_tag 47 | 48 | if (rows[idx + 1] and 49 | rows[idx + 1]:match("^%s*#")) then 50 | subtitle = trim(rows[idx + 1]:gsub("^#+", "")) 51 | end 52 | 53 | if (subtitle ~= doc_tbl.title) then 54 | doc_tbl.anchors.titles[#doc_tbl.anchors.titles + 1] = subtitle 55 | doc_tbl.anchors.tags[#doc_tbl.anchors.tags + 1] = subanchor_tag 56 | end 57 | end 58 | end 59 | end 60 | 61 | return doc_tbl 62 | end 63 | -------------------------------------------------------------------------------- /utils/doc_helpers/write_doc.lua: -------------------------------------------------------------------------------- 1 | 2 | function write_doc(parsed_data, file_name) 3 | 4 | -- Set the general anchor 5 | local anchor = "__" .. parsed_data.title .. "__" 6 | local title = parsed_data.title 7 | if (title:match("^[A-Z][a-z]") and 8 | not title:match("^Data") and 9 | not title:match("^Df") and 10 | not title:match("^Batc")) then 11 | title = title:sub(1,1):lower() .. title:sub(2) 12 | end 13 | local header = ("# API documentation for [%s](#%s)"): 14 | format(title, anchor) 15 | 16 | for i=1,#parsed_data.anchors.tags do 17 | header = header .. 
get_anchor_link(parsed_data.anchors.titles[i], nil, parsed_data.anchors.tags[i], "") 18 | end 19 | 20 | local docfile = io.open(file_name, "w") 21 | docfile:write(header) 22 | docfile:write(("\n\n<a name=\"%s\">\n%s"):format(anchor, parsed_data.content)) 23 | docfile:close() 24 | 25 | end 26 | -------------------------------------------------------------------------------- /utils/loader.lua: -------------------------------------------------------------------------------- 1 | local argcheck = require "argcheck" 2 | local paths = require "paths" 3 | local argdoc = require 'argcheck.doc' 4 | 5 | argdoc[[ 6 | 7 | ## Package load functions 8 | 9 | ]] 10 | 11 | paths.get_sorted_files = argcheck{ 12 | doc=[[ 13 | 14 | ### paths.get_sorted_files(@ARGP) 15 | 16 | Calls the `paths.files()` with the directory and sorts the files according to 17 | name. 18 | 19 | @ARGT 20 | 21 | _Return value_: table with sorted file names 22 | ]], 23 | {name="path", type="string", 24 | doc="The directory path"}, 25 | {name="match_str", type="string", default="[.]lua$", 26 | doc="The file matching string to search for. Defaults to lua file endings."}, 27 | call=function(path, match_str) 28 | local files = {} 29 | for f in paths.files(path) do 30 | if (f:match(match_str)) then 31 | files[#files + 1] = f 32 | end 33 | end 34 | 35 | table.sort(files) 36 | 37 | return files 38 | end} 39 | 40 | load_dir_files = argcheck{ 41 | doc=[[ 42 | 43 | ### load_dir_files(@ARGP) 44 | 45 | Traverses a directory and loads all files within 46 | 47 | @ARGT 48 | 49 | _Return values_: 50 | 1. The files loaded in the processed order 51 | 2. The doc content if `docs` argument was true - otherwise it's an empty table 52 | ]], 53 | {name="path", type="string", doc="The directory"}, 54 | {name="params", type="table", doc="Objects to pass to the files", default={}}, 55 | {name="docs", type="boolean", doc="Run with argcheck.doc", default=false}, 56 | call = (function() 57 | -- Hidden variable that makes sure we don't reload files 58 | local loaded_files = {paths.thisfile()} 59 | 60 | local function is_loaded(file) 61 | for _,fn in ipairs(loaded_files) do 62 | if (fn == file) then 63 | return true 64 | end 65 | end 66 | 67 | return false 68 | end 69 | 70 | local function load_file(file, params, docs, ret_docs, ret_fpaths) 71 | if (docs) then 72 | argdoc.record() 73 | end 74 | 75 | local ret = assert(loadfile(file))(table.unpack(params)) 76 | 77 | if (docs) then 78 | 79 | -- Assigns to parent ret_docs 80 | ret_docs[file] = argdoc.stop() 81 | end 82 | 83 | table.insert(loaded_files, file) 84 | table.insert(ret_fpaths, file) 85 | return ret 86 | end 87 | 88 | return function(path, params, docs) 89 | assert(paths.dirp(path), ("The path '%s' isn't a valid directory"):format(path)) 90 | table.insert(params, path) 91 | local ret_docs = {} 92 | local ret_fpaths = {} 93 | 94 | if (paths.filep(path .. "init.lua")) then 95 | local obj = load_file(path .. "init.lua", params, docs, ret_docs, ret_fpaths) 96 | table.insert(params, 1, obj) 97 | end 98 | 99 | local files = paths.get_sorted_files(path) 100 | for _,file in pairs(files) do 101 | file = path .. file 102 | 103 | if (not is_loaded(file)) then 104 | 105 | load_file(file, params, docs, ret_docs, ret_fpaths) 106 | 107 | end 108 | end 109 | 110 | return ret_fpaths, ret_docs 111 | end 112 | end)()} 113 | --------------------------------------------------------------------------------
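For illustration, a minimal usage sketch for `load_dir_files` above. The `dataframe/` path and the `Dataframe` class table passed via `params` are assumptions made for this example (presumably similar to how the package's `init.lua` wires its modules together), not code taken from the repository:

-- Hypothetical call site for load_dir_files (illustrative sketch only).
-- Assumes a Dataframe class table already exists for the loaded files to
-- extend. load_dir_files appends the path to params, so every file under
-- dataframe/ is executed with (Dataframe, "dataframe/") as its varargs.
local loaded_paths, file_docs = load_dir_files{
	path = "dataframe/",
	params = {Dataframe},
	docs = true
}

-- Files come back in the order they were processed; with docs = true the
-- argcheck documentation recorded for each file is returned alongside.
for _,file in ipairs(loaded_paths) do
	print("Loaded: " .. file)
end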