├── @DataFrame └── DataFrame.m ├── README.md ├── examples ├── DataFrame_example.m └── html │ └── DataFrame_example.html └── tests ├── run_DataFrame_tests.m ├── test_DataFrame.m └── test_data └── ugly_data.csv

/@DataFrame/DataFrame.m:
--------------------------------------------------------------------------------
classdef DataFrame < dynamicprops
    %DATAFRAME - an implementation of a DataFrame class for Matlab
    %   DATAFRAME - inspired by Pandas and R DataFrame, this class wraps the
    %   Matlab table datatype as an object oriented alternative
    %
    %   SYNTAX:
    %       df = DataFrame( varargin )
    %
    %   Description:
    %       df = DataFrame( varargin ) the DataFrame passes the input
    %       arguments on to the table constructor. When the only input is
    %       already a table, that table is wrapped directly.
    %
    %   PROPERTIES:
    %       (dynamic) - the DataFrame properties will match the
    %       table.Properties.VariableNames
    %
    %   EXAMPLES:
    %       df = DataFrame(0, 0, 0, 0, 'VariableNames', ...
    %           {'test1', 'test2','test3','test4',})
    %       df.head()
    %
    %   SEE ALSO: table
    %
    %   Author:         nick roth
    %   email:          nick.roth@nou-systems.com
    %   Matlab ver.:    8.3.0.532 (R2014a)
    %   Date:           01-Sep-2014

    %% Properties
    properties
        Properties  % read through get.Properties from the wrapped table
    end

    properties (Access = private)
        data        % the wrapped Matlab table
    end

    %% Methods
    methods
        % DATAFRAME Constructor
        function self = DataFrame(varargin)
            %DATAFRAME - wraps an existing table (single table input) or
            %builds a new table from the inputs, exactly as table() would

            if nargin == 1 && istable(varargin{1})
                % Construct dataframe from an existing table
                self.data = varargin{1};
            else
                % Process inputs as if we are a Matlab table. This branch
                % also covers a single non-table input, which previously
                % left the data empty and made update_props fail.
                self.data = table(varargin{:});
            end

            self.update_props();
        end

        function head(self, n)
            %HEAD - implements the method that Pandas provides to see the
            %top rows of the table
            %
            %   df.head()   displays the first 10 rows (or fewer when the
            %               table is shorter)
            %   df.head(n)  displays the first n rows

            if nargin < 2
                n = 10;
            end

            % Clamp to the number of rows actually available
            last = min(n, height(self.data));
            toprows = self.data(1:last, :);
            disp(toprows);
        end

        function out = getTable(self)
            %GETTABLE - returns a Matlab table from DataFrame object

            out = self.data;
        end

        function details(self)
            %DETAILS - prints to console a detailed summary of the data
            %within the DataFrame

            % Default object display first (bypasses the disp override),
            % then the top rows and the table summary
            builtin('disp', self);
            self.head();
            self.summary();
        end

        function bool = is_column(self, col)
            %IS_COLUMN - returns boolean value true if the given string
            %(or every string of a given cell array) is a valid column

            cols = self.data.Properties.VariableNames;
            bool = isempty(setdiff(col, cols));
        end

        function remove_cols(self, cols)
            %REMOVE_COLS - removes columns from the DataFrame, given a
            %single string, or a cell array of strings

            cols = cellstr(cols);

            % Delete from the table first: if a name is invalid this
            % errors before any dynamic property has been touched, so the
            % object is never left half-updated.
            self.data(:, cols) = [];

            for i = 1:length(cols)
                mp = findprop(self, cols{i});
                if ~isempty(mp)
                    delete(mp);
                end
            end
        end

        function out = columns(self)
            %COLUMNS - provides an alternative way to access the column
            %names of the DataFrame

            out = self.data.Properties.VariableNames;
        end
    end

    methods (Access = private)
        function update_props(self)
            %UPDATE_PROPS - synchronizes the dynamic properties with the
            %table variables. This way we can make it look like the
            %DataFrame object is actually a table. Safe to call
            %repeatedly: existing properties are left alone, missing ones
            %are added, and dynamic properties whose column no longer
            %exists are removed.

            vars = self.data.Properties.VariableNames;

            % Add a dynamic property for every column that lacks one
            for i = 1:length(vars)
                var = vars{i};
                if isempty(findprop(self, var))
                    self.addprop(var);
                end
            end

            % Drop dynamic properties that no longer match a column
            names = properties(self);
            for i = 1:length(names)
                mp = findprop(self, names{i});
                if isa(mp, 'meta.DynamicProperty') && ~any(strcmp(names{i}, vars))
                    delete(mp);
                end
            end
        end
    end

    methods (Static, Access = private)
        function x = unwrap(x)
            %UNWRAP - returns the wrapped table when given a DataFrame;
            %any other input is returned unchanged

            if isa(x, 'DataFrame')
                x = x.data;
            end
        end
    end

    % Constructors
    methods (Static)
        function out = fromStruct(varargin)
            %FROMSTRUCT - builds a DataFrame via struct2table
            out = DataFrame(struct2table(varargin{:}));
        end

        function out = fromCSV(varargin)
            %FROMCSV - builds a DataFrame via readtable
            out = DataFrame(readtable(varargin{:}));
        end

        function out = fromArray(varargin)
            %FROMARRAY - builds a DataFrame via array2table
            out = DataFrame(array2table(varargin{:}));
        end

        function out = fromCell(varargin)
            %FROMCELL - builds a DataFrame via cell2table
            out = DataFrame(cell2table(varargin{:}));
        end
    end

    %% Pass Throughs
    methods
        function out = height(self)
            %HEIGHT - number of rows of the wrapped table
            out = height(self.data);
        end

        function out = width(self)
            %WIDTH - number of variables of the wrapped table
            out = width(self.data);
        end

        function summary(self)
            %SUMMARY - prints the table summary
            summary(self.data);
        end

        function out = toStruct(self)
            %TOSTRUCT - converts the wrapped table with table2struct
            out = table2struct(self.data);
        end

        function out = toCell(self)
            %TOCELL - converts the wrapped table with table2cell
            out = table2cell(self.data);
        end

        function out = toArray(self)
            %TOARRAY - converts the wrapped table with table2array
            out = table2array(self.data);
        end

        function writetable(self, varargin)
            %WRITETABLE - writes the wrapped table, forwarding all options
            writetable(self.data, varargin{:})
        end

        function out = get.Properties(self)
            %GET.PROPERTIES - exposes the wrapped table's Properties
            out = self.data.Properties;
        end
    end

    %% Static
    methods (Static)
        % These forward to the table functions of the same name. Inputs
        % that are DataFrame objects are unwrapped to their tables first;
        % outputs are returned exactly as the table functions produce them.

        function [C,ia,ib] = intersect(A, B, varargin)
            A = DataFrame.unwrap(A);
            B = DataFrame.unwrap(B);
            [C,ia,ib] = intersect(A, B, varargin{:});
        end

        function [Lia, Locb] = ismember(A, B, varargin)
            A = DataFrame.unwrap(A);
            B = DataFrame.unwrap(B);
            [Lia, Locb] = ismember(A, B, varargin{:});
        end

        function out = rowfun(func, A, varargin)
            A = DataFrame.unwrap(A);
            out = rowfun(func, A, varargin{:});
        end

        function out = varfun(func, A, varargin)
            A = DataFrame.unwrap(A);
            out = varfun(func, A, varargin{:});
        end
    end

    %% Overrides
    methods

        function self = subsasgn(self, S, B)
            %SUBSASGN - overrides the builtin method so that we can
            %dynamically attach properties to the object when we are adding
            %new data to the DataFrame

            % Forward the complete index chain to the underlying table.
            % S may hold several levels (e.g. df.col(3) = x), so it is
            % passed on whole rather than expanded field by field.
            self.data = builtin('subsasgn', self.data, S, B);

            % Sync only after the assignment succeeded, so a failed
            % assignment never leaves a dangling dynamic property.
            self.update_props();
        end

        function [varargout] = subsref(self, S)
            %SUBSREF - overrides the subscript reference method, which
            %provides the way for us to wrap the built in table type

            varargout{1} = [];

            % Only the first level decides where the call is routed; the
            % full chain S is still forwarded.
            call_type = S(1).type;
            var = S(1).subs;

            %Catch the subscript behavior so that we can pass through table
            %calls directly to it, and methods/class properties to the
            %DataFrame class
            switch call_type
                case '.'
                    if ismethod(self, var)

                        % check for method outputs
                        mc = metaclass(self);
                        ml = mc.MethodList;
                        meth = findobj(ml, 'Name', var);

                        if isempty(meth) || isempty(meth(1).OutputNames)
                            % method calls without outputs
                            builtin('subsref', self, S);
                        else
                            varargout{1} = builtin('subsref', self, S);
                        end
                    else
                        %Pass through properties call
                        varargout{1} = builtin('subsref', self.data, S);
                    end
                case {'()', '{}'}
                    % Process '()' and '{}' subscripting directly on table
                    tbl = self.data;
                    varargout{1} = builtin('subsref', tbl, S);
            end
        end

        function disp(self)
            %DISP - overrides the default disp method, which makes our
            %DataFrame look like a typical table

            disp(self.data);
        end

        function out = numel(~, varargin)
            %NUMEL - override the default numel method. It is critical that
            %both the first argument and second argument, "varargin"
            %exists, since this directly affects when Matlab will call the
            %method. Otherwise, '{}' subscripting breaks.

            out = 1;
        end
    end

end

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
DataFrame
=========

Matlab implementation of DataFrame/Pandas concept. This project wraps and makes use of as much as possible of the Matlab table, but with the intent of providing a class implementation that could be specialized further.

The DataFrame is created in exactly the same way as a table. The constructor just passes the arguments to the table constructor, then stores the table as a private property. We can still retrieve the table properties:
df = DataFrame(0, 0, 0, 0, ... 7 | 'VariableNames', {'test1', 'test2','test3','test4',}); 8 | df.Properties 9 |
ans = 10 | 11 | Description: '' 12 | VariableDescriptions: {} 13 | VariableUnits: {} 14 | DimensionNames: {'Row' 'Variable'} 15 | UserData: [] 16 | RowNames: {} 17 | VariableNames: {'test1' 'test2' 'test3' 'test4'} 18 | 19 |
DataFrame overrides the display method to show the table instead of the DataFrame object
df 20 |
df = 21 | 22 | test1 test2 test3 test4 23 | _____ _____ _____ _____ 24 | 25 | 0 0 0 0 26 | 27 |
The column names are pass-throughs into the actual table structure, so we can access a column as normal:
df.test1 28 | df.test3 29 |
ans = 30 | 31 | 0 32 | 33 | 34 | ans = 35 | 36 | 0 37 | 38 |
You no longer need to know which functions can be used on a Matlab table. They are available either as methods on the object (`df.height()`) or as ordinary function calls (`height(df)`).
df.height() 39 | height(df) 40 |
ans = 41 | 42 | 1 43 | 44 | 45 | ans = 46 | 47 | 1 48 | 49 |
Matlab blocks you from extending the table data structure, but we can get around that with this approach
df = DataFrame.fromCSV(which('ugly_data.csv'));
50 | df.head();
51 |
Name Zeros Lat Lng Normal Negative 52 | _______________ _____ ________ ________ ___________ ________ 53 | 54 | 'Connor Hayden' 0 -2.58201 71.06714 0.006049531 3 55 | 56 | 57 | GUID Date Timestamp 58 | ______________________________________ _________________ __________ 59 | 60 | '7B79A197-E85F-2BFF-F37E-727DEDAC9803' 'April 27th 2015' 1394414933 61 | 62 | 63 | AlphaNumeric 64 | _____________ 65 | 66 | 'FSU09MJG3ZB' 67 | 68 |
Column data shows as empty in the object display, since column values are passed through to the underlying table dynamically on each subscript call. Properties shows up with contents, since it uses a getter method.
df.details(); 69 |
DataFrame with properties: 70 | 71 | Properties: [1x1 struct] 72 | Normal: [] 73 | Lng: [] 74 | Negative: [] 75 | Zeros: [] 76 | Name: [] 77 | AlphaNumeric: [] 78 | Date: [] 79 | GUID: [] 80 | Lat: [] 81 | Timestamp: [] 82 | 83 | Name Zeros Lat Lng Normal Negative 84 | _______________ _____ ________ ________ ___________ ________ 85 | 86 | 'Connor Hayden' 0 -2.58201 71.06714 0.006049531 3 87 | 88 | 89 | GUID Date Timestamp 90 | ______________________________________ _________________ __________ 91 | 92 | '7B79A197-E85F-2BFF-F37E-727DEDAC9803' 'April 27th 2015' 1394414933 93 | 94 | 95 | AlphaNumeric 96 | _____________ 97 | 98 | 'FSU09MJG3ZB' 99 | 100 | 101 | Variables: 102 | 103 | Name: 20400x1 cell string 104 | 105 | Zeros: 20400x1 double 106 | Values: 107 | 108 | min 0 109 | median 0 110 | max 0 111 | 112 | Lat: 20400x1 double 113 | Values: 114 | 115 | min -89.42406 116 | median -1.02059 117 | max 88.89347 118 | 119 | Lng: 20400x1 double 120 | Values: 121 | 122 | min -177.18611 123 | median 0.934815 124 | max 176.4439 125 | 126 | Normal: 20400x1 double 127 | Values: 128 | 129 | min -0.543252895 130 | median -0.017611593 131 | max 0.573277103 132 | 133 | Negative: 20400x1 double 134 | Values: 135 | 136 | min -5 137 | median 0.5 138 | max 5 139 | 140 | GUID: 20400x1 cell string 141 | 142 | Date: 20400x1 cell string 143 | 144 | Timestamp: 20400x1 double 145 | Values: 146 | 147 | min 1378967930 148 | median 1403645750.5 149 | max 1439505058 150 | 151 | AlphaNumeric: 20400x1 cell string 152 | 153 |
tbl = readtable(which('ugly_data.csv')); 154 | try 155 | tbl.head(); 156 | head(tbl); 157 | catch err 158 | disp(err.message) 159 | end 160 |
Unrecognized variable name 'head'. 161 |
df.Lat2 = df.Lat; 162 | df.Lat3 = df.Lat; 163 | df.columns() 164 |
ans = 165 | 166 | Columns 1 through 7 167 | 168 | 'Name' 'Zeros' 'Lat' 'Lng' 'Normal' 'Negative' 'GUID' 169 | 170 | Columns 8 through 12 171 | 172 | 'Date' 'Timestamp' 'AlphaNumeric' 'Lat2' 'Lat3' 173 | 174 |
df.remove_cols({'Lat2', 'Lat3'}); 175 | df.columns() 176 |
ans = 177 | 178 | Columns 1 through 7 179 | 180 | 'Name' 'Zeros' 'Lat' 'Lng' 'Normal' 'Negative' 'GUID' 181 | 182 | Columns 8 through 10 183 | 184 | 'Date' 'Timestamp' 'AlphaNumeric' 185 | 186 |
methods('DataFrame')
187 |
Methods for class DataFrame: 188 | 189 | DataFrame disp is_column subsref toStruct 190 | addprop getTable numel summary width 191 | columns head remove_cols toArray writetable 192 | details height subsasgn toCell 193 | 194 | Static methods: 195 | 196 | fromArray fromCell intersect rowfun 197 | fromCSV fromStruct ismember varfun 198 | 199 | Call "methods('handle')" for methods of DataFrame inherited from handle. 200 |