├── .gitignore
├── .swiftpm
└── xcode
│ └── package.xcworkspace
│ └── contents.xcworkspacedata
├── Tests
└── KoalasTests
│ ├── XCTestManifests.swift
│ ├── DataPanelTests.swift
│ ├── DataSeriesAdvancedTests.swift
│ ├── DataFrameIOTests.swift
│ ├── DataFrameAdvancedTests.swift
│ ├── UtilityTests.swift
│ ├── ArithmeticOperatorTests.swift
│ └── DataSeriesTests.swift
├── Sources
└── Koalas
│ ├── DataFrame
│ ├── DataFrame+Date.swift
│ ├── DataFrameType.swift
│ ├── DataFrame+IO.swift
│ ├── DataFrame.swift
│ └── DataFrame+Arithmetics.swift
│ ├── DataSeries
│ ├── FillNilsMethod.swift
│ ├── DataSeriesType.swift
│ ├── Tuple.swift
│ ├── UnwrapUtils.swift
│ ├── SeriesArray+Date.swift
│ ├── SeriesArray.swift
│ ├── DataSeries.swift
│ ├── SeriesArrayExtensions.swift
│ └── DataSeries+Arithmetics.swift
│ └── DataPanel
│ └── DataPanel.swift
├── LICENSE
├── Package.swift
├── logo.svg
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | xcuserdata/
6 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Tests/KoalasTests/XCTestManifests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 |
3 | #if !canImport(ObjectiveC) // Only compiled where the ObjectiveC module cannot be imported.
4 | public func allTests() -> [XCTestCaseEntry] { // Returns the manually listed XCTest case entries.
5 | return [
6 | testCase([]), // NOTE(review): registers an empty entry list — confirm no suite is meant to be listed here.
7 | ]
8 | }
9 | #endif
10 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataFrame/DataFrame+Date.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 23.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | public extension DataFrame where Value == DataSeries {
11 | /**
12 | Converts every date series of this DataFrame into its date components, returning a DataPanel.
13 | Each series is passed to `toDateComponents()` through `upscaleTransform`; the per-series
14 | conversion in SeriesArray+Date.swift yields year, month and day only (no time-of-day fields).
15 | */
16 | func toDateComponents() -> DataPanel {
17 | return upscaleTransform { $0.toDateComponents() }
18 | }
19 | }
20 |
21 |
22 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/FillNilsMethod.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 11.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | Strategies for replacing nil values in a DataSeries.
12 | The enum only carries the chosen strategy and its parameter; the fill logic itself lives elsewhere.
13 |
14 | - all: Replace every nil element with `value`, regardless of position.
15 |
16 | - backward: Backward fill. `initial` is the fallback for nils that have no known value to copy from.
17 | NOTE(review): direction and fallback handling are described from the case names; confirm against the implementation.
18 |
19 | - forward: Forward fill. `initial` is the fallback for nils that have no known value to copy from.
20 | NOTE(review): same caveat as above; the implementation is not part of this listing.
21 | */
22 | public enum FillNilsMethod {
23 | case all(value: T)
24 | case backward(initial: T)
25 | case forward(initial: T)
26 | }
27 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/DataSeriesType.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 07.07.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | Either a ready-made DataSeries or a single scalar standing in for one.
12 | Lets conditional helpers accept a series and a constant interchangeably;
13 | the constant is expanded to a full series on demand.
14 | */
15 | public enum DataSeriesType {
16 | case ds(DS?)
17 | case value(V?)
18 |
19 | /**
20 | Resolves this value to a DataSeries.
21 | A `.ds` payload is returned unchanged (it may be nil).
22 | A `.value` payload is repeated once per element of `series`.
23 | */
24 | func toDataSeriesWithShape(of series: DataSeries) -> DataSeries? where DS == DataSeries {
25 | switch self {
26 | case let .value(constant):
27 | return DataSeries(repeating: constant, count: series.count)
28 | case let .ds(existing):
29 | return existing
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataFrame/DataFrameType.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 25.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | Either a ready-made DataFrame or a single scalar standing in for one.
12 | Lets conditional helpers accept a frame and a constant interchangeably;
13 | the constant is expanded to a full frame on demand.
14 | */
15 | public enum DataFrameType {
16 | case df(DF?)
17 | case value(V?)
18 |
19 | /**
20 | Resolves this value to a DataFrame.
21 | A `.df` payload is returned unchanged (it may be nil).
22 | A `.value` payload fills every column of `dataframe`, keeping each column's key and length.
23 | */
24 | func toDataframeWithShape(of dataframe: DataFrame) -> DataFrame? where DF == DataFrame {
25 | switch self {
26 | case let .value(constant):
27 | return dataframe.mapValues { column in DataSeries(repeating: constant, count: column.count) }
28 | case let .df(existing):
29 | return existing
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Kazakov Sergey
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.2
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
6 | let package = Package(
7 | name: "Koalas",
8 | platforms: [
9 | .macOS(.v10_15),
10 | .iOS(.v8)
11 | ],
12 |
13 | products: [
14 | // Products define the executables and libraries produced by a package, and make them visible to other packages.
15 | .library(
16 | name: "Koalas",
17 | targets: ["Koalas"]),
18 | ],
19 | dependencies: [
20 | // Dependencies declare other packages that this package depends on.
21 | // .package(url: /* package url */, from: "1.0.0"),
22 | ],
23 | targets: [
24 | // Targets are the basic building blocks of a package. A target can define a module or a test suite.
25 | // Targets can depend on other targets in this package, and on products in packages which this package depends on.
26 | .target(
27 | name: "Koalas",
28 | dependencies: []),
29 | .testTarget(
30 | name: "KoalasTests",
31 | dependencies: ["Koalas"]),
32 | ]
33 | )
34 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/Tuple.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 03.07.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | A tuple structure containing two elements of different types.
12 | Used for combining two DataSeries into a single series of paired values.
13 | Both elements must conform to Codable for serialization support.
14 | */
15 | public struct Tuple2: Codable {
16 | public let t1: T1
17 | public let t2: T2
18 |
19 | public init(t1: T1, t2: T2) {
20 | self.t1 = t1
21 | self.t2 = t2
22 | }
23 | }
24 |
25 | /**
26 | A tuple structure containing three elements of different types.
27 | Used for combining three DataSeries into a single series of grouped values.
28 | All elements must conform to Codable for serialization support.
29 | */
30 | public struct Tuple3: Codable {
31 | public let t1: T1
32 | public let t2: T2
33 | public let t3: T3
34 |
35 | public init(t1: T1, t2: T2, t3: T3) {
36 | self.t1 = t1
37 | self.t2 = t2
38 | self.t3 = t3
39 | }
40 | }
41 |
42 | /**
43 | A tuple structure containing four elements of different types.
44 | Used for combining four DataSeries into a single series of grouped values.
45 | All elements must conform to Codable for serialization support.
46 | */
47 | public struct Tuple4: Codable {
48 | public let t1: T1
49 | public let t2: T2
50 | public let t3: T3
51 | public let t4: T4
52 |
53 | public init(t1: T1, t2: T2, t3: T3, t4: T4) {
54 | self.t1 = t1
55 | self.t2 = t2
56 | self.t3 = t3
57 | self.t4 = t4
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/UnwrapUtils.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 25.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | Combines two optionals of the same type through `map`.
12 | Yields nil as soon as either operand is nil; `map` is only invoked when both are present.
13 | */
14 | public func unwrap(_ lhs: T?, _ rhs: T?, map: (T, T) -> U) -> U? {
15 | if let first = lhs, let second = rhs {
16 | return map(first, second)
17 | }
18 |
19 | return nil
20 | }
21 |
22 | /**
23 | Combines two optionals of different types through `map`, which may itself return nil.
24 | Yields nil as soon as either operand is nil; `map` is only invoked when both are present.
25 | */
26 | public func unwrap(_ lhs: T?, _ rhs: U?, map: (T, U) -> V?) -> V? {
27 | if let first = lhs, let second = rhs {
28 | return map(first, second)
29 | }
30 |
31 | return nil
32 | }
33 |
34 | /**
35 | Combines three optionals through `map`, which may itself return nil.
36 | Yields nil as soon as any operand is nil; `map` is only invoked when all three are present.
37 | */
38 | public func unwrap(_ t: T?, _ u: U?, _ v: V?, map: (T, U, V) -> S?) -> S? {
39 | if let first = t, let second = u, let third = v {
40 | return map(first, second, third)
41 | }
42 |
43 | return nil
44 | }
45 |
46 | /**
47 | Passes a present value to `map`, which may itself return nil.
48 | Yields nil without calling `map` when `value` is nil.
49 | */
50 | public func unwrap(value: T?, map: (T) -> U?) -> U? {
51 | if let wrapped = value {
52 | return map(wrapped)
53 | }
54 |
55 | return nil
56 | }
57 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/SeriesArray+Date.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 23.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | Defines the keys for date components when extracting parts of dates.
12 | Used to organize date components (year, month, day) in a structured format.
13 | */
14 | public enum DateComponentsKeys: String {
15 | case year
16 | case month
17 | case day
18 | }
19 |
20 | public extension SeriesArray where Element == Date? {
21 | /**
22 | Splits a series of dates into a DataFrame with three integer columns: year, month and day.
23 | Components are read with the user's current calendar (`Calendar.current`).
24 | A nil date produces nil at the same position in all three columns.
25 | */
26 | func toDateComponents() -> DataFrame {
27 | // Ask the calendar for each component directly. Formatting the date to text and
28 | // parsing it back with `Int(_:)` only works for ASCII digits, so locales that render
29 | // non-Latin numerals turned every value into nil; it also required building three
30 | // DateFormatters on every call.
31 | let calendar = Calendar.current
32 |
33 | // Extracts one calendar component per element, keeping nil dates as nil.
34 | let components: (Calendar.Component) -> [Int?] = { unit in
35 | self.map { unwrap(value: $0) { date -> Int? in calendar.component(unit, from: date) } }
36 | }
37 | let yearSeries = DataSeries(components(.year))
38 | let monthSeries = DataSeries(components(.month))
39 | let daySeries = DataSeries(components(.day))
40 | return DataFrame(uniqueKeysWithValues: [(.year, yearSeries),
41 | (.month, monthSeries),
42 | (.day, daySeries)])
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataPanel/DataPanel.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 04.06.2020.
6 | //
7 |
8 | /**
9 | A dictionary-like structure that maps keys to DataFrames.
10 | Used to store and manipulate 3D tabular data with multiple keys and columns.
11 | */
12 | public typealias DataPanel = [Key: DataFrame]
13 |
14 | public extension DataPanel {
15 | /**
16 | Swaps the two key levels of the panel.
17 | A series stored at [outer][inner] ends up at [inner][outer] in the result.
18 | Empty inner frames contribute nothing, so their outer keys do not appear in the output.
19 | */
20 | func transposed() -> DataPanel
21 | where Value == DataFrame {
22 |
23 | var transposedData: [Key2: DataFrame] = [:]
24 |
25 | for (outerKey, frame) in self {
26 | for (innerKey, series) in frame {
27 | // The default creates the inner frame the first time `innerKey` is seen;
28 | // the series is then filed under what used to be the outer key.
29 | transposedData[innerKey, default: [:]][outerKey] = series
30 | }
31 | }
32 |
33 | // Nothing was inserted for an empty panel or for empty frames,
34 | // so the result is simply empty in those cases.
35 | return transposedData
36 | }
37 |
38 | /**
39 | Rebuilds the panel by running `transform` on every DataSeries of every DataFrame.
40 | Outer and inner keys are kept; only the series stored under them are replaced.
41 | */
42 | func flatMapDataFrameValues(_ transform: (DataSeries) -> DataSeries ) -> DataPanel where Value == DataFrame {
43 | return mapValues { frame in frame.mapValues { series in transform(series) } }
44 | }
45 |
46 | /**
47 | Rebuilds the panel by running `transform` on every element of every DataSeries.
48 | `transform` receives nil elements too and decides what they become.
49 | */
50 | func flatMapValues(_ transform: (V?) -> U? ) -> DataPanel where Value == DataFrame {
51 | return flatMapDataFrameValues { column in DataSeries(column.map { element in transform(element) }) }
52 | }
53 |
54 | /**
55 | Looks up the two DataFrames stored under `keys` and passes them to `transform`.
56 | A key that is not present in the panel is passed as nil.
57 | The DataFrame returned by `transform` is returned as-is.
58 | */
59 | func mapValues(keys: (Key, Key),
60 | transform: (DataFrame?, DataFrame?) -> DataFrame)
61 | -> DataFrame
62 |
63 | where
64 | Value == DataFrame {
65 | let (firstKey, secondKey) = keys
66 | return transform(self[firstKey], self[secondKey])
67 | }
68 |
69 | /**
70 | Reports the panel size as (depth, width, height).
71 | Depth counts the outer keys; width and height come from the first stored DataFrame, or are 0 when the panel is empty.
72 | */
73 | func shape() -> (depth: Int, width: Int, height: Int) where Value == DataFrame {
74 | let frameShape = values.first?.shape() ?? (width: 0, height: 0)
75 | return (depth: keys.count, width: frameShape.width, height: frameShape.height)
76 | }
77 | }
78 |
79 |
80 |
--------------------------------------------------------------------------------
/logo.svg:
--------------------------------------------------------------------------------
1 |
35 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataFrame/DataFrame+IO.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 23.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | extension DataFrame {
11 | /**
12 | Converts the DataFrame to an array of string lines representing the data in tabular format.
13 | The first line contains column headers (keys), followed by data rows.
14 | Each row is joined with the specified separator.
15 | */
16 | public func toStringRowLines(separator: String) -> [String] where Value == DataSeries, V: LosslessStringConvertible, Key: LosslessStringConvertible {
17 | var resultStringLines: [String] = []
18 | let sortedKeys = keys.sorted { return String($0) < String($1) }
19 |
20 | resultStringLines.append("\(sortedKeys.map { String($0) }.joined(separator: separator))")
21 | let sortedValues = sortedKeys.map { self[$0] }
22 |
23 | let height = shape().height
24 | for idx in 0..(toFile: String,
38 | atomically: Bool = true,
39 | encoding: String.Encoding = .utf8,
40 | columnSeparator: String) throws
41 | where
42 |
43 | Value == DataSeries,
44 | V: LosslessStringConvertible,
45 | Key: LosslessStringConvertible {
46 |
47 | let dataframeString = toStringRowLines(separator: columnSeparator).joined(separator: "\n")
48 | try dataframeString.write(toFile: toFile, atomically: atomically, encoding: encoding)
49 | }
50 |
51 | /**
52 | Creates a DataFrame by reading and parsing the delimited text file at `file`.
53 | Delegates to `read(from:encoding:columnSeparator:)` with the same arguments
54 | and propagates any error that call throws.
55 | */
56 | public init(
57 | contentsOfFile file: String,
58 | encoding: String.Encoding = .utf8,
59 | columnSeparator: String) throws
60 |
61 | where
62 | Value == DataSeries,
63 | V: LosslessStringConvertible,
64 | Key: LosslessStringConvertible,
65 | Key: Hashable {
66 |
67 | self = try Self.read(from: file, encoding: encoding, columnSeparator: columnSeparator)
68 | }
69 |
70 | /**
71 | Parses the delimited text file at `file` into a DataFrame, one column per header found on the first line.
72 | Headers that fail conversion to the key type are dropped; cells that fail conversion to `V` are stored as nil.
73 | Throws if the file cannot be read with the given encoding.
74 | */
75 | fileprivate static func read(
76 | from file: String,
77 | encoding: String.Encoding = .utf8,
78 | columnSeparator: String) throws -> DataFrame
79 |
80 | where
81 | Value == DataSeries,
82 | V: LosslessStringConvertible,
83 | K: LosslessStringConvertible,
84 | K: Hashable {
85 |
86 | let fileString = try String(contentsOfFile: file, encoding: encoding)
87 |
88 | var df = DataFrame()
89 | var keys: [K] = []
90 |
91 | var lineNumber = 0
92 | // The first line supplies the column keys; every later line is treated as a data row.
93 | fileString.enumerateLines { (line, _) in
94 | let lineComponents = line.components(separatedBy: columnSeparator)
95 | if lineNumber == 0 {
96 | keys = lineComponents
97 | .map { K($0) }
98 | .compactMap { $0 }
99 | // NOTE(review): dropping an unparsable header shifts `keys` left, so the zip below would pair later columns with the wrong cells — confirm.
100 | keys.forEach { df[$0] = DataSeries() }
101 | } else {
102 | let valuesRow = lineComponents.map { V($0) }
103 | zip(keys, valuesRow).forEach { df[$0]?.append($01) } // NOTE(review): closure arguments look garbled in this listing (expected the key and value halves of the zipped pair) — verify against the repository.
104 | }
105 |
106 | lineNumber += 1
107 | }
108 |
109 | return df
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/SeriesArray.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 06.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | An array-backed collection conforming to RangeReplaceableCollection and Codable;
12 | DataSeries is a typealias of it. All storage lives in the fileprivate `array` property,
13 | and the indices, traversal and empty initializer declared here forward to that array.
14 | Slices are SeriesArray values themselves (`SubSequence = SeriesArray`).
15 | */
16 | public struct SeriesArray: RangeReplaceableCollection, Codable {
17 |
18 | public typealias Element = T
19 | public typealias Index = Int
20 | public typealias SubSequence = SeriesArray
21 | public typealias Indices = Range
22 | fileprivate var array: Array
23 | // Index-related requirements below forward to the backing array.
24 | public var startIndex: Int { return array.startIndex }
25 | public var endIndex: Int { return array.endIndex }
26 | public var indices: Range { return array.indices }
27 |
28 |
29 | public func index(after i: Int) -> Int {
30 | return array.index(after: i)
31 | }
32 | // Creates an empty collection.
33 | public init() { array = [] }
34 | }
35 |
36 | // Initializers
37 |
38 | public extension SeriesArray {
39 | /// Creates a series array holding the elements of `elements`, in iteration order.
40 | init(_ elements: S) where S : Sequence, SeriesArray.Element == S.Element {
41 | array = Array(elements)
42 | }
43 | /// Creates a series array containing `count` copies of `repeatedValue`.
44 | init(repeating repeatedValue: SeriesArray.Element, count: Int) {
45 | array = Array(repeating: repeatedValue, count: count)
46 | }
47 | }
48 |
49 | // Instance Methods
50 |
51 | public extension SeriesArray {
52 | /// Appends `newElement` to the end of the backing array.
53 | mutating func append(_ newElement: SeriesArray.Element) {
54 | array.append(newElement)
55 | }
56 | /// Appends every element of `newElements`, in order.
57 | mutating func append(contentsOf newElements: S) where S : Sequence, SeriesArray.Element == S.Element {
58 | array.append(contentsOf: newElements)
59 | }
60 | /// Returns a new SeriesArray with the elements for which `isIncluded` returns true.
61 | func filter(_ isIncluded: (SeriesArray.Element) throws -> Bool) rethrows -> SeriesArray {
62 | let subArray = try array.filter(isIncluded)
63 | return SeriesArray(subArray)
64 | }
65 | /// Inserts `newElement` at position `i`.
66 | mutating func insert(_ newElement: SeriesArray.Element, at i: SeriesArray.Index) {
67 | array.insert(newElement, at: i)
68 | }
69 | /// Inserts the elements of `newElements` at position `i`.
70 | mutating func insert(contentsOf newElements: S, at i: SeriesArray.Index) where S : Collection, SeriesArray.Element == S.Element {
71 | array.insert(contentsOf: newElements, at: i)
72 | }
73 | /// Removes and returns the last element, or returns nil when the collection is empty.
74 | mutating func popLast() -> SeriesArray.Element? {
75 | return array.popLast()
76 | }
77 | /// Removes and returns the element at position `i`.
78 | @discardableResult mutating func remove(at i: SeriesArray.Index) -> SeriesArray.Element {
79 | return array.remove(at: i)
80 | }
81 | /// Removes all elements; the backing storage is kept when `keepCapacity` is true (the flag was previously ignored).
82 | mutating func removeAll(keepingCapacity keepCapacity: Bool) {
83 | array.removeAll(keepingCapacity: keepCapacity)
84 | }
85 | /// Removes every element for which `shouldBeRemoved` returns true.
86 | mutating func removeAll(where shouldBeRemoved: (SeriesArray.Element) throws -> Bool) rethrows {
87 | try array.removeAll(where: shouldBeRemoved)
88 | }
89 | /// Removes and returns the first element.
90 | @discardableResult mutating func removeFirst() -> SeriesArray.Element {
91 | return array.removeFirst()
92 | }
93 | /// Removes the first `k` elements.
94 | mutating func removeFirst(_ k: Int) {
95 | array.removeFirst(k)
96 | }
97 | /// Removes and returns the last element.
98 | @discardableResult mutating func removeLast() -> SeriesArray.Element {
99 | return array.removeLast()
100 | }
101 | /// Removes the last `k` elements.
102 | mutating func removeLast(_ k: Int) {
103 | array.removeLast(k)
104 | }
105 | /// Removes the elements in `bounds`.
106 | mutating func removeSubrange(_ bounds: Range) {
107 | array.removeSubrange(bounds)
108 | }
109 | /// Replaces the elements in `subrange` with `newElements`.
110 | mutating func replaceSubrange(_ subrange: R, with newElements: C) where C : Collection, R : RangeExpression, T == C.Element, SeriesArray.Index == R.Bound {
111 | array.replaceSubrange(subrange, with: newElements)
112 | }
113 | /// Forwards the capacity reservation for `n` elements to the backing array.
114 | mutating func reserveCapacity(_ n: Int) {
115 | array.reserveCapacity(n)
116 | }
117 | }
118 |
119 | // Subscripts
120 |
121 | public extension SeriesArray {
122 | /// Builds a new SeriesArray from the elements of the backing array that lie in `bounds`.
123 | subscript(bounds: Range) -> SeriesArray.SubSequence {
124 | return SeriesArray(array[bounds])
125 | }
126 | /// Reads or replaces the element stored at the given position.
127 | subscript(position: SeriesArray.Index) -> SeriesArray.Element {
128 | get { array[position] }
129 | set { array[position] = newValue }
130 | }
131 | }
132 |
133 | extension SeriesArray: CustomStringConvertible {
134 | public var description: String { array.description } // Same text the backing array prints.
135 | }
136 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/DataSeries.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 04.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | A SeriesArray of optional values that can be encoded and decoded using Codable.
12 | Used to store and manipulate 1D tabular data with optional values.
13 | */
14 | public typealias DataSeries = SeriesArray
15 |
16 | /**
17 | Element-wise selection between two branches given as DataSeriesType values.
18 | Each branch may be a series or a scalar; both are resolved against `condition` first.
19 | Returns nil when `condition` is nil or when either branch resolves to nil.
20 | */
21 | public func whereCondition(_ condition: DataSeries?,
22 | then trueSeries: DataSeriesType, U>,
23 | else series: DataSeriesType, U>) -> DataSeries? {
24 |
25 | // Resolve the mask first, then both branches against it.
26 | // Any nil along the way makes the whole result nil.
27 | guard
28 | let mask = condition,
29 | let whenTrue = trueSeries.toDataSeriesWithShape(of: mask),
30 | let whenFalse = series.toDataSeriesWithShape(of: mask)
31 | else {
32 | return nil
33 | }
34 |
35 | // Hand over to the overload that takes plain series; the declared
36 | // return type of this function selects that overload.
37 | return whereCondition(mask, then: whenTrue, else: whenFalse)
38 | }
39 |
40 | /**
41 | Element-wise selection between two optional series, driven by a boolean condition series.
42 | Returns nil when `condition` is nil; otherwise forwards both branches to `whereTrue(then:else:)` on the condition.
43 | */
44 | public func whereCondition(_ condition: DataSeries?, then trueSeries: DataSeries?, else series: DataSeries?) -> DataSeries? {
45 | return condition?.whereTrue(then: trueSeries, else: series)
46 | }
47 |
48 | /**
49 | Element-wise selection between two constants, driven by a boolean condition series.
50 | Returns nil when `condition` is nil.
51 | */
52 | public func whereCondition(_ condition: DataSeries?, then trueValue: U, else value: U) -> DataSeries? {
53 | // Without a mask there is nothing to select from.
54 | guard let mask = condition else { return nil }
55 |
56 | // Build one series per constant via `mapTo(constant:)`, then select between them.
57 | return mask.whereTrue(
58 | then: mask.mapTo(constant: trueValue),
59 | else: mask.mapTo(constant: value)
60 | )
61 | }
62 |
63 | /**
64 | Combines three DataSeries element-wise into a single DataSeries of Tuple3 values.
65 | Returns nil if any input series is nil. Equal lengths are a precondition checked with `assert`
66 | (debug builds only), not a nil result; without the assertion `zip` stops at the shortest input.
67 | */
68 | public func zipSeries(_ s1: DataSeries?, _ s2: DataSeries?, _ s3: DataSeries?) -> DataSeries>? {
69 | guard let s1 = s1,
70 | let s2 = s2,
71 | let s3 = s3
72 | else {
73 | return nil
74 | }
75 |
76 | assert(s1.count == s2.count, "Dataseries should have equal length")
77 | assert(s1.count == s3.count, "Dataseries should have equal length")
78 |
79 | let result = zip(s1, zip(s2, s3)).map { Tuple3(t1: $0.0, t2: $0.1.0, t3: $0.1.1) }
80 | return DataSeries(result)
81 | }
82 |
83 | /**
84 | Combines four DataSeries element-wise into a single DataSeries of Tuple4 values.
85 | Returns nil if any input series is nil. Equal lengths are a precondition checked with `assert`
86 | (debug builds only), not a nil result; without the assertion `zip` stops at the shortest input.
87 | */
88 | public func zipSeries(_ s1: DataSeries?, _ s2: DataSeries?, _ s3: DataSeries?, _ s4: DataSeries?) -> DataSeries>? {
89 | guard let s1 = s1,
90 | let s2 = s2,
91 | let s3 = s3,
92 | let s4 = s4
93 | else {
94 | return nil
95 | }
96 |
97 | assert(s1.count == s2.count, "Dataseries should have equal length")
98 | assert(s1.count == s3.count, "Dataseries should have equal length")
99 | assert(s1.count == s4.count, "Dataseries should have equal length")
100 |
101 |
102 | let result = zip(zip(s1, s2), zip(s3, s4)).map { Tuple4(t1: $0.0.0, t2: $0.0.1, t3: $0.1.0, t4: $0.1.1) }
103 | return DataSeries(result)
104 | }
105 |
106 | /**
107 | Combines two DataSeries element-wise into a single DataSeries of Tuple2 values.
108 | Returns nil if either input series is nil. Equal lengths are a precondition checked with `assert`
109 | (debug builds only), not a nil result; without the assertion `zip` stops at the shorter input.
110 | */
111 | public func zipSeries(_ s1: DataSeries?, _ s2: DataSeries?) -> DataSeries>? {
112 | guard let s1 = s1,
113 | let s2 = s2
114 | else {
115 | return nil
116 | }
117 |
118 | assert(s1.count == s2.count, "Dataseries should have equal length")
119 |
120 | let result = zip(s1, s2).map { Tuple2(t1: $0.0, t2: $0.1) }
121 | return DataSeries(result)
122 | }
123 |
--------------------------------------------------------------------------------
/Tests/KoalasTests/DataPanelTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataPanelTests.swift
3 | //
4 | //
5 | // Created by AI Assistant on 2024.
6 | //
7 |
8 | import XCTest
9 | @testable import Koalas
10 |
11 | final class DataPanelTests: XCTestCase {
12 |
13 | // MARK: - transposed Tests
14 |
15 | func test_transposed_SwapsKeyDimensions() {
16 | let s1 = DataSeries([1, 2, 3])
17 | let s2 = DataSeries([4, 5, 6])
18 | let s3 = DataSeries([7, 8, 9])
19 | let s4 = DataSeries([10, 11, 12])
20 | // Two frames that share the column keys "col1" and "col2".
21 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
22 | let df2 = DataFrame(dictionaryLiteral: ("col1", s3), ("col2", s4))
23 | // The panel keys ("row1", "row2") are the outer level before transposing.
24 | let panel: DataPanel = [
25 | "row1": df1,
26 | "row2": df2
27 | ]
28 |
29 | let transposed = panel.transposed()
30 |
31 | // Check that keys are swapped
32 | XCTAssertEqual(transposed.keys.count, 2)
33 | XCTAssertTrue(transposed.keys.contains("col1"))
34 | XCTAssertTrue(transposed.keys.contains("col2"))
35 |
36 | // Check that values are correctly transposed
37 | XCTAssertEqual(transposed["col1"]?["row1"]?[0], 1)
38 | XCTAssertEqual(transposed["col1"]?["row2"]?[0], 7)
39 | XCTAssertEqual(transposed["col2"]?["row1"]?[0], 4)
40 | XCTAssertEqual(transposed["col2"]?["row2"]?[0], 10)
41 | }
42 |
43 | func test_transposed_WithEmptyPanel() {
44 | let panel: DataPanel = [:]
45 |
46 | let transposed = panel.transposed()
47 | // An empty panel must transpose to an empty panel.
48 | XCTAssertEqual(transposed.count, 0)
49 | }
50 |
51 | func test_transposed_WithEmptyDataFrames() {
52 | let df1: DataFrame = [:]
53 | let df2: DataFrame = [:]
54 | // Outer keys exist, but the frames under them hold no columns.
55 | let panel: DataPanel = [
56 | "row1": df1,
57 | "row2": df2
58 | ]
59 |
60 | let transposed = panel.transposed()
61 | // With no inner keys to promote, the transposed panel has no entries.
62 | XCTAssertEqual(transposed.count, 0)
63 | }
64 |
65 | // MARK: - flatMapDataFrameValues Tests
66 |
67 | func test_flatMapDataFrameValues_TransformsDataSeries() {
68 | let s1 = DataSeries([1, 2, 3])
69 | let s2 = DataSeries([4, 5, 6])
70 | let s3 = DataSeries([7, 8, 9])
71 | let s4 = DataSeries([10, 11, 12])
72 |
73 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
74 | let df2 = DataFrame(dictionaryLiteral: ("col1", s3), ("col2", s4))
75 |
76 | let panel: DataPanel = [
77 | "row1": df1,
78 | "row2": df2
79 | ]
80 | // Double every non-nil element of each series.
81 | let result = panel.flatMapDataFrameValues { series in
82 | DataSeries(series.map { $0.map { $0 * 2 } })
83 | }
84 | // Spot-check doubled values under both outer keys and both columns.
85 | XCTAssertEqual(result["row1"]?["col1"]?[0], 2)
86 | XCTAssertEqual(result["row1"]?["col1"]?[1], 4)
87 | XCTAssertEqual(result["row1"]?["col2"]?[0], 8)
88 | XCTAssertEqual(result["row2"]?["col1"]?[0], 14)
89 | XCTAssertEqual(result["row2"]?["col2"]?[0], 20)
90 | }
91 |
92 | func test_flatMapDataFrameValues_HandlesNilValues() {
93 | let s1 = DataSeries([1, nil, 3])
94 | let s2 = DataSeries([4, 5, nil])
95 |
96 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
97 |
98 | let panel: DataPanel = [
99 | "row1": df1
100 | ]
101 | // The transform substitutes 0 for nil before doubling, so nil inputs become 0.
102 | let result = panel.flatMapDataFrameValues { series in
103 | DataSeries(series.map { ($0 ?? 0) * 2 })
104 | }
105 | // Positions that held nil (col1[1], col2[2]) are expected to be 0.
106 | XCTAssertEqual(result["row1"]?["col1"]?[0], 2)
107 | XCTAssertEqual(result["row1"]?["col1"]?[1], 0)
108 | XCTAssertEqual(result["row1"]?["col1"]?[2], 6)
109 | XCTAssertEqual(result["row1"]?["col2"]?[0], 8)
110 | XCTAssertEqual(result["row1"]?["col2"]?[1], 10)
111 | XCTAssertEqual(result["row1"]?["col2"]?[2], 0)
112 | }
113 |
114 | // MARK: - flatMapValues Tests
115 |
116 | func test_flatMapValues_TransformsIndividualValues() {
117 | let s1 = DataSeries([1, 2, 3])
118 | let s2 = DataSeries([4, 5, 6])
119 |
120 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
121 |
122 | let panel: DataPanel = [
123 | "row1": df1
124 | ]
125 | // Here the closure receives one optional element at a time and doubles it when present.
126 | let result = panel.flatMapValues { value in
127 | value.map { $0 * 2 }
128 | }
129 | // Every element of both columns should be doubled.
130 | XCTAssertEqual(result["row1"]?["col1"]?[0], 2)
131 | XCTAssertEqual(result["row1"]?["col1"]?[1], 4)
132 | XCTAssertEqual(result["row1"]?["col1"]?[2], 6)
133 | XCTAssertEqual(result["row1"]?["col2"]?[0], 8)
134 | XCTAssertEqual(result["row1"]?["col2"]?[1], 10)
135 | XCTAssertEqual(result["row1"]?["col2"]?[2], 12)
136 | }
137 |
138 | func test_flatMapValues_HandlesNilValues() {
139 | let s1 = DataSeries([1, nil, 3])
140 | let s2 = DataSeries([4, 5, nil])
141 |
142 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
143 |
144 | let panel: DataPanel = [
145 | "row1": df1
146 | ]
147 | // Optional.map leaves a nil element as nil, so nil positions are expected to survive.
148 | let result = panel.flatMapValues { value in
149 | value.map { $0 * 2 }
150 | }
151 | // Non-nil elements are doubled; col1[1] and col2[2] should stay nil.
152 | XCTAssertEqual(result["row1"]?["col1"]?[0], 2)
153 | XCTAssertNil(result["row1"]?["col1"]?[1])
154 | XCTAssertEqual(result["row1"]?["col1"]?[2], 6)
155 | XCTAssertEqual(result["row1"]?["col2"]?[0], 8)
156 | XCTAssertEqual(result["row1"]?["col2"]?[1], 10)
157 | XCTAssertNil(result["row1"]?["col2"]?[2])
158 | }
159 |
160 | // MARK: - mapValues Tests
161 |
162 | func test_mapValues_WithTwoKeys() {
163 | let s1 = DataSeries([1, 2, 3])
164 | let s2 = DataSeries([4, 5, 6])
165 | let s3 = DataSeries([7, 8, 9])
166 | // Note that df2 reuses s2 as its "col1" and uses s3 as "col2".
167 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
168 | let df2 = DataFrame(dictionaryLiteral: ("col1", s2), ("col2", s3))
169 |
170 | let panel: DataPanel = [
171 | "row1": df1,
172 | "row2": df2
173 | ]
174 | // The closure gets the optional DataFrames stored under the two keys and adds them.
175 | let result = panel.mapValues(keys: ("row1", "row2")) { df1, df2 in
176 | guard let df1 = df1, let df2 = df2 else { return DataFrame() }
177 | return df1 + df2
178 | }
179 | // Expected values are the element-wise sums noted next to each assertion.
180 | XCTAssertEqual(result["col1"]?[0], 5) // 1 + 4
181 | XCTAssertEqual(result["col1"]?[1], 7) // 2 + 5
182 | XCTAssertEqual(result["col1"]?[2], 9) // 3 + 6
183 | XCTAssertEqual(result["col2"]?[0], 11) // 4 + 7
184 | XCTAssertEqual(result["col2"]?[1], 13) // 5 + 8
185 | XCTAssertEqual(result["col2"]?[2], 15) // 6 + 9
186 | }
187 |
188 | func test_mapValues_WithNilDataFrames() {
189 | let s1 = DataSeries([1, 2, 3])
190 |
191 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1))
192 |
193 | let panel: DataPanel = [
194 | "row1": df1
195 | ]
196 | // "nonexistent" is not a key of the panel; the test expects the closure's guard to take the empty-DataFrame path.
197 | let result = panel.mapValues(keys: ("row1", "nonexistent")) { df1, df2 in
198 | guard let df1 = df1, let df2 = df2 else { return DataFrame() }
199 | return df1 + df2
200 | }
201 |
202 | XCTAssertEqual(result.count, 0) // Should return empty DataFrame
203 | }
204 |
205 | // MARK: - shape Tests
206 |
207 | func test_shape_ReturnsCorrectDimensions() {
208 | let s1 = DataSeries([1, 2, 3])
209 | let s2 = DataSeries([4, 5, 6])
210 | let s3 = DataSeries([7, 8, 9])
211 | let s4 = DataSeries([10, 11, 12])
212 | // Two DataFrames, each with two columns of three elements.
213 | let df1 = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
214 | let df2 = DataFrame(dictionaryLiteral: ("col1", s3), ("col2", s4))
215 |
216 | let panel: DataPanel = [
217 | "row1": df1,
218 | "row2": df2
219 | ]
220 | // shape() exposes the labelled components depth, width and height.
221 | let shape = panel.shape()
222 |
223 | XCTAssertEqual(shape.depth, 2) // 2 top-level keys
224 | XCTAssertEqual(shape.width, 2) // 2 columns in each DataFrame
225 | XCTAssertEqual(shape.height, 3) // 3 rows in each DataFrame
226 | }
227 |
228 | func test_shape_WithEmptyPanel() {
229 | let panel: DataPanel = [:]
230 | // An empty panel is expected to report zero for every dimension.
231 | let shape = panel.shape()
232 |
233 | XCTAssertEqual(shape.depth, 0)
234 | XCTAssertEqual(shape.width, 0)
235 | XCTAssertEqual(shape.height, 0)
236 | }
237 |
238 | func test_shape_WithEmptyDataFrames() {
239 | let df1: DataFrame = [:]
240 | let df2: DataFrame = [:]
241 |
242 | let panel: DataPanel = [
243 | "row1": df1,
244 | "row2": df2
245 | ]
246 | // depth still counts the two top-level keys; width and height are expected to be 0 for empty DataFrames.
247 | let shape = panel.shape()
248 |
249 | XCTAssertEqual(shape.depth, 2)
250 | XCTAssertEqual(shape.width, 0)
251 | XCTAssertEqual(shape.height, 0)
252 | }
253 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | [Swift 5.2+](https://swift.org)
6 | [Platforms: macOS 10.15+, iOS 8.0+](https://swift.org)
7 | [License: MIT](LICENSE)
8 |
9 | A powerful Swift library for multidimensional data manipulation, inspired by Python's pandas. Koalas provides native Swift implementations of DataSeries, DataFrame, and DataPanel for efficient data analysis and manipulation.
10 |
11 | **Perfect for iOS/macOS apps that need data analysis capabilities without external dependencies.**
12 |
13 | ## Table of Contents
14 | - [Features](#features)
15 | - [Installation](#installation)
16 | - [Quick Start](#quick-start)
17 | - [Data Structures](#data-structures)
18 | - [Advanced Features](#advanced-features)
19 | - [Requirements](#requirements)
20 | - [License](#license)
21 |
22 | ## Features
23 |
24 | - **Multi-dimensional Data Structures**: DataSeries (1D), DataFrame (2D), DataPanel (3D)
25 | - **Comprehensive Arithmetic Operations**: Memberwise operations with automatic alignment
26 | - **Statistical Functions**: Sum, mean, standard deviation, expanding and rolling windows
27 | - **Missing Data Handling**: Multiple strategies for nil value management
28 | - **Data Manipulation**: Shifting, filling, conditional operations, and reshaping
29 | - **IO Operations**: CSV read/write, JSON encoding/decoding
30 | - **Type Safety**: Full Swift type system integration with generics and protocols
31 | - **Performance**: Built on Swift's collections
32 |
33 | ## Installation
34 |
35 | ### Swift Package Manager
36 |
37 | Add Koalas to your project using Swift Package Manager:
38 |
39 | 1. In Xcode, go to **File** → **Add Package Dependencies**
40 | 2. Enter the repository URL: `https://github.com/your-username/Koalas.git`
41 | 3. Select the version you want to use
42 | 4. Click **Add Package**
43 |
44 | Or add it to your `Package.swift`:
45 | ```swift
46 | dependencies: [
47 | .package(url: "https://github.com/your-username/Koalas.git", from: "1.0.0")
48 | ]
49 | ```
50 |
51 | ```swift
52 | import Koalas
53 | ```
54 |
55 | ## Quick Start
56 |
57 | ### Creating DataFrames
58 |
59 | ```swift
60 | import Koalas
61 |
62 | // Create a DataFrame with multiple columns (Double values, as mean/std/rollingMean require floating-point elements)
63 | let df = DataFrame(dictionaryLiteral:
64 | ("A", DataSeries([1.0, 2.0, 3.0, 4.0, 5.0])),
65 | ("B", DataSeries([10.0, 20.0, 30.0, 40.0, 50.0])),
66 | ("C", DataSeries([100.0, 200.0, 300.0, 400.0, 500.0]))
67 | )
68 |
69 | // Create a constant DataFrame with the same shape
70 | let constDf = df.mapTo(constant: 10.0)
71 | ```
72 |
73 | ### Basic Operations
74 |
75 | ```swift
76 | // Arithmetic operations
77 | let sum = df + constDf
78 | let diff = df - constDf
79 | let product = df * constDf
80 | let quotient = df / constDf
81 |
82 | // Statistical operations
83 | let columnSums = df.columnSum()
84 | let means = df.mean()
85 | let stdDevs = df.std()
86 | let sums = df.sum()
87 |
88 | // Expanding operations
89 | let expandingSums = df.expandingSum(initial: 0)
90 | let expandingMax = df.expandingMax()
91 | let expandingMin = df.expandingMin()
92 | ```
93 |
94 | ### Working with Missing Data
95 |
96 | ```swift
97 | // Create DataFrame with nil values
98 | let dfWithNils = DataFrame(dictionaryLiteral:
99 | ("A", DataSeries([1, nil, 3, nil, 5])),
100 | ("B", DataSeries([10, 20, nil, 40, 50]))
101 | )
102 |
103 | // Fill nil values
104 | let filledForward = dfWithNils.fillNils(method: .forward(initial: 0))
105 | let filledBackward = dfWithNils.fillNils(method: .backward(initial: nil))
106 | let filledConstant = dfWithNils.fillNils(method: .all(0))
107 | ```
108 |
109 | ### Time Series Operations
110 |
111 | ```swift
112 | // Shift data (useful for time series)
113 | let shifted = df.shiftedBy(2) // Shift forward by 2 positions
114 |
115 | // Rolling window operations
116 | let rollingSum = df.rollingSum(window: 3)
117 | let rollingMean = df.rollingMean(window: 3)
118 |
119 | // Custom rolling function
120 | let rollingCustom = df.rollingFunc(initial: 0, window: 3) { window in
121 | // Custom aggregation logic
122 | return window.compactMap { $0 }.reduce(0, +)
123 | }
124 | ```
125 |
126 | ### Conditional Operations
127 |
128 | ```swift
129 | // Create condition DataFrame
130 | let condition = df > 25
131 |
132 | // Apply conditional logic
133 | let result = whereCondition(condition, then: df * 2, else: df / 2)
134 | ```
135 |
136 | ### Data Import/Export
137 |
138 | ```swift
139 | // Write DataFrame to CSV
140 | try df.write(toFile: "data.csv", columnSeparator: ",")
141 |
142 | // Read DataFrame from CSV
143 | let importedDf = try DataFrame(
144 | contentsOfFile: "data.csv",
145 | columnSeparator: ","
146 | )
147 |
148 | // Convert to string representation
149 | let csvLines = df.toStringRowLines(separator: ",")
150 | ```
151 |
152 | ## Usage Examples
153 |
154 | ### Financial Data Analysis
155 | ```swift
156 | // Calculate moving averages for stock prices
157 | let stockData = DataFrame(dictionaryLiteral:
158 | ("price", DataSeries([100.0, 102.0, 98.0, 105.0, 103.0])),
159 | ("volume", DataSeries([1000, 1200, 800, 1500, 1100]))
160 | )
161 |
162 | let movingAverage = stockData["price"]!.rollingMean(window: 3)
163 | ```
164 |
165 | ### Data Cleaning
166 | ```swift
167 | // Clean dataset with missing values
168 | let rawData = DataFrame(dictionaryLiteral:
169 | ("name", DataSeries(["Alice", "Bob", nil, "Charlie"])),
170 | ("city", DataSeries(["NYC", nil, "LA", "Chicago"])),
171 | ("status", DataSeries(["active", "inactive", nil, "active"]))
172 | )
173 |
174 | let cleanedData = rawData.fillNils(method: .forward(initial: "Unknown"))
175 | ```
176 |
177 | ### Working with Different Data Types
178 | ```swift
179 | // For mixed data types, use separate DataFrames or zipSeries
180 | let names = DataSeries(["Alice", "Bob", "Charlie"])
181 | let ages = DataSeries([25, 30, 35])
182 | let scores = DataSeries([85, 92, 88])
183 |
184 | // Combine different types using zipSeries
185 | let combined = zipSeries(names, ages, scores)
186 | // Returns an optional DataSeries of 3-element tuples (components t1, t2, t3)
187 | ```
188 |
189 | ## Data Structures
190 |
191 | ### DataSeries
192 |
193 | A 1-dimensional data structure for handling arrays with optional values:
194 |
195 | ```swift
196 | // Create DataSeries
197 | let series = DataSeries([1, 2, nil, 4, 5])
198 |
199 | // Basic operations
200 | let doubled = series * 2
201 | let shifted = series.shiftedBy(1)
202 | let filled = series.fillNils(method: .forward(initial: 0))
203 |
204 | // Statistical functions
205 | let sum = series.sum()
206 | let mean = series.mean()
207 | let std = series.std()
208 | ```
209 |
210 | ### DataFrame
211 |
212 | A 2-dimensional data structure implemented as a dictionary of DataSeries:
213 |
214 | ```swift
215 | // Create DataFrame
216 | let df = DataFrame(dictionaryLiteral:
217 | ("col1", DataSeries([1, 2, 3])),
218 | ("col2", DataSeries([4, 5, 6]))
219 | )
220 |
221 | // Access shape - returns tuple
222 | let (width, height) = df.shape()
223 |
224 | // Aggregations
225 | let columnSums = df.columnSum() // DataSeries?: element-wise sum across all columns (nil for an empty DataFrame)
226 | let totals = df.sum() // DataFrame: one single-element series per column holding that column's sum
227 | ```
228 |
229 | ### DataPanel
230 |
231 | A 3-dimensional data structure for handling multiple DataFrames:
232 |
233 | ```swift
234 | // Create DataPanel
235 | let panel = DataPanel(dictionaryLiteral:
236 | ("group1", DataFrame(dictionaryLiteral:
237 | ("A", DataSeries([1, 2, 3])),
238 | ("B", DataSeries([4, 5, 6]))
239 | )),
240 | ("group2", DataFrame(dictionaryLiteral:
241 | ("A", DataSeries([7, 8, 9])),
242 | ("B", DataSeries([10, 11, 12]))
243 | ))
244 | )
245 |
246 | // Transpose panel
247 | let transposed = panel.transposed()
248 | ```
249 |
250 | ## Advanced Features
251 |
252 | ### Custom Aggregations
253 |
254 | ```swift
255 | // Custom rolling function
256 | let customRolling = df.rollingFunc(initial: 0, window: 3) { window in
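257 | // Calculate median of the non-nil values in the window (nil if there are none)
258 | let sorted = window.compactMap { $0 }.sorted()
259 | guard !sorted.isEmpty else { return nil }
260 | let mid = sorted.count / 2
261 | let isEven = sorted.count % 2 == 0
262 | return isEven ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]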
263 | }
264 | ```
265 |
266 | ### Data Alignment
267 |
268 | ```swift
269 | // DataFrames are automatically aligned by keys
270 | let df1 = DataFrame(dictionaryLiteral:
271 | ("A", DataSeries([1, 2, 3])),
272 | ("B", DataSeries([4, 5, 6]))
273 | )
274 |
275 | let df2 = DataFrame(dictionaryLiteral:
276 | ("B", DataSeries([7, 8, 9])),
277 | ("A", DataSeries([10, 11, 12]))
278 | )
279 |
280 | // Operations automatically align by column names
281 | let result = df1 + df2
282 | ```
283 |
284 | ### Type Safety
285 |
286 | ```swift
287 | // Strong typing ensures type safety
288 | let intDf = DataFrame(dictionaryLiteral:
289 | ("A", DataSeries([1, 2, 3]))
290 | )
291 |
292 | let doubleDf = DataFrame(dictionaryLiteral:
293 | ("A", DataSeries([1.0, 2.0, 3.0]))
294 | )
295 |
296 | // Type-safe operations
297 | let result: DataFrame = intDf + doubleDf // Compile-time error: Int and Double element types do not match
298 | ```
299 |
300 | ## Requirements
301 |
302 | - **Swift**: 5.2 or later
303 | - **Platforms**: macOS 10.15+, iOS 8.0+
304 | - **Xcode**: 11.0 or later
305 |
306 | ## License
307 |
308 | Koalas is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
309 |
--------------------------------------------------------------------------------
/Sources/Koalas/DataFrame/DataFrame.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 04.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | A dictionary-like structure that maps keys to DataSeries, i.e. 2D tabular data with one DataSeries per column.
12 | NOTE(review): the generic parameter lists look stripped in this listing (`Dictionary>`) — confirm against the original file.
13 | */
14 | public typealias DataFrame = Dictionary>
15 |
16 | public extension DataFrame {
17 | /**
18 | Initializes a DataFrame from a sequence of (key, DataSeries) pairs using `Dictionary(uniqueKeysWithValues:)`.
19 | Asserts (a debug-build check) that every DataSeries has the same count as the first one.
20 | */
21 | init(uniqueKeysWithSeries keysAndValues: S)
22 | where
23 | S: Sequence,
24 | S.Element == (Key, DataSeries),
25 | Value == DataSeries {
26 | // An always-true predicate turns `first(where:)` into "first element" of a plain Sequence; 0 is used if there is none.
27 | let firstDataSeriesCount = keysAndValues.first(where: { _ in true })?.1.count ?? 0
28 |
29 | let allSeriesCountsAreEqual = keysAndValues.allSatisfy { $0.1.count == firstDataSeriesCount }
30 |
31 | assert(allSeriesCountsAreEqual, "DataSeries should have equal length")
32 | self = Dictionary>(uniqueKeysWithValues: keysAndValues)
33 | }
34 | }
35 |
36 | public extension DataFrame {
37 | /**
38 | Applies `transform` to each DataSeries and gathers the resulting DataFrames into a DataPanel under the original keys.
39 | Returns that panel transposed.
40 | */
41 | func upscaleTransform(transform: (DataSeries) -> DataFrame) -> DataPanel where Value == DataSeries {
42 |
43 | let keyValues = map { ($0.key, transform($0.value)) }
44 | let dataPanel = DataPanel(uniqueKeysWithValues: keyValues)
45 |
46 | return dataPanel.transposed()
47 | }
48 |
49 | /**
50 | Applies `transform` to every element of every DataSeries; nil elements are passed to `transform` as well.
51 | Returns a new DataFrame with the same keys and the transformed values.
52 | */
53 | func flatMapValues(transform: (V?) -> U?) -> DataFrame where Value == DataSeries {
54 | return mapValues { series in DataSeries(series.map { transform($0) }) }
55 | }
56 |
57 | /**
58 | Replaces each DataSeries with the result of calling its `mapTo(constant:)` with `value`.
59 | Returns a new DataFrame with the same keys.
60 | */
61 | func mapTo(constant value: Constant) -> DataFrame where Value == DataSeries {
62 | return mapValues { $0.mapTo(constant: value) }
63 | }
64 |
65 | /**
66 | Replaces every DataSeries in the DataFrame with the given series, keeping the keys.
67 | Returns nil only when `value` is nil; a length mismatch is reported through `assert` and does not produce nil.
68 | */
69 | func mapTo(series value: DataSeries?) -> DataFrame? where Value == DataSeries {
70 | guard let value = value else {
71 | return nil
72 | }
73 |
74 | return mapValues {
75 | assert($0.count == value.count, "DataSeries should have equal length")
76 | return value
77 | }
78 | }
79 |
80 | /**
81 | Runs `scanSeries(initial:_:)` on each DataSeries in the DataFrame with the given initial value and closure.
82 | Returns a new DataFrame with the same keys and the scanned series.
83 | */
84 | func scan(initial: T?, _ nextPartialResult: (T?, V?) -> T?) -> DataFrame where Value == DataSeries, V: Numeric {
85 | return mapValues { $0.scanSeries(initial: initial, nextPartialResult) }
86 | }
87 |
88 | /**
89 | Shifts every DataSeries in the DataFrame by `amount` via `DataSeries.shiftedBy(_:)`.
90 | Direction follows that method (documented here as: positive values shift forward, negative values shift backward).
91 | */
92 | func shiftedBy(_ amount: Int) -> DataFrame where Value == DataSeries {
93 | return mapValues { $0.shiftedBy(amount) }
94 | }
95 |
96 | /**
97 | Calculates the expanding sum of each DataSeries by delegating to `DataSeries.expandingSum(initial:)`.
98 | The same `initial` value is passed for every column.
99 | */
100 | func expandingSum(initial: V) -> DataFrame where Value == DataSeries, V: Numeric {
101 | return mapValues { $0.expandingSum(initial: initial) }
102 | }
103 |
104 | /**
105 | Calculates the expanding maximum of each DataSeries by delegating to `DataSeries.expandingMax()`.
106 | Returns a new DataFrame with the same keys.
107 | */
108 | func expandingMax() -> DataFrame where Value == DataSeries, V: Comparable {
109 | return mapValues { $0.expandingMax() }
110 | }
111 |
112 | /**
113 | Calculates the expanding minimum of each DataSeries by delegating to `DataSeries.expandingMin()`.
114 | Returns a new DataFrame with the same keys.
115 | */
116 | func expandingMin() -> DataFrame where Value == DataSeries, V: Comparable {
117 | return mapValues { $0.expandingMin() }
118 | }
119 |
120 | /**
121 | Applies a rolling window function to each DataSeries by delegating to `DataSeries.rollingFunc(initial:window:windowFunc:)`.
122 | `windowFunc` receives an array of optional values and may itself return nil.
123 | */
124 | func rollingFunc(initial: V, window: Int, windowFunc: (([V?]) -> V?)) -> DataFrame where Value == DataSeries, V: Numeric {
125 | return mapValues { $0.rollingFunc(initial: initial, window: window, windowFunc: windowFunc)}
126 | }
127 |
128 | /**
129 | Calculates the rolling sum of each DataSeries by delegating to `DataSeries.rollingSum(window:)`.
130 | The same window size is used for every column.
131 | */
132 | func rollingSum(window: Int) -> DataFrame where Value == DataSeries, V: Numeric {
133 | return mapValues { $0.rollingSum(window: window) }
134 | }
135 |
136 | /**
137 | Calculates the rolling mean of each DataSeries by delegating to `DataSeries.rollingMean(window:)`.
138 | Only available when the element type is a FloatingPoint type.
139 | */
140 | func rollingMean(window: Int) -> DataFrame where Value == DataSeries, V: FloatingPoint {
141 | return mapValues { $0.rollingMean(window: window) }
142 | }
143 |
144 | /**
145 | Compares this DataFrame with another DataFrame for equality.
146 | Returns false when `dataframe` is nil or the key sets differ; otherwise true only if every series equals the one under the same key.
147 | */
148 | func equalsTo(dataframe: DataFrame?) -> Bool where Value == DataSeries, V: Equatable {
149 | guard let dataframe = dataframe else {
150 | return false
151 | }
152 |
153 | guard Set(self.keys) == Set(dataframe.keys) else {
154 | return false
155 | }
156 |
157 | return self.first { !$0.value.equalsTo(series: dataframe[$0.key]) } == nil
158 | }
159 |
160 | /**
161 | Compares this DataFrame with another DataFrame, passing `precision` to the per-series `equalsTo(series:with:)` comparison.
162 | Returns false when `dataframe` is nil or the key sets differ; otherwise true only if no series comparison fails.
163 | */
164 | func equalsTo(dataframe: DataFrame?, with precision: V) -> Bool where Value == DataSeries, V: FloatingPoint {
165 | guard let dataframe = dataframe else {
166 | return false
167 | }
168 |
169 | guard Set(self.keys) == Set(dataframe.keys) else {
170 | return false
171 | }
172 |
173 | return self.first { !$0.value.equalsTo(series: dataframe[$0.key], with: precision) } == nil
174 | }
175 | }
176 |
177 | /**
178 | Overload taking `DataFrameType` branches: converts both to DataFrames shaped like `condition` and forwards to the DataFrame-based overload.
179 | Returns nil if `condition` is nil or if either branch cannot be converted via `toDataframeWithShape(of:)`.
180 | */
181 | public func whereCondition(_ condition: DataFrame?,
182 | then trueDF: DataFrameType, T>,
183 | else df: DataFrameType, T>) -> DataFrame? {
184 | guard let condition = condition else {
185 | return nil
186 | }
187 |
188 | guard let trueDF = trueDF.toDataframeWithShape(of: condition) else {
189 | return nil
190 | }
191 |
192 | guard let falseDF = df.toDataframeWithShape(of: condition) else {
193 | return nil
194 | }
195 |
196 | return whereCondition(condition, then: trueDF, else: falseDF)
197 | }
198 |
199 | /**
200 | For every key of `condition`, combines the condition, true and false series under that key with the series-level `whereCondition`.
201 | Returns nil if any argument is nil; asserts (a debug-build check) that all three DataFrames have the same key set.
202 | */
203 | public func whereCondition(_ condition: DataFrame?,
204 | then trueDataFrame: DataFrame?,
205 | else dataframe: DataFrame?) -> DataFrame? {
206 |
207 | guard let condition = condition,
208 | let trueDataFrame = trueDataFrame,
209 | let dataframe = dataframe
210 | else {
211 | return nil
212 | }
213 |
214 | let keysSet = Set(condition.keys)
215 | assert(keysSet == Set(trueDataFrame.keys), "Dataframes should have equal keys sets")
216 | assert(keysSet == Set(dataframe.keys), "Dataframes should have equal keys sets")
217 |
218 | var res = DataFrame()
219 | // NOTE(review): if `unwrap` yields nil for a key, the assignment below leaves that key out of `res` — confirm this is intended.
220 | keysSet.forEach { key in
221 | res[key] = unwrap(condition[key],
222 | trueDataFrame[key],
223 | dataframe[key]) { return whereCondition($0, then: $1, else: $2) }
224 | }
225 |
226 | return res
227 | }
228 |
229 | public extension DataFrame {
230 | /**
231 | Returns the dimensions of the DataFrame as (width, height).
232 | Width is the number of keys; height is the count of the first DataSeries in `values` (0 when there are no columns).
233 | */
234 | func shape() -> (width: Int, height: Int) where Value == DataSeries {
235 | return (self.keys.count, self.values.first?.count ?? 0)
236 | }
237 |
238 | /**
239 | Calculates the sum of each DataSeries in the DataFrame via `DataSeries.sum(ignoreNils:)`.
240 | Returns a DataFrame with the same keys whose series each hold that single sum value.
241 | */
242 | func sum(ignoreNils: Bool = true) -> DataFrame where Value == DataSeries, V: Numeric {
243 | mapValues { DataSeries([$0.sum(ignoreNils: ignoreNils)]) }
244 | }
245 |
246 | /**
247 | Adds all DataSeries together with `+`, starting from a zero-filled series sized like the first column; returns nil if there are no columns.
248 | When `ignoreNils` is true, nils in each series are replaced by 0 before adding; otherwise the series are added as they are.
249 | */
250 | func columnSum(ignoreNils: Bool = true) -> DataSeries? where Value == DataSeries, V: Numeric {
251 | guard let first = values.first else {
252 | return nil
253 | }
254 |
255 | let initial = DataSeries(repeating: 0, count: first.count)
256 |
257 | return values.reduce(initial) { (currentRes: DataSeries, next: DataSeries) -> DataSeries in
258 | let nextSeries: DataSeries = ignoreNils ? next.fillNils(with: 0) : next
259 | return currentRes + nextSeries
260 | }
261 | }
262 |
263 | /**
264 | Calculates the mean of each DataSeries in the DataFrame via `DataSeries.mean(shouldSkipNils:)`.
265 | Returns a DataFrame with the same keys whose series each hold that single mean value.
266 | */
267 | func mean(shouldSkipNils: Bool = true) -> DataFrame where Value == DataSeries, V: FloatingPoint {
268 | mapValues { DataSeries([$0.mean(shouldSkipNils: shouldSkipNils)]) }
269 | }
270 |
271 | /**
272 | Calculates the standard deviation of each DataSeries in the DataFrame via `DataSeries.std(shouldSkipNils:)`.
273 | Returns a DataFrame with the same keys whose series each hold that single standard deviation value.
274 | */
275 | func std(shouldSkipNils: Bool = true) -> DataFrame where Value == DataSeries, V: FloatingPoint {
276 | mapValues { DataSeries([$0.std(shouldSkipNils: shouldSkipNils)]) }
277 | }
278 |
279 | /**
280 | Fills nil values in every DataSeries of the DataFrame by delegating to `DataSeries.fillNils(method:)`.
281 | Returns a new DataFrame with the same keys.
282 | */
283 | func fillNils(method: FillNilsMethod) -> DataFrame where Value == DataSeries {
284 | mapValues { $0.fillNils(method: method) }
285 | }
286 | }
287 |
--------------------------------------------------------------------------------
/Tests/KoalasTests/DataSeriesAdvancedTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataSeriesAdvancedTests.swift
3 | //
4 | //
5 | // Created by AI Assistant on 2024.
6 | //
7 |
8 | import XCTest
9 | @testable import Koalas
10 |
11 | final class DataSeriesAdvancedTests: XCTestCase {
12 |
13 | // MARK: - whereCondition Tests
14 |
15 | func test_whereCondition_WithDataSeriesType() {
16 | let condition = DataSeries([true, false, true, false])
17 | let trueSeries = DataSeries([1, 2, 3, 4])
18 | let falseSeries = DataSeries([10, 20, 30, 40])
19 | // Both branches are passed wrapped as `.ds(...)`; each result element should be picked according to the condition.
20 | let result = whereCondition(condition,
21 | then: .ds(trueSeries),
22 | else: .ds(falseSeries))
23 |
24 | XCTAssertNotNil(result)
25 | XCTAssertEqual(result?[0], 1) // true -> trueSeries
26 | XCTAssertEqual(result?[1], 20) // false -> falseSeries
27 | XCTAssertEqual(result?[2], 3) // true -> trueSeries
28 | XCTAssertEqual(result?[3], 40) // false -> falseSeries
29 | }
30 |
31 | func test_whereCondition_WithScalarValues() {
32 | let condition = DataSeries([true, false, true, false])
33 | let trueValue = 100
34 | let falseValue = 200
35 | // Plain scalars are used for both branches: each position should get 100 or 200 depending on the condition.
36 | let result = whereCondition(condition,
37 | then: trueValue,
38 | else: falseValue)
39 |
40 | XCTAssertNotNil(result)
41 | XCTAssertEqual(result?[0], 100) // true -> trueValue
42 | XCTAssertEqual(result?[1], 200) // false -> falseValue
43 | XCTAssertEqual(result?[2], 100) // true -> trueValue
44 | XCTAssertEqual(result?[3], 200) // false -> falseValue
45 | }
46 |
47 | func test_whereCondition_WithNilCondition() {
48 | let trueSeries = DataSeries([1, 2, 3, 4])
49 | let falseSeries = DataSeries([10, 20, 30, 40])
50 | // A nil condition is expected to produce a nil result even though both branches are valid.
51 | let result = whereCondition(nil,
52 | then: .ds(trueSeries),
53 | else: .ds(falseSeries))
54 |
55 | XCTAssertNil(result)
56 | }
57 |
58 | func test_whereCondition_WithNilSeries() {
59 | let condition = DataSeries([true, false, true, false])
60 | // With both branch arguments nil, the result is expected to be nil.
61 | let result: DataSeries? = whereCondition(condition,
62 | then: nil,
63 | else: nil)
64 |
65 | XCTAssertNil(result)
66 | }
67 |
68 | // MARK: - zipSeries Tests
69 |
70 | func test_zipSeries_TwoSeries() {
71 | let s1 = DataSeries([1, 2, 3, 4])
72 | let s2 = DataSeries([5, 6, 7, 8])
73 | // Each zipped element exposes its components as `t1` (from s1) and `t2` (from s2).
74 | let result = zipSeries(s1, s2)
75 |
76 | XCTAssertNotNil(result)
77 | XCTAssertEqual(result?.count, 4)
78 | XCTAssertEqual(result?[0]?.t1, 1)
79 | XCTAssertEqual(result?[0]?.t2, 5)
80 | XCTAssertEqual(result?[1]?.t1, 2)
81 | XCTAssertEqual(result?[1]?.t2, 6)
82 | XCTAssertEqual(result?[2]?.t1, 3)
83 | XCTAssertEqual(result?[2]?.t2, 7)
84 | XCTAssertEqual(result?[3]?.t1, 4)
85 | XCTAssertEqual(result?[3]?.t2, 8)
86 | }
87 |
88 | func test_zipSeries_ThreeSeries() {
89 | let s1 = DataSeries([1, 2, 3, 4])
90 | let s2 = DataSeries([5, 6, 7, 8])
91 | let s3 = DataSeries([9, 10, 11, 12])
92 | // Three inputs: elements expose `t1`, `t2` and `t3`; only the first two elements are checked.
93 | let result = zipSeries(s1, s2, s3)
94 |
95 | XCTAssertNotNil(result)
96 | XCTAssertEqual(result?.count, 4)
97 | XCTAssertEqual(result?[0]?.t1, 1)
98 | XCTAssertEqual(result?[0]?.t2, 5)
99 | XCTAssertEqual(result?[0]?.t3, 9)
100 | XCTAssertEqual(result?[1]?.t1, 2)
101 | XCTAssertEqual(result?[1]?.t2, 6)
102 | XCTAssertEqual(result?[1]?.t3, 10)
103 | }
104 |
105 | func test_zipSeries_FourSeries() {
106 | let s1 = DataSeries([1, 2, 3, 4])
107 | let s2 = DataSeries([5, 6, 7, 8])
108 | let s3 = DataSeries([9, 10, 11, 12])
109 | let s4 = DataSeries([13, 14, 15, 16])
110 | // Four inputs: elements expose `t1` through `t4`; only the first two elements are checked.
111 | let result = zipSeries(s1, s2, s3, s4)
112 |
113 | XCTAssertNotNil(result)
114 | XCTAssertEqual(result?.count, 4)
115 | XCTAssertEqual(result?[0]?.t1, 1)
116 | XCTAssertEqual(result?[0]?.t2, 5)
117 | XCTAssertEqual(result?[0]?.t3, 9)
118 | XCTAssertEqual(result?[0]?.t4, 13)
119 | XCTAssertEqual(result?[1]?.t1, 2)
120 | XCTAssertEqual(result?[1]?.t2, 6)
121 | XCTAssertEqual(result?[1]?.t3, 10)
122 | XCTAssertEqual(result?[1]?.t4, 14)
123 | }
124 |
125 | func test_zipSeries_WithNilSeries() {
126 | let s1 = DataSeries([1, 2, 3, 4])
127 | // Passing nil for one of the inputs is expected to make the whole result nil.
128 | let result: DataSeries>? = zipSeries(s1, nil)
129 |
130 | XCTAssertNil(result)
131 | }
132 |
133 | // func test_zipSeries_WithDifferentLengths() {
134 | // let s1 = DataSeries([1, 2, 3, 4])
135 | // let s2 = DataSeries([5, 6, 7]) // Different length
136 | //
137 | // let result = zipSeries(s1, s2)
138 | //
139 | // // Should assert and fail in debug mode
140 | // XCTAssertNotNil(result)
141 | // }
142 |
143 | // MARK: - whereTrue Tests
144 |
145 | func test_whereTrue_WithValidSeries() {
146 | let condition = DataSeries([true, false, true, false, nil])
147 | let trueSeries = DataSeries([1, 2, 3, 4, 5])
148 | let falseSeries = DataSeries([10, 20, 30, 40, 50])
149 | // The last condition element is nil, which is expected to yield nil at that position.
150 | let result = condition.whereTrue(then: trueSeries, else: falseSeries)
151 |
152 | XCTAssertNotNil(result)
153 | XCTAssertEqual(result?[0], 1) // true -> trueSeries
154 | XCTAssertEqual(result?[1], 20) // false -> falseSeries
155 | XCTAssertEqual(result?[2], 3) // true -> trueSeries
156 | XCTAssertEqual(result?[3], 40) // false -> falseSeries
157 | XCTAssertNil(result?[4]) // nil -> nil
158 | }
159 |
160 | func test_whereTrue_WithNilSeries() {
161 | let condition = DataSeries([true, false, true])
162 | // With both branch series nil, whereTrue is expected to return nil.
163 | let result: DataSeries? = condition.whereTrue(then: nil, else: nil)
164 |
165 | XCTAssertNil(result)
166 | }
167 |
168 | // func test_whereTrue_WithDifferentLengths() {
169 | // let condition = DataSeries([true, false, true])
170 | // let trueSeries = DataSeries([1, 2]) // Shorter
171 | // let falseSeries = DataSeries([10, 20, 30])
172 | //
173 | // let result = condition.whereTrue(then: trueSeries, else: falseSeries)
174 | //
175 | // XCTAssertNotNil(result)
176 | // XCTAssertEqual(result?[0], 1) // true -> trueSeries
177 | // XCTAssertEqual(result?[1], 20) // false -> falseSeries
178 | // XCTAssertEqual(result?[2], 30) // true -> falseSeries (trueSeries too short)
179 | // }
180 |
181 | // MARK: - isEmptySeries Tests
182 |
183 | func test_isEmptySeries_WithAllNils() {
184 | let series: DataSeries = DataSeries([nil, nil, nil])
185 | // A series that contains only nils is expected to count as empty.
186 | XCTAssertTrue(series.isEmptySeries())
187 | }
188 |
189 | func test_isEmptySeries_WithSomeNils() {
190 | let series = DataSeries([1, nil, 3])
191 | // At least one non-nil element means the series is not empty.
192 | XCTAssertFalse(series.isEmptySeries())
193 | }
194 |
195 | func test_isEmptySeries_WithNoNils() {
196 | let series = DataSeries([1, 2, 3])
197 | // A fully populated series is not empty.
198 | XCTAssertFalse(series.isEmptySeries())
199 | }
200 |
201 | func test_isEmptySeries_WithEmptySeries() {
202 | let series: DataSeries = DataSeries()
203 | // A series with no elements at all is expected to count as empty too.
204 | XCTAssertTrue(series.isEmptySeries())
205 | }
206 |
207 | // MARK: - at and setAt Tests
208 |
209 | func test_at_WithValidIndex() {
210 | let series = DataSeries([1, 2, 3, 4, 5])
211 | // First, middle and last valid indices should return the stored values.
212 | XCTAssertEqual(series.at(index: 0), 1)
213 | XCTAssertEqual(series.at(index: 2), 3)
214 | XCTAssertEqual(series.at(index: 4), 5)
215 | }
216 |
217 | func test_at_WithInvalidIndex() {
218 | let series = DataSeries([1, 2, 3, 4, 5])
219 | // Negative and past-the-end indices are expected to give nil. NOTE(review): the `as Any?` cast presumably adapts the optional result for XCTAssertNil — confirm.
220 | XCTAssertNil(series.at(index: -1) as Any?)
221 | XCTAssertNil(series.at(index: 5) as Any?)
222 | XCTAssertNil(series.at(index: 10) as Any?)
223 | }
224 |
225 | func test_setAt_WithValidIndex() {
226 | let series = DataSeries([1, 2, 3, 4, 5])
227 | // setAt returns a series; `series` itself is a `let` constant here.
228 | let result = series.setAt(index: 2, value: 100)
229 | // Only index 2 should differ from the input.
230 | XCTAssertEqual(result[0], 1)
231 | XCTAssertEqual(result[1], 2)
232 | XCTAssertEqual(result[2], 100)
233 | XCTAssertEqual(result[3], 4)
234 | XCTAssertEqual(result[4], 5)
235 | }
236 |
237 |     func test_setAt_WithInvalidIndex() {
238 |         let original = DataSeries([1, 2, 3, 4, 5])
239 |
240 |         // Index 10 lies past the end of a five-element series.
241 |         let untouched = original.setAt(index: 10, value: 100)
242 |
243 |         // The out-of-range write is expected to leave every element as it was.
244 |         let expected = [1, 2, 3, 4, 5]
245 |         for (position, value) in expected.enumerated() {
246 |             XCTAssertEqual(untouched[position], value)
247 |         }
248 |     }
249 |
250 | // MARK: - scanSeries Tests
251 |
252 |     func test_scanSeries_WithAddition() {
253 |         let input = DataSeries([1, 2, 3, 4, 5])
254 |
255 |         // Running total: each output element is the sum of all inputs seen so far.
256 |         let runningTotal = input.scanSeries(initial: 0) { accumulated, element in
257 |             (accumulated ?? 0) + (element ?? 0)
258 |         }
259 |
260 |         XCTAssertEqual(runningTotal.count, 5)
261 |         let expected = [1, 3, 6, 10, 15]
262 |         for (position, value) in expected.enumerated() {
263 |             XCTAssertEqual(runningTotal[position], value)
264 |         }
265 |     }
266 |
267 |     func test_scanSeries_WithNilValues() {
268 |         let input = DataSeries([1, nil, 3, 4, 5])
269 |
270 |         // The closure substitutes 0 for nil, so a gap leaves the running total unchanged.
271 |         let runningTotal = input.scanSeries(initial: 0) { accumulated, element in
272 |             (accumulated ?? 0) + (element ?? 0)
273 |         }
274 |
275 |         XCTAssertEqual(runningTotal.count, 5)
276 |         let expected = [1, 1, 4, 8, 13] // 1, 1 + 0 (nil), 1 + 3, 4 + 4, 8 + 5
277 |         for (position, value) in expected.enumerated() {
278 |             XCTAssertEqual(runningTotal[position], value)
279 |         }
280 |     }
281 |
282 | // MARK: - toDateComponents Tests
283 |
284 |     func test_toDateComponents_WithValidDates() {
285 |         let formatter = DateFormatter()
286 |         formatter.dateFormat = "yyyy/MM/dd"
287 |
288 |         let dates = DataSeries([
289 |             formatter.date(from: "2020/01/15"),
290 |             formatter.date(from: "2021/06/20"),
291 |             formatter.date(from: "2022/12/31")
292 |         ])
293 |
294 |         let components = dates.toDateComponents()
295 |         // Each extracted component series keeps one entry per input date.
296 |         XCTAssertEqual(components[.year]?.count, 3)
297 |         XCTAssertEqual(components[.month]?.count, 3)
298 |         XCTAssertEqual(components[.day]?.count, 3)
299 |         // Row 0: 2020/01/15
300 |         XCTAssertEqual(components[.year]?[0], 2020)
301 |         XCTAssertEqual(components[.month]?[0], 1)
302 |         XCTAssertEqual(components[.day]?[0], 15)
303 |         // Row 1: 2021/06/20
304 |         XCTAssertEqual(components[.year]?[1], 2021)
305 |         XCTAssertEqual(components[.month]?[1], 6)
306 |         XCTAssertEqual(components[.day]?[1], 20)
307 |         // Row 2: 2022/12/31
308 |         XCTAssertEqual(components[.year]?[2], 2022)
309 |         XCTAssertEqual(components[.month]?[2], 12)
310 |         XCTAssertEqual(components[.day]?[2], 31)
311 |     }
312 |
313 |     func test_toDateComponents_WithNilDates() {
314 |         let formatter = DateFormatter()
315 |         formatter.dateFormat = "yyyy/MM/dd"
316 |
317 |         let dates = DataSeries([
318 |             formatter.date(from: "2020/01/15"),
319 |             nil,
320 |             formatter.date(from: "2022/12/31")
321 |         ])
322 |
323 |         let components = dates.toDateComponents()
324 |         // A nil date still occupies a slot in every component series.
325 |         XCTAssertEqual(components[.year]?.count, 3)
326 |         XCTAssertEqual(components[.month]?.count, 3)
327 |         XCTAssertEqual(components[.day]?.count, 3)
328 |         // The slot of the nil date stays nil; its neighbours are decomposed normally.
329 |         XCTAssertEqual(components[.year]?[0], 2020)
330 |         XCTAssertNil(components[.year]?[1])
331 |         XCTAssertEqual(components[.year]?[2], 2022)
332 |     }
333 | }
--------------------------------------------------------------------------------
/Tests/KoalasTests/DataFrameIOTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataFrameIOTests.swift
3 | //
4 | //
5 | // Created by AI Assistant on 2024.
6 | //
7 |
8 | import XCTest
9 | @testable import Koalas
10 |
11 | final class DataFrameIOTests: XCTestCase {
12 |
13 | // MARK: - toStringRowLines Tests
14 |
15 |     func test_toStringRowLines_WithValidData() {
16 |         let firstColumn = DataSeries([1, 2, 3, 4])
17 |         let secondColumn = DataSeries([5, 6, 7, 8])
18 |
19 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
20 |
21 |         let lines = frame.toStringRowLines(separator: ",")
22 |
23 |         // One header line followed by one line per data row.
24 |         let expected = ["col1,col2", "1,5", "2,6", "3,7", "4,8"]
25 |         XCTAssertEqual(lines.count, 5) // Header + 4 data rows
26 |         for (row, text) in expected.enumerated() {
27 |             XCTAssertEqual(lines[row], text)
28 |         }
29 |     }
30 |
31 |     func test_toStringRowLines_WithNilValues() {
32 |         let firstColumn = DataSeries([1, nil, 3, 4])
33 |         let secondColumn = DataSeries([5, 6, nil, 8])
34 |
35 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
36 |
37 |         let lines = frame.toStringRowLines(separator: ";")
38 |
39 |         // Missing cells are rendered as the literal text "nil".
40 |         let expected = ["col1;col2", "1;5", "nil;6", "3;nil", "4;8"]
41 |         XCTAssertEqual(lines.count, 5) // Header + 4 data rows
42 |         for (row, text) in expected.enumerated() {
43 |             XCTAssertEqual(lines[row], text)
44 |         }
45 |     }
46 |
47 |     func test_toStringRowLines_WithDifferentSeparator() {
48 |         let firstColumn = DataSeries([1, 2, 3])
49 |         let secondColumn = DataSeries([4, 5, 6])
50 |
51 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
52 |
53 |         let lines = frame.toStringRowLines(separator: "|")
54 |
55 |         let expected = ["col1|col2", "1|4", "2|5", "3|6"] // header, then one entry per row
56 |         XCTAssertEqual(lines.count, 4) // Header + 3 data rows
57 |         for (row, text) in expected.enumerated() {
58 |             XCTAssertEqual(lines[row], text)
59 |         }
60 |     }
61 |
62 |     func test_toStringRowLines_WithEmptyDataFrame() {
63 |         // An empty literal cannot drive inference, so key and value types are spelled out.
64 |         let df: DataFrame<String, Int> = [:]
65 |         let result = df.toStringRowLines(separator: ",")
66 |
67 |         XCTAssertEqual(result.count, 1) // Only the header line is produced
68 |         XCTAssertEqual(result.first, "") // `first` fails the assertion instead of trapping if there are no lines
69 |     }
70 |
71 | // MARK: - write Tests
72 |
73 |     func test_write_WithValidData() throws {
74 |         // Writes a two-column frame to a temporary CSV file and checks the file line by line.
75 |         let s1 = DataSeries([1, 2, 3, 4])
76 |         let s2 = DataSeries([5, 6, 7, 8])
77 |
78 |         let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
79 |
80 |         let fileManager = FileManager.default
81 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_write").appendingPathExtension("csv")
82 |         // Registered up front so the file is removed even if a later `try` throws.
83 |         defer { try? fileManager.removeItem(at: fileURL) }
84 |
85 |         try df.write(toFile: fileURL.path, columnSeparator: ",")
86 |
87 |         // Verify file was created
88 |         XCTAssertTrue(fileManager.fileExists(atPath: fileURL.path))
89 |
90 |         // Read and verify content
91 |         let content = try String(contentsOf: fileURL, encoding: .utf8)
92 |         let lines = content.components(separatedBy: .newlines).filter { !$0.isEmpty }
93 |
94 |         XCTAssertEqual(lines.count, 5) // Header + 4 data rows
95 |         XCTAssertEqual(lines[0], "col1,col2")
96 |         XCTAssertEqual(lines[1], "1,5")
97 |         XCTAssertEqual(lines[2], "2,6")
98 |         XCTAssertEqual(lines[3], "3,7")
99 |         XCTAssertEqual(lines[4], "4,8")
100 |     }
101 |
102 |     func test_write_WithNilValues() throws {
103 |         // Writes a frame containing nils and checks that they appear as "nil" in the file.
104 |         let s1 = DataSeries([1, nil, 3, 4])
105 |         let s2 = DataSeries([5, 6, nil, 8])
106 |
107 |         let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
108 |
109 |         let fileManager = FileManager.default
110 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_write_nil").appendingPathExtension("csv")
111 |         // Registered up front so the file is removed even if a later `try` throws.
112 |         defer { try? fileManager.removeItem(at: fileURL) }
113 |
114 |         try df.write(toFile: fileURL.path, columnSeparator: ";")
115 |
116 |         // Verify file was created
117 |         XCTAssertTrue(fileManager.fileExists(atPath: fileURL.path))
118 |
119 |         // Read and verify content
120 |         let content = try String(contentsOf: fileURL, encoding: .utf8)
121 |         let lines = content.components(separatedBy: .newlines).filter { !$0.isEmpty }
122 |
123 |         XCTAssertEqual(lines.count, 5) // Header + 4 data rows
124 |         XCTAssertEqual(lines[0], "col1;col2")
125 |         XCTAssertEqual(lines[1], "1;5")
126 |         XCTAssertEqual(lines[2], "nil;6")
127 |         XCTAssertEqual(lines[3], "3;nil")
128 |         XCTAssertEqual(lines[4], "4;8")
129 |     }
130 |
131 |     func test_write_WithCustomEncoding() throws {
132 |         // Writes with an explicitly passed encoding and reads the file back with the same one.
133 |         let s1 = DataSeries([1, 2, 3])
134 |         let s2 = DataSeries([4, 5, 6])
135 |
136 |         let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
137 |
138 |         let fileManager = FileManager.default
139 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_write_encoding").appendingPathExtension("csv")
140 |         // Registered up front so the file is removed even if a later `try` throws.
141 |         defer { try? fileManager.removeItem(at: fileURL) }
142 |
143 |         try df.write(toFile: fileURL.path, encoding: .utf8, columnSeparator: "|")
144 |
145 |         // Verify file was created
146 |         XCTAssertTrue(fileManager.fileExists(atPath: fileURL.path))
147 |
148 |         // Read and verify content
149 |         let content = try String(contentsOf: fileURL, encoding: .utf8)
150 |         let lines = content.components(separatedBy: .newlines).filter { !$0.isEmpty }
151 |
152 |         XCTAssertEqual(lines.count, 4) // Header + 3 data rows
153 |         XCTAssertEqual(lines[0], "col1|col2")
154 |         XCTAssertEqual(lines[1], "1|4")
155 |         XCTAssertEqual(lines[2], "2|5")
156 |         XCTAssertEqual(lines[3], "3|6")
157 |     }
158 |
159 | // MARK: - init from file Tests
160 |
161 |     func test_initFromFile_WithValidData() throws {
162 |         // Reads a three-column CSV from a temporary file and checks every parsed cell.
163 |         let fileManager = FileManager.default
164 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_init").appendingPathExtension("csv")
165 |         // Registered up front so the fixture is removed even if a later `try` throws.
166 |         defer { try? fileManager.removeItem(at: fileURL) }
167 |
168 |         // Create test file
169 |         let content = """
170 |         col1,col2,col3
171 |         1,5,9
172 |         2,6,10
173 |         3,7,11
174 |         4,8,12
175 |         """
176 |         try content.write(to: fileURL, atomically: true, encoding: .utf8)
177 |
178 |         let df = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
179 |
180 |         XCTAssertEqual(df.count, 3) // 3 columns
181 |         XCTAssertEqual(df["col1"]?.count, 4) // 4 rows
182 |         XCTAssertEqual(df["col2"]?.count, 4)
183 |         XCTAssertEqual(df["col3"]?.count, 4)
184 |
185 |         XCTAssertEqual(df["col1"]?[0], 1)
186 |         XCTAssertEqual(df["col1"]?[1], 2)
187 |         XCTAssertEqual(df["col1"]?[2], 3)
188 |         XCTAssertEqual(df["col1"]?[3], 4)
189 |
190 |         XCTAssertEqual(df["col2"]?[0], 5)
191 |         XCTAssertEqual(df["col2"]?[1], 6)
192 |         XCTAssertEqual(df["col2"]?[2], 7)
193 |         XCTAssertEqual(df["col2"]?[3], 8)
194 |
195 |         XCTAssertEqual(df["col3"]?[0], 9)
196 |         XCTAssertEqual(df["col3"]?[1], 10)
197 |         XCTAssertEqual(df["col3"]?[2], 11)
198 |         XCTAssertEqual(df["col3"]?[3], 12)
199 |     }
200 |
201 |     func test_initFromFile_WithNilValues() throws {
202 |         // Cells holding the text "nil" are expected to come back as nil values.
203 |         let fileManager = FileManager.default
204 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_init_nil").appendingPathExtension("csv")
205 |         // Registered up front so the fixture is removed even if a later `try` throws.
206 |         defer { try? fileManager.removeItem(at: fileURL) }
207 |
208 |         // Create test file with nil values
209 |         let content = """
210 |         col1,col2,col3
211 |         1,5,9
212 |         nil,6,10
213 |         3,nil,11
214 |         4,8,nil
215 |         """
216 |         try content.write(to: fileURL, atomically: true, encoding: .utf8)
217 |
218 |         let df = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
219 |
220 |         XCTAssertEqual(df.count, 3) // 3 columns
221 |         XCTAssertEqual(df["col1"]?.count, 4) // 4 rows
222 |
223 |         XCTAssertEqual(df["col1"]?[0], 1)
224 |         XCTAssertNil(df["col1"]?[1])
225 |         XCTAssertEqual(df["col1"]?[2], 3)
226 |         XCTAssertEqual(df["col1"]?[3], 4)
227 |
228 |         XCTAssertEqual(df["col2"]?[0], 5)
229 |         XCTAssertEqual(df["col2"]?[1], 6)
230 |         XCTAssertNil(df["col2"]?[2])
231 |         XCTAssertEqual(df["col2"]?[3], 8)
232 |
233 |         XCTAssertEqual(df["col3"]?[0], 9)
234 |         XCTAssertEqual(df["col3"]?[1], 10)
235 |         XCTAssertEqual(df["col3"]?[2], 11)
236 |         XCTAssertNil(df["col3"]?[3])
237 |     }
238 |
239 |     func test_initFromFile_WithDifferentSeparator() throws {
240 |         // Same parsing path as the comma case, but with "|" as the column separator.
241 |         let fileManager = FileManager.default
242 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_init_separator").appendingPathExtension("csv")
243 |         // Registered up front so the fixture is removed even if a later `try` throws.
244 |         defer { try? fileManager.removeItem(at: fileURL) }
245 |
246 |         // Create test file with different separator
247 |         let content = """
248 |         col1|col2
249 |         1|5
250 |         2|6
251 |         3|7
252 |         """
253 |         try content.write(to: fileURL, atomically: true, encoding: .utf8)
254 |
255 |         let df = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: "|")
256 |
257 |         XCTAssertEqual(df.count, 2) // 2 columns
258 |         XCTAssertEqual(df["col1"]?.count, 3) // 3 rows
259 |         XCTAssertEqual(df["col2"]?.count, 3)
260 |
261 |         XCTAssertEqual(df["col1"]?[0], 1)
262 |         XCTAssertEqual(df["col1"]?[1], 2)
263 |         XCTAssertEqual(df["col1"]?[2], 3)
264 |
265 |         XCTAssertEqual(df["col2"]?[0], 5)
266 |         XCTAssertEqual(df["col2"]?[1], 6)
267 |         XCTAssertEqual(df["col2"]?[2], 7)
268 |     }
269 |
270 |     func test_initFromFile_WithEmptyFile() throws {
271 |         // An empty file should parse into a frame with no columns.
272 |         let fileManager = FileManager.default
273 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_init_empty").appendingPathExtension("csv")
274 |         // Registered up front so the fixture is removed even if a later `try` throws.
275 |         defer { try? fileManager.removeItem(at: fileURL) }
276 |
277 |         // Create empty file
278 |         try "".write(to: fileURL, atomically: true, encoding: .utf8)
279 |
280 |         let df = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
281 |
282 |         XCTAssertEqual(df.count, 0) // No columns
283 |     }
284 |
285 |     func test_initFromFile_WithOnlyHeader() throws {
286 |         // A header without data rows should produce the named columns, each with zero rows.
287 |         let fileManager = FileManager.default
288 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_init_header").appendingPathExtension("csv")
289 |         // Registered up front so the fixture is removed even if a later `try` throws.
290 |         defer { try? fileManager.removeItem(at: fileURL) }
291 |
292 |         // Create file with only header
293 |         let content = "col1,col2,col3"
294 |         try content.write(to: fileURL, atomically: true, encoding: .utf8)
295 |
296 |         let df = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
297 |
298 |         XCTAssertEqual(df.count, 3) // 3 columns
299 |         XCTAssertEqual(df["col1"]?.count, 0) // No data rows
300 |         XCTAssertEqual(df["col2"]?.count, 0)
301 |         XCTAssertEqual(df["col3"]?.count, 0)
302 |     }
303 |
304 |     func test_initFromFile_WithInvalidPath() {
305 |         let invalidPath = "/nonexistent/path/file.csv"
306 |         // Key and value types are explicit: nothing else in the expression lets the compiler infer them.
307 |         XCTAssertThrowsError(try DataFrame<String, Int>(contentsOfFile: invalidPath, columnSeparator: ","))
308 |     }
309 |
310 | // MARK: - Round-trip Tests
311 |
312 |     func test_writeAndRead_RoundTrip() throws {
313 |         // A frame written to disk and read back must compare equal to the original.
314 |         let s1 = DataSeries([1, 2, 3, 4])
315 |         let s2 = DataSeries([5, 6, 7, 8])
316 |         let s3 = DataSeries([9, 10, 11, 12])
317 |
318 |         let originalDF = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2), ("col3", s3))
319 |
320 |         let fileManager = FileManager.default
321 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_roundtrip").appendingPathExtension("csv")
322 |         // Registered up front so the file is removed even if a later `try` throws.
323 |         defer { try? fileManager.removeItem(at: fileURL) }
324 |
325 |         // Write DataFrame
326 |         try originalDF.write(toFile: fileURL.path, columnSeparator: ",")
327 |
328 |         // Read DataFrame back
329 |         let readDF = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
330 |
331 |         // Verify they are equal
332 |         XCTAssertTrue(originalDF.equalsTo(dataframe: readDF))
333 |     }
334 |
335 |     func test_writeAndRead_RoundTripWithNilValues() throws {
336 |         // Nil cells must survive the write/read cycle as nils.
337 |         let s1 = DataSeries([1, nil, 3, 4])
338 |         let s2 = DataSeries([5, 6, nil, 8])
339 |         let s3 = DataSeries([9, 10, 11, nil])
340 |
341 |         let originalDF = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2), ("col3", s3))
342 |
343 |         let fileManager = FileManager.default
344 |         let fileURL = fileManager.temporaryDirectory.appendingPathComponent("test_roundtrip_nil").appendingPathExtension("csv")
345 |         // Registered up front so the file is removed even if a later `try` throws.
346 |         defer { try? fileManager.removeItem(at: fileURL) }
347 |
348 |         // Write DataFrame
349 |         try originalDF.write(toFile: fileURL.path, columnSeparator: ",")
350 |
351 |         // Read DataFrame back
352 |         let readDF = try DataFrame<String, Int>(contentsOfFile: fileURL.path, columnSeparator: ",")
353 |
354 |         // Verify they are equal
355 |         XCTAssertTrue(originalDF.equalsTo(dataframe: readDF))
356 |     }
357 | }
--------------------------------------------------------------------------------
/Tests/KoalasTests/DataFrameAdvancedTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataFrameAdvancedTests.swift
3 | //
4 | //
5 | // Created by AI Assistant on 2024.
6 | //
7 |
8 | import XCTest
9 | @testable import Koalas
10 |
11 | final class DataFrameAdvancedTests: XCTestCase {
12 |
13 | // MARK: - upscaleTransform Tests
14 |
15 |     func test_upscaleTransform_TransformsDataSeriesToDataFrame() {
16 |         let firstColumn = DataSeries([1, 2, 3, 4])
17 |         let secondColumn = DataSeries([5, 6, 7, 8])
18 |
19 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
20 |
21 |         let panel = frame.upscaleTransform { column in
22 |             DataFrame(dictionaryLiteral: ("transformed", column))
23 |         }
24 |         // Every source column should be reachable under the "transformed" key of the result.
25 |         XCTAssertEqual(panel["transformed"]?["col1"]?.count, 4)
26 |         XCTAssertEqual(panel["transformed"]?["col2"]?.count, 4)
27 |         XCTAssertEqual(panel["transformed"]?["col1"]?[0], 1)
28 |         XCTAssertEqual(panel["transformed"]?["col2"]?[0], 5)
29 |     }
30 |
31 | // MARK: - flatMapValues Tests
32 |
33 |     func test_flatMapValues_TransformsValues() {
34 |         let firstColumn = DataSeries([1, 2, 3, 4])
35 |         let secondColumn = DataSeries([5, 6, 7, 8])
36 |
37 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
38 |
39 |         let doubled = frame.flatMapValues { element in
40 |             element.map { $0 * 2 }
41 |         }
42 |         // Spot-check the first two rows of each column.
43 |         XCTAssertEqual(doubled["col1"]?[0], 2)
44 |         XCTAssertEqual(doubled["col1"]?[1], 4)
45 |         XCTAssertEqual(doubled["col2"]?[0], 10)
46 |         XCTAssertEqual(doubled["col2"]?[1], 12)
47 |     }
48 |
49 |     func test_flatMapValues_HandlesNilValues() {
50 |         let firstColumn = DataSeries([1, nil, 3, 4])
51 |         let secondColumn = DataSeries([5, 6, nil, 8])
52 |
53 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
54 |
55 |         let doubled = frame.flatMapValues { element in
56 |             element.map { $0 * 2 }
57 |         }
58 |         // nil inputs must stay nil; every other value is doubled.
59 |         XCTAssertEqual(doubled["col1"]?[0], 2)
60 |         XCTAssertNil(doubled["col1"]?[1])
61 |         XCTAssertEqual(doubled["col1"]?[2], 6)
62 |         XCTAssertEqual(doubled["col2"]?[0], 10)
63 |         XCTAssertEqual(doubled["col2"]?[1], 12)
64 |         XCTAssertNil(doubled["col2"]?[2])
65 |     }
66 |
67 | // MARK: - mapTo Series Tests
68 |
69 |     func test_mapToSeries_WithValidSeries() {
70 |         let firstColumn = DataSeries([1, 2, 3, 4])
71 |         let secondColumn = DataSeries([5, 6, 7, 8])
72 |         let replacement = DataSeries([10, 20, 30, 40])
73 |
74 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
75 |
76 |         let mapped = frame.mapTo(series: replacement)
77 |         // Both columns are expected to carry the replacement series' values.
78 |         XCTAssertNotNil(mapped)
79 |         XCTAssertEqual(mapped?["col1"]?.count, 4)
80 |         XCTAssertEqual(mapped?["col2"]?.count, 4)
81 |         XCTAssertEqual(mapped?["col1"]?[0], 10)
82 |         XCTAssertEqual(mapped?["col1"]?[1], 20)
83 |         XCTAssertEqual(mapped?["col2"]?[0], 10)
84 |         XCTAssertEqual(mapped?["col2"]?[1], 20)
85 |     }
86 |
87 |     func test_mapToSeries_WithNilSeries() {
88 |         let s1 = DataSeries([1, 2, 3, 4])
89 |         let s2 = DataSeries([5, 6, 7, 8])
90 |
91 |         let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
92 |         // The result type is annotated in full because a nil argument gives nothing to infer from.
93 |         let result: DataFrame<String, Int>? = df.mapTo(series: nil)
94 |
95 |         XCTAssertNil(result)
96 |     }
97 |
98 | // func test_mapToSeries_WithDifferentLengthSeries() {
99 | // let s1 = DataSeries([1, 2, 3, 4])
100 | // let s2 = DataSeries([5, 6, 7, 8])
101 | // let targetSeries = DataSeries([10, 20, 30]) // Different length
102 | //
103 | // let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
104 | //
105 | // let result = df.mapTo(series: targetSeries)
106 | //
107 | // // Should assert and fail in debug mode, but we can't test assertions easily
108 | // // This test documents the expected behavior
109 | // XCTAssertNotNil(result)
110 | // }
111 |
112 | // MARK: - scan Tests
113 |
114 |     func test_scan_WithAddition() {
115 |         let firstColumn = DataSeries([1, 2, 3, 4])
116 |         let secondColumn = DataSeries([5, 6, 7, 8])
117 |
118 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
119 |
120 |         let runningTotals = frame.scan(initial: 0) { accumulated, element in
121 |             (accumulated ?? 0) + (element ?? 0)
122 |         }
123 |         // Each column is expected to hold its own running total.
124 |         XCTAssertEqual(runningTotals["col1"]?[0], 1)
125 |         XCTAssertEqual(runningTotals["col1"]?[1], 3)
126 |         XCTAssertEqual(runningTotals["col1"]?[2], 6)
127 |         XCTAssertEqual(runningTotals["col1"]?[3], 10)
128 |         XCTAssertEqual(runningTotals["col2"]?[0], 5)
129 |         XCTAssertEqual(runningTotals["col2"]?[1], 11)
130 |         XCTAssertEqual(runningTotals["col2"]?[2], 18)
131 |         XCTAssertEqual(runningTotals["col2"]?[3], 26)
132 |     }
133 |
134 |     func test_scan_WithNilValues() {
135 |         let firstColumn = DataSeries([1, nil, 3, 4])
136 |         let secondColumn = DataSeries([5, 6, nil, 8])
137 |
138 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
139 |
140 |         let runningTotals = frame.scan(initial: 0) { accumulated, element in
141 |             (accumulated ?? 0) + (element ?? 0)
142 |         }
143 |         // The closure counts nil as 0, so the total is carried across the gap unchanged.
144 |         XCTAssertEqual(runningTotals["col1"]?[0], 1)
145 |         XCTAssertEqual(runningTotals["col1"]?[1], 1) // 1 + 0 (nil)
146 |         XCTAssertEqual(runningTotals["col1"]?[2], 4)
147 |         XCTAssertEqual(runningTotals["col1"]?[3], 8)
148 |     }
149 |
150 | // MARK: - rollingFunc Tests
151 |
152 |     func test_rollingFunc_WithSum() {
153 |         let firstColumn = DataSeries([1, 2, 3, 4, 5])
154 |         let secondColumn = DataSeries([5, 6, 7, 8, 9])
155 |
156 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
157 |
158 |         let windowSums = frame.rollingFunc(initial: 0, window: 3) { values in
159 |             values.compactMap { $0 }.reduce(0, +)
160 |         }
161 |         // The closure is expected to run on partially filled windows as well.
162 |         XCTAssertEqual(windowSums["col1"]?[0], 1) // Window not full yet, but returns actual value
163 |         XCTAssertEqual(windowSums["col1"]?[1], 3) // Window not full yet, but returns actual sum
164 |         XCTAssertEqual(windowSums["col1"]?[2], 6) // 1+2+3
165 |         XCTAssertEqual(windowSums["col1"]?[3], 9) // 2+3+4
166 |         XCTAssertEqual(windowSums["col1"]?[4], 12) // 3+4+5
167 |     }
168 |
169 |     func test_rollingFunc_WithNilValues() {
170 |         let firstColumn = DataSeries([1, nil, 3, 4, 5])
171 |         let secondColumn = DataSeries([5, 6, nil, 8, 9])
172 |
173 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
174 |
175 |         let windowSums = frame.rollingFunc(initial: 0, window: 3) { values in
176 |             let present = values.compactMap { $0 }
177 |             return present.isEmpty ? nil : present.reduce(0, +)
178 |         }
179 |         // The closure drops nils before summing, so a gap only lowers the sum of its window.
180 |         XCTAssertEqual(windowSums["col1"]?[0], 1) // Window not full yet: 1
181 |         XCTAssertEqual(windowSums["col1"]?[1], 1) // Window not full yet: 1 (nil ignored)
182 |         XCTAssertEqual(windowSums["col1"]?[2], 4) // 1+3 (nil ignored)
183 |         XCTAssertEqual(windowSums["col1"]?[3], 7) // 3+4 (nil ignored)
184 |         XCTAssertEqual(windowSums["col1"]?[4], 12) // 3+4+5 (no nil in this window)
185 |         XCTAssertEqual(windowSums["col2"]?[0], 5)
186 |         XCTAssertEqual(windowSums["col2"]?[1], 11)
187 |         XCTAssertEqual(windowSums["col2"]?[2], 11)
188 |         XCTAssertEqual(windowSums["col2"]?[3], 14)
189 |         XCTAssertEqual(windowSums["col2"]?[4], 17)
190 |     }
191 |
192 | // MARK: - rollingSum Tests
193 |
194 |     func test_rollingSum_WithValidData() {
195 |         let firstColumn = DataSeries([1, 2, 3, 4, 5])
196 |         let secondColumn = DataSeries([5, 6, 7, 8, 9])
197 |
198 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
199 |
200 |         let sums = frame.rollingSum(window: 3)
201 |         // No sum is expected until three elements are available.
202 |         XCTAssertNil(sums["col1"]?[0]) // Window not full, returns nil
203 |         XCTAssertNil(sums["col1"]?[1]) // Window not full, returns nil
204 |         XCTAssertEqual(sums["col1"]?[2], 6) // 1+2+3
205 |         XCTAssertEqual(sums["col1"]?[3], 9) // 2+3+4
206 |         XCTAssertEqual(sums["col1"]?[4], 12) // 3+4+5
207 |     }
208 |
209 |     func test_rollingSum_WithNilValues() {
210 |         let firstColumn = DataSeries([1, nil, 3, 4, 5])
211 |         let secondColumn = DataSeries([5, 6, nil, 8, 9])
212 |
213 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
214 |
215 |         let sums = frame.rollingSum(window: 3)
216 |         // A window yields a sum only when it is full and free of nils.
217 |         XCTAssertNil(sums["col1"]?[0]) // Window not full, returns nil
218 |         XCTAssertNil(sums["col1"]?[1]) // Window not full, returns nil
219 |         XCTAssertNil(sums["col1"]?[2]) // Window has nil values
220 |         XCTAssertNil(sums["col1"]?[3]) // Window has nil values
221 |         XCTAssertEqual(sums["col1"]?[4], 12) // Last window has all non-nil values
222 |         XCTAssertNil(sums["col2"]?[0]) // Window not full, returns nil
223 |         XCTAssertNil(sums["col2"]?[1]) // Window not full, returns nil
224 |         XCTAssertNil(sums["col2"]?[2]) // Window has nil values
225 |         XCTAssertNil(sums["col2"]?[3]) // Window has nil values
226 |         XCTAssertNil(sums["col2"]?[4]) // Window still contains the nil at index 2
227 |     }
228 |
229 | // MARK: - rollingMean Tests
230 |
231 |     func test_rollingMean_WithValidData() {
232 |         let firstColumn = DataSeries([1.0, 2.0, 3.0, 4.0, 5.0])
233 |         let secondColumn = DataSeries([5.0, 6.0, 7.0, 8.0, 9.0])
234 |
235 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
236 |
237 |         let means = frame.rollingMean(window: 3)
238 |         // No mean is expected until three elements are available.
239 |         XCTAssertNil(means["col1"]?[0]) // Window not full, returns nil
240 |         XCTAssertNil(means["col1"]?[1]) // Window not full, returns nil
241 |         XCTAssertEqual(means["col1"]?[2], 2.0) // (1+2+3)/3
242 |         XCTAssertEqual(means["col1"]?[3], 3.0) // (2+3+4)/3
243 |         XCTAssertEqual(means["col1"]?[4], 4.0) // (3+4+5)/3
244 |     }
245 |
246 |     func test_rollingMean_WithNilValues() {
247 |         let firstColumn = DataSeries([1.0, nil, 3.0, 4.0, 5.0])
248 |         let secondColumn = DataSeries([5.0, 6.0, nil, 8.0, 9.0])
249 |
250 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
251 |
252 |         let means = frame.rollingMean(window: 3)
253 |         // A window yields a mean only when it is full and free of nils.
254 |         XCTAssertNil(means["col1"]?[0]) // Window not full, returns nil
255 |         XCTAssertNil(means["col1"]?[1]) // Window not full, returns nil
256 |         XCTAssertNil(means["col1"]?[2]) // Window has nil values
257 |         XCTAssertNil(means["col1"]?[3]) // Window has nil values
258 |         XCTAssertEqual(means["col1"]?[4], 4.0) // Last window has all non-nil values
259 |     }
260 |
261 | // MARK: - shape Tests
262 |
263 |     func test_shape_ReturnsCorrectDimensions() {
264 |         let firstColumn = DataSeries([1, 2, 3, 4])
265 |         let secondColumn = DataSeries([5, 6, 7, 8])
266 |         let thirdColumn = DataSeries([9, 10, 11, 12])
267 |
268 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn), ("col3", thirdColumn))
269 |
270 |         let dimensions = frame.shape()
271 |         // Width counts columns, height counts rows.
272 |         XCTAssertEqual(dimensions.width, 3) // 3 columns
273 |         XCTAssertEqual(dimensions.height, 4) // 4 rows
274 |     }
275 |
276 |     func test_shape_WithEmptyDataFrame() {
277 |         // An empty literal cannot drive inference, so key and value types are spelled out.
278 |         let df: DataFrame<String, Int> = [:]
279 |         let shape = df.shape()
280 |
281 |         XCTAssertEqual(shape.width, 0)
282 |         XCTAssertEqual(shape.height, 0)
283 |     }
284 |
285 | // MARK: - mean Tests
286 |
287 |     func test_mean_WithValidData() {
288 |         let firstColumn = DataSeries([1.0, 2.0, 3.0, 4.0])
289 |         let secondColumn = DataSeries([5.0, 6.0, 7.0, 8.0])
290 |
291 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
292 |
293 |         let means = frame.mean(shouldSkipNils: true)
294 |         // Each column collapses to a single-element series holding its mean.
295 |         XCTAssertEqual(means["col1"]?.count, 1)
296 |         XCTAssertEqual(means["col2"]?.count, 1)
297 |         XCTAssertEqual(means["col1"]?[0], 2.5) // (1+2+3+4)/4
298 |         XCTAssertEqual(means["col2"]?[0], 6.5) // (5+6+7+8)/4
299 |     }
300 |
301 |     func test_mean_WithNilValues() {
302 |         let s1 = DataSeries([1.0, nil, 3.0, 4.0])
303 |         let s2 = DataSeries([5.0, 6.0, nil, 8.0])
304 |
305 |         let df = DataFrame(dictionaryLiteral: ("col1", s1), ("col2", s2))
306 |
307 |         let result = df.mean(shouldSkipNils: true)
308 |         // Thirds are not exactly representable, so compare with a tolerance; a nil mean becomes NaN and fails.
309 |         XCTAssertEqual(result["col1"]?[0] ?? .nan, 8.0 / 3.0, accuracy: 1e-9) // (1+3+4)/3
310 |         XCTAssertEqual(result["col2"]?[0] ?? .nan, 19.0 / 3.0, accuracy: 1e-9) // (5+6+8)/3
311 |     }
312 |
313 |     func test_mean_WithNilValuesNotSkipped() {
314 |         let firstColumn = DataSeries([1.0, nil, 3.0, 4.0])
315 |         let secondColumn = DataSeries([5.0, 6.0, nil, 8.0])
316 |
317 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
318 |
319 |         let means = frame.mean(shouldSkipNils: false)
320 |         // With skipping off, the expected values treat each nil as a zero that still counts as a row.
321 |         XCTAssertEqual(means["col1"]?[0], 2.0) // (1+0+3+4)/4
322 |         XCTAssertEqual(means["col2"]?[0], 4.75) // (5+6+0+8)/4
323 |     }
324 |
325 | // MARK: - std Tests
326 |
327 |     func test_std_WithValidData() {
328 |         let firstColumn = DataSeries([1.0, 2.0, 3.0, 4.0])
329 |         let secondColumn = DataSeries([5.0, 6.0, 7.0, 8.0])
330 |
331 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
332 |
333 |         let deviations = frame.std(shouldSkipNils: true)
334 |
335 |         XCTAssertEqual(deviations["col1"]?.count, 1)
336 |         XCTAssertEqual(deviations["col2"]?.count, 1)
337 |         // Both columns have the same spread, so they share one hand-computed expected value.
338 |         XCTAssertEqual(deviations["col1"]?[0] ?? 0.0, 1.2909944487358056, accuracy: 0.0001)
339 |         XCTAssertEqual(deviations["col2"]?[0] ?? 0.0, 1.2909944487358056, accuracy: 0.0001)
340 |     }
341 |
342 |     func test_std_WithNilValues() {
343 |         let firstColumn = DataSeries([1.0, nil, 3.0, 4.0])
344 |         let secondColumn = DataSeries([5.0, 6.0, nil, 8.0])
345 |
346 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
347 |
348 |         let deviations = frame.std(shouldSkipNils: true)
349 |
350 |         // Hand-computed for the non-nil values (1, 3, 4) and (5, 6, 8).
351 |         XCTAssertEqual(deviations["col1"]?[0] ?? 0.0, 1.5275252316519468, accuracy: 0.0001)
352 |         XCTAssertEqual(deviations["col2"]?[0] ?? 0.0, 1.5275252316519468, accuracy: 0.0001)
353 |     }
354 |
355 |     func test_std_WithSingleValue() {
356 |         let firstColumn = DataSeries([1.0])
357 |         let secondColumn = DataSeries([5.0])
358 |
359 |         let frame = DataFrame(dictionaryLiteral: ("col1", firstColumn), ("col2", secondColumn))
360 |
361 |         let deviations = frame.std(shouldSkipNils: true)
362 |         // A single observation is not enough for a standard deviation, so nil is expected.
363 |         XCTAssertNil(deviations["col1"]?[0]) // Need at least 2 values for std
364 |         XCTAssertNil(deviations["col2"]?[0]) // Need at least 2 values for std
365 |     }
366 | }
--------------------------------------------------------------------------------
/Sources/Koalas/DataSeries/SeriesArrayExtensions.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Sergey Kazakov on 25.06.2020.
6 | //
7 |
8 | import Foundation
9 |
10 | public extension SeriesArray {
11 |     /**
12 |      Picks, element by element, between two series using this boolean series as the mask.
13 |      A `true` mask element selects the value from `trueSeries`, `false` selects the one from `series`,
14 |      and a nil mask element yields nil. Returns nil when either input series is nil.
15 |      */
16 |     func whereTrue<U>(then trueSeries: DataSeries<U>?, else series: DataSeries<U>?) -> DataSeries<U>? where Element == Bool? {
17 |         guard let trueSeries = trueSeries, let series = series else {
18 |             return nil
19 |         }
20 |
21 |         let rows = zipSeriesArray(s1: self, s2: trueSeries, s3: series)
22 |         // `flatMap` folds "mask is nil" and "selected value is nil" into a single optional level.
23 |         let selected = rows.map { row in
24 |             row.0.flatMap { $0 ? row.1 : row.2 }
25 |         }
26 |         return DataSeries(selected)
27 |     }
28 | }
29 |
30 | public extension SeriesArray {
31 |     /**
32 |      Checks whether the series holds no actual values.
33 |      Returns true if every element is nil, which includes a series with no elements at all.
34 |      Returns false as soon as any non-nil element is present.
35 |      The `Equatable` requirement on `T` is kept so existing call sites resolve exactly as before.
36 |      */
37 |     func isEmptySeries<T>() -> Bool where Element == T?, T: Equatable {
38 |         // `allSatisfy` is true for an empty collection and stops at the first non-nil element,
39 |         // which is the same answer the earlier `first(where:)` lookup produced in two steps.
40 |         return allSatisfy { $0 == nil }
41 |     }
42 |
43 |     /**
44 |      Element-wise equality check against another series of `Equatable` values.
45 |      Returns false when `series` is nil or when the two series differ in length.
46 |      Otherwise returns true only if every pair of corresponding elements is equal
47 |      according to `isElementEqual(lhs:rhs:)`.
48 |      */
49 |     func equalsTo<T>(series: DataSeries<T>?) -> Bool where Element == T?, T: Equatable {
50 |         // A missing series or a length mismatch can never compare equal.
51 |         guard let series = series, count == series.count else {
52 |             return false
53 |         }
54 |
55 |         // `allSatisfy` stops at the first mismatching pair.
56 |         return zip(self, series).allSatisfy { isElementEqual(lhs: $0.0, rhs: $0.1) }
57 |     }
58 |
59 |     /**
60 |      Element-wise equality check against another floating-point series, with a tolerance.
61 |      Returns false when `series` is nil or when the two series differ in length.
62 |      Otherwise returns true only if every pair of corresponding elements is equal
63 |      according to `isElementEqual(lhs:rhs:with:)`, which receives `precision`.
64 |      */
65 |     func equalsTo<T>(series: DataSeries<T>?, with precision: T) -> Bool where Element == T?, T: FloatingPoint {
66 |         // A missing series or a length mismatch can never compare equal.
67 |         guard let series = series, count == series.count else {
68 |             return false
69 |         }
70 |
71 |         // `allSatisfy` stops at the first pair that falls outside the tolerance check.
72 |         return zip(self, series).allSatisfy { isElementEqual(lhs: $0.0, rhs: $0.1, with: precision) }
73 |     }
74 |
75 |     /**
76 |      Element-wise equality check against another series of `Numeric` values.
77 |      Returns false when `series` is nil or when the two series differ in length.
78 |      Otherwise returns true only if every pair of corresponding elements is equal
79 |      according to `isElementEqual(lhs:rhs:)`.
80 |      */
81 |     func equalsTo<T>(series: DataSeries<T>?) -> Bool where Element == T?, T: Numeric {
82 |         // A missing series or a length mismatch can never compare equal.
83 |         guard let series = series, count == series.count else {
84 |             return false
85 |         }
86 |
87 |         // `allSatisfy` stops at the first mismatching pair.
88 |         return zip(self, series).allSatisfy { isElementEqual(lhs: $0.0, rhs: $0.1) }
89 |     }
90 |
/**
 Replaces every nil element with the supplied value.
 Returns a new series; elements that already hold a value are kept unchanged.
 */
func fillNils(with value: Element) -> DataSeries where Element == T? {
    return DataSeries(map { element in element ?? value })
}
98 |
/**
 Fills nil elements according to the chosen method and returns the result as a new series.
 - `.all` replaces every nil with the associated value.
 - `.forward` scans front to back, substituting the previously carried value for each nil.
 - `.backward` performs the same scan over the reversed series and reverses the result back.
 */
func fillNils(method: FillNilsMethod) -> DataSeries where Element == T? {
    switch method {
    case let .all(value):
        return fillNils(with: value)
    case let .forward(initial):
        // `scan` is defined elsewhere; presumably it hands the closure the running value and the next element.
        let carried = scan(initial: initial) { previous, current in (current ?? previous) }
        return DataSeries(carried)
    case let .backward(initial):
        let backToFront = DataSeries(reversed())
        let carried = backToFront.scan(initial: initial) { previous, current in (current ?? previous) }
        // Flip the scanned values back into the original order.
        return DataSeries(carried.reversed())
    }
}
115 |
/**
 Builds a series of the same length as this one in which every element is the given constant.
 */
func mapTo(constant value: T) -> DataSeries {
    let length = count
    return DataSeries(repeating: value, count: length)
}
123 |
/**
 Shifts the series by `k` positions while keeping its length.
 A positive `k` prepends `k` nils and drops the same number of elements from the end.
 Any other `k` appends `abs(k)` nils and drops the same number of elements from the front,
 so `k == 0` leaves the contents unchanged.
 */
func shiftedBy(_ k: Int) -> DataSeries where Element == T? {
    let offset = abs(k)
    var shifted = self

    if k > 0 {
        shifted.insert(contentsOf: DataSeries(repeating: nil, count: offset), at: 0)
        shifted.removeLast(offset)
    } else {
        // Padding is added before trimming, so the removal never exceeds the element count.
        shifted.append(contentsOf: DataSeries(repeating: nil, count: offset))
        shifted.removeFirst(offset)
    }

    return shifted
}
142 |
/**
 Adds up the non-nil values of the series.
 Returns nil when `ignoreNils` is false and at least one element is nil.
 A series without any values sums to zero.
 */
func sum(ignoreNils: Bool = true) -> T? where Element == T?, T: Numeric {
    let present = filter { $0 != nil }
    let hasNils = present.count != count

    if hasNils && !ignoreNils {
        return nil
    }

    let total: T = present.reduce(0) { partial, element in partial + (element ?? 0) }
    return total
}
155 |
/**
 Computes the arithmetic mean of the series.
 With `shouldSkipNils` set, only non-nil values are averaged; otherwise nils count as 0.
 Returns nil when there is nothing to average.
 */
func mean(shouldSkipNils: Bool = true) -> T? where Element == T?, T: FloatingPoint {
    let samples = shouldSkipNils
        ? DataSeries(filter { $0 != nil })
        : fillNils(with: 0)

    if samples.isEmpty {
        return nil
    }

    let total: T = samples.reduce(0) { partial, element in partial + (element ?? 0) }
    return total / T(samples.count)
}
173 |
/**
 Computes the standard deviation of the series, dividing the squared deviations by `count - 1`.
 With `shouldSkipNils` set, only non-nil values are used; otherwise nils count as 0.
 Returns nil when fewer than two values are available.
 */
func std(shouldSkipNils: Bool = true) -> T? where Element == T?, T: FloatingPoint {
    let samples = shouldSkipNils
        ? DataSeries(filter { $0 != nil })
        : fillNils(with: 0)

    // The `count - 1` divisor below needs at least two observations.
    if samples.count <= 1 {
        return nil
    }

    let total = samples.map { $0 ?? 0 }.reduce(0, +)
    let average = total / T(samples.count)

    // Series arithmetic is defined elsewhere; a nil product falls back to a zero sum.
    let deviations = samples - samples.mapTo(constant: average)
    let sumOfSquares = (deviations * deviations)?.map { $0 ?? 0 }.reduce(0, +)
    let variance = (sumOfSquares ?? 0) / T(samples.count - 1)
    return variance.squareRoot()
}
196 |
/**
 Computes the expanding (cumulative) sum of the series, starting from `initial`.
 Nil elements contribute zero to the running total.
 */
func expandingSum(initial: T) -> DataSeries where Element == T?, T: Numeric {
    let runningTotals = scan(initial: initial) { accumulated, element in
        accumulated + (element ?? 0)
    }
    return DataSeries(runningTotals)
}
205 |
206 | /**
207 | Calculates expanding (cumulative) maximum of the series.
208 | Returns a series where each element is the maximum of all previous elements and the current element.
209 | */
210 | func expandingMax() -> DataSeries