├── spec
    ├── 01_empty_file
    │   ├── empty.xlsx
    │   └── empty_test.v
    ├── 03_simple_table
    │   ├── table.xlsx
    │   └── table_test.v
    ├── 02_single_column
    │   ├── column.xlsx
    │   └── column_test.v
    ├── 05_libreoffice_file
    │   ├── abc.xlsx
    │   └── open_libreoffice_file_test.v
    └── 04_1MB_file
    │   ├── Free_Test_Data_1MB_XLSX.xlsx
    │   └── one_mb_test.v
├── examples
    └── 01_marksheet
    │   ├── data.xlsx
    │   └── marks.v
├── .editorconfig
├── v.mod
├── .gitattributes
├── .gitignore
├── src
    ├── location_test.v
    ├── types.v
    ├── query.v
    ├── location.v
    ├── parser.v
    ├── cotent_types_test.v
    └── content_types.v
├── LICENSE
├── .github
    └── workflows
    │   └── ci.yml
└── README.md


/spec/01_empty_file/empty.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/spec/01_empty_file/empty.xlsx


--------------------------------------------------------------------------------
/examples/01_marksheet/data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/examples/01_marksheet/data.xlsx


--------------------------------------------------------------------------------
/spec/03_simple_table/table.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/spec/03_simple_table/table.xlsx


--------------------------------------------------------------------------------
/spec/02_single_column/column.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/spec/02_single_column/column.xlsx


--------------------------------------------------------------------------------
/spec/05_libreoffice_file/abc.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/spec/05_libreoffice_file/abc.xlsx


--------------------------------------------------------------------------------
/spec/04_1MB_file/Free_Test_Data_1MB_XLSX.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hungrybluedev/xlsx/HEAD/spec/04_1MB_file/Free_Test_Data_1MB_XLSX.xlsx


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | charset = utf-8
3 | end_of_line = lf
4 | insert_final_newline = true
5 | trim_trailing_whitespace = true
6 | 
7 | [*.v]
8 | indent_style = tab
9 | 


--------------------------------------------------------------------------------
/v.mod:
--------------------------------------------------------------------------------
1 | Module {
2 | 	name: 'xlsx'
3 | 	description: 'V package to work with Microsoft Excel files.'
4 | 	version: '0.0.1'
5 | 	license: 'MIT'
6 | 	dependencies: []
7 | }
8 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 | *.bat eol=crlf
3 | 
4 | **/*.v linguist-language=V
5 | **/*.vv linguist-language=V
6 | **/*.vsh linguist-language=V
7 | **/v.mod linguist-language=V
8 | 


--------------------------------------------------------------------------------
/spec/01_empty_file/empty_test.v:
--------------------------------------------------------------------------------
 1 | import xlsx
 2 | import os
 3 | 
 4 | fn test_empty() ! {
 5 | 	path := os.join_path(os.dir(@FILE), 'empty.xlsx')
 6 | 
 7 | 	document := xlsx.Document.from_file(path)!
 8 | 
 9 | 	sheet := document.sheets[1]
10 | 	assert sheet.get_all_data()! == xlsx.DataFrame{}
11 | }
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | main
 3 | *.exe
 4 | *.exe~
 5 | *.so
 6 | *.dylib
 7 | *.dll
 8 | *~
 9 | 
10 | # Ignore binary output folders
11 | bin/
12 | 
13 | # Ignore common editor/system specific metadata
14 | .DS_Store
15 | .idea/
16 | .vscode/
17 | *.iml
18 | 
19 | # ENV
20 | .env
21 | 
22 | # vweb and database
23 | *.db
24 | *.js
25 | 
26 | # Ignore backup XLSX files
27 | *~*.xlsx
28 | 
29 | # Ignore scratch files
30 | scratch/
31 | scratch*
32 | 


--------------------------------------------------------------------------------
/spec/05_libreoffice_file/open_libreoffice_file_test.v:
--------------------------------------------------------------------------------
 1 | import os
 2 | import xlsx
 3 | 
 4 | fn test_opening_a_libreoffice_calc_table() {
 5 | 	workbook := xlsx.Document.from_file(os.join_path(os.dir(@FILE), 'abc.xlsx'))!
 6 | 	println('[info] Successfully loaded workbook with ${workbook.sheets.len} worksheets.')
 7 | 	println('\nAvailable sheets:')
 8 | 	for index, key in workbook.sheets.keys() {
 9 | 		println('   sheet ${index + 1} has key: "${key}"')
10 | 	}
11 | 	sheet1 := workbook.sheets[1]
12 | 	dataset := sheet1.get_all_data()!
13 | 	count := dataset.row_count()
14 | 	println('\n[info] Sheet 1 has ${count} rows.')
15 | 	headers := dataset.raw_data[0]
16 | 	println('\nThe headers are:')
17 | 	assert headers.len == 1
18 | 	for index, header in headers {
19 | 		println('${index + 1}. ${header}')
20 | 		assert index == 0
21 | 		assert header == 'abc'
22 | 	}
23 | }
24 | 


--------------------------------------------------------------------------------
/spec/03_simple_table/table_test.v:
--------------------------------------------------------------------------------
 1 | import xlsx { Location }
 2 | import os
 3 | 
 4 | fn test_table() ! {
 5 | 	path := os.join_path(os.dir(@FILE), 'table.xlsx')
 6 | 	document := xlsx.Document.from_file(path)!
 7 | 
 8 | 	sheet := document.sheets[1]
 9 | 
10 | 	expected_data := xlsx.DataFrame{
11 | 		raw_data: [
12 | 			['Serial Number', 'X', 'Y'],
13 | 			['1', '2', '6'],
14 | 			['2', '4', '60'],
15 | 			['3', '6', '210'],
16 | 			['4', '8', '504'],
17 | 			['5', '10', '990'],
18 | 			['6', '12', '1716'],
19 | 			['7', '14', '2730'],
20 | 			['8', '16', '4080'],
21 | 			['9', '18', '5814'],
22 | 			['10', '20', '7980'],
23 | 		]
24 | 	}
25 | 
26 | 	full_data := sheet.get_all_data()!
27 | 
28 | 	assert full_data == expected_data
29 | 
30 | 	range_data := sheet.get_data(Location.from_encoding('A1')!, Location.from_encoding('C11')!)!
31 | 
32 | 	assert full_data == range_data
33 | }
34 | 


--------------------------------------------------------------------------------
/src/location_test.v:
--------------------------------------------------------------------------------
 1 | module main
 2 | 
 3 | import xlsx
 4 | 
 5 | fn test_location_conversion() ! {
 6 | 	pairs := {
 7 | 		'A1':         xlsx.Location{
 8 | 			row: 0
 9 | 			col: 0
10 | 			row_label: '1'
11 | 			col_label: 'A'
12 | 		}
13 | 		'B2':         xlsx.Location{
14 | 			row: 1
15 | 			col: 1
16 | 			row_label: '2'
17 | 			col_label: 'B'
18 | 		}
19 | 		'Z26':        xlsx.Location{
20 | 			row: 25
21 | 			col: 25
22 | 			row_label: '26'
23 | 			col_label: 'Z'
24 | 		}
25 | 		'AA27':       xlsx.Location{
26 | 			row: 26
27 | 			col: 26
28 | 			row_label: '27'
29 | 			col_label: 'AA'
30 | 		}
31 | 		'XFD1048576': xlsx.Location{
32 | 			row: 1048575
33 | 			col: 16383
34 | 			row_label: '1048576'
35 | 			col_label: 'XFD'
36 | 		}
37 | 	}
38 | 
39 | 	for label, location in pairs {
40 | 		assert xlsx.Location.from_cartesian(location.row, location.col)! == location
41 | 		assert xlsx.Location.from_encoding(label)! == location
42 | 	}
43 | }
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Subhomoy Haldar
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/types.v:
--------------------------------------------------------------------------------
 1 | module xlsx
 2 | 
 3 | pub struct Document {
 4 | pub:
 5 | 	shared_strings []string
 6 | 	sheets         map[int]Sheet
 7 | }
 8 | 
 9 | pub struct Location {
10 | pub:
11 | 	row       int
12 | 	col       int
13 | 	row_label string
14 | 	col_label string
15 | }
16 | 
17 | pub struct Dimension {
18 | pub:
19 | 	top_left     Location
20 | 	bottom_right Location
21 | }
22 | 
23 | pub struct Sheet {
24 | 	Dimension
25 | pub:
26 | 	name string
27 | 	rows []Row
28 | }
29 | 
30 | pub struct Row {
31 | pub:
32 | 	row_index int
33 | 	row_label string
34 | 	cells     []Cell
35 | }
36 | 
37 | pub enum CellType {
38 | 	string_type
39 | 	number_type
40 | }
41 | 
42 | pub fn CellType.from_code(code string) !CellType {
43 | 	match code {
44 | 		's' {
45 | 			return CellType.string_type
46 | 		}
47 | 		'n' {
48 | 			return CellType.number_type
49 | 		}
50 | 		else {
51 | 			return error('Unknown cell type code: ' + code)
52 | 		}
53 | 	}
54 | }
55 | 
56 | pub struct Cell {
57 | pub:
58 | 	cell_type CellType
59 | 	location  Location
60 | 	value     string
61 | }
62 | 
63 | pub struct DataFrame {
64 | pub:
65 | 	raw_data [][]string
66 | }
67 | 
68 | pub fn (data DataFrame) row_count() int {
69 | 	return data.raw_data.len
70 | }
71 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Code Quality
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 |   schedule:
 9 |     - cron: "0 0 * * 4"
10 | 
11 | jobs:
12 |   code-quality:
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         os: [ubuntu-latest, macos-14, macos-latest]
17 |     runs-on: ${{ matrix.os }}
18 | 
19 |     steps:
20 |       - name: Checkout Latest V
21 |         uses: actions/checkout@v4
22 |         with:
23 |           repository: vlang/v
24 |           path: v
25 | 
26 |       - name: Checkout the XLSX module
27 |         uses: actions/checkout@v4
28 |         with:
29 |           path: xlsx
30 | 
31 |       - name: Build V
32 |         run: |
33 |           cd v && make
34 |           ./v symlink -githubci && git clone ../xlsx/ ~/.vmodules/xlsx
35 | 
36 |       - name: Run tests
37 |         run: cd xlsx && v test .
38 | 
39 |       - name: Ensure code is formatted
40 |         run: cd xlsx && v fmt -verify .
41 | 
42 |       - name: Ensure documentation is OK
43 |         run: cd xlsx && v check-md .
44 | 
45 |       - name: Ensure all examples compile
46 |         run: cd xlsx && v should-compile-all examples/
47 | 
48 |       - name: Ensure marks example, can run from an arbitrary working folder
49 |         run: xlsx/examples/01_marksheet/marks
50 | 


--------------------------------------------------------------------------------
/examples/01_marksheet/marks.v:
--------------------------------------------------------------------------------
 1 | import os
 2 | import xlsx
 3 | 
 4 | fn main() {
 5 | 	workbook := xlsx.Document.from_file(os.resource_abs_path('data.xlsx'))!
 6 | 	println('[info] Successfully loaded workbook with ${workbook.sheets.len} worksheets.')
 7 | 
 8 | 	println('\nAvailable sheets:')
 9 | 	// sheets are stored as a map, so we can iterate over the keys.
10 | 	for index, key in workbook.sheets.keys() {
11 | 		println('${index + 1}: "${key}"')
12 | 	}
13 | 
14 | 	// Excel uses 1-based indexing for sheets.
15 | 	sheet1 := workbook.sheets[1]
16 | 
17 | 	// Note that the Cell struct is able to the CellType.
18 | 	// So we can have an idea of what to expect before getting all
19 | 	// the data as a dataset with just string data.
20 | 	dataset := sheet1.get_all_data()!
21 | 
22 | 	count := dataset.row_count()
23 | 
24 | 	println('\n[info] Sheet 1 has ${count} rows.')
25 | 
26 | 	headers := dataset.raw_data[0]
27 | 
28 | 	println('\nThe headers are:')
29 | 	for index, header in headers {
30 | 		println('${index + 1}. ${header}')
31 | 	}
32 | 
33 | 	println('\nThe student names are:')
34 | 
35 | 	for index in 1 .. count {
36 | 		row := dataset.raw_data[index]
37 | 		// All data is stored as strings, so we need to convert it to the appropriate type.
38 | 		roll := row[0].int()
39 | 		name := row[1] + ' ' + row[2]
40 | 		println('${roll:02d}. ${name}')
41 | 	}
42 | }
43 | 


--------------------------------------------------------------------------------
/spec/02_single_column/column_test.v:
--------------------------------------------------------------------------------
 1 | import xlsx
 2 | import os
 3 | 
 4 | fn test_empty() ! {
 5 | 	path := os.join_path(os.dir(@FILE), 'column.xlsx')
 6 | 
 7 | 	document := xlsx.Document.from_file(path)!
 8 | 
 9 | 	expected_rows := [
10 | 		xlsx.Row{
11 | 			row_index: 0
12 | 			row_label: '1'
13 | 			cells: [
14 | 				xlsx.Cell{
15 | 					cell_type: .string_type
16 | 					location: xlsx.Location.from_encoding('A1')!
17 | 					value: 'Item 1'
18 | 				},
19 | 			]
20 | 		},
21 | 		xlsx.Row{
22 | 			row_index: 1
23 | 			row_label: '2'
24 | 			cells: [
25 | 				xlsx.Cell{
26 | 					cell_type: .string_type
27 | 					location: xlsx.Location.from_encoding('A2')!
28 | 					value: 'Item 2'
29 | 				},
30 | 			]
31 | 		},
32 | 		xlsx.Row{
33 | 			row_index: 2
34 | 			row_label: '3'
35 | 			cells: [
36 | 				xlsx.Cell{
37 | 					cell_type: .string_type
38 | 					location: xlsx.Location.from_encoding('A3')!
39 | 					value: 'Item 3'
40 | 				},
41 | 			]
42 | 		},
43 | 		xlsx.Row{
44 | 			row_index: 3
45 | 			row_label: '4'
46 | 			cells: [
47 | 				xlsx.Cell{
48 | 					cell_type: .string_type
49 | 					location: xlsx.Location.from_encoding('A4')!
50 | 					value: 'Item 4'
51 | 				},
52 | 			]
53 | 		},
54 | 		xlsx.Row{
55 | 			row_index: 4
56 | 			row_label: '5'
57 | 			cells: [
58 | 				xlsx.Cell{
59 | 					cell_type: .string_type
60 | 					location: xlsx.Location.from_encoding('A5')!
61 | 					value: 'Item 5'
62 | 				},
63 | 			]
64 | 		},
65 | 	]
66 | 	actual_rows := document.sheets[1].rows
67 | 	assert expected_rows == actual_rows, 'Data does not match for ${path}'
68 | }
69 | 


--------------------------------------------------------------------------------
/src/query.v:
--------------------------------------------------------------------------------
 1 | module xlsx
 2 | 
 3 | pub fn (sheet Sheet) get_cell(location Location) ?Cell {
 4 | 	if location.row >= sheet.rows.len {
 5 | 		return none
 6 | 	}
 7 | 	target_row := sheet.rows[location.row]
 8 | 	if location.col >= target_row.cells.len {
 9 | 		return none
10 | 	}
11 | 	return target_row.cells[location.col]
12 | }
13 | 
14 | pub fn (sheet Sheet) get_all_data() !DataFrame {
15 | 	return sheet.get_data(sheet.top_left, sheet.bottom_right)
16 | }
17 | 
18 | pub fn (sheet Sheet) get_data(top_left Location, bottom_right Location) !DataFrame {
19 | 	if top_left.row == 0 && bottom_right.row == 0 && sheet.rows.len == 0 {
20 | 		return DataFrame{}
21 | 	}
22 | 	if top_left.row >= sheet.rows.len {
23 | 		return error('top_left.row out of range')
24 | 	}
25 | 	if bottom_right.row > sheet.rows.len {
26 | 		return error('bottom_right.row out of range')
27 | 	}
28 | 	if top_left.col >= sheet.rows[top_left.row].cells.len {
29 | 		return error('top_left.col out of range')
30 | 	}
31 | 	if bottom_right.col > sheet.rows[bottom_right.row].cells.len {
32 | 		return error('bottom_right.col out of range')
33 | 	}
34 | 	mut row_values := [][]string{cap: top_left.row - bottom_right.row + 1}
35 | 
36 | 	for index in top_left.row .. bottom_right.row + 1 {
37 | 		row := sheet.rows[index]
38 | 		mut cell_values := []string{cap: top_left.col - bottom_right.col + 1}
39 | 		for column in top_left.col .. bottom_right.col + 1 {
40 | 			cell_values << row.cells[column].value
41 | 		}
42 | 		row_values << cell_values
43 | 	}
44 | 
45 | 	return DataFrame{
46 | 		raw_data: row_values
47 | 	}
48 | }
49 | 


--------------------------------------------------------------------------------
/src/location.v:
--------------------------------------------------------------------------------
 1 | module xlsx
 2 | 
 3 | import strings
 4 | 
 5 | const a_ascii = u8(`A`)
 6 | const max_rows = 1048576
 7 | const max_cols = 16384
 8 | 
 9 | fn col_to_label(col int) string {
10 | 	if col < 26 {
11 | 		col_char := u8(col) + xlsx.a_ascii
12 | 		return col_char.ascii_str()
13 | 	}
14 | 	return col_to_label(col / 26 - 1) + col_to_label(col % 26)
15 | }
16 | 
17 | fn label_to_col(label string) int {
18 | 	mut col := 0
19 | 	for ch in label {
20 | 		col *= 26
21 | 		col += ch - xlsx.a_ascii + 1
22 | 	}
23 | 	return col - 1
24 | }
25 | 
26 | pub fn Location.from_cartesian(row int, col int) !Location {
27 | 	if row < 0 {
28 | 		return error('Row must be >= 0')
29 | 	}
30 | 	if row > xlsx.max_rows {
31 | 		return error('Row must be <= ${xlsx.max_rows}')
32 | 	}
33 | 	if col < 0 {
34 | 		return error('Col must be >= 0')
35 | 	}
36 | 	if col > xlsx.max_cols {
37 | 		return error('Col must be <= ${xlsx.max_cols}')
38 | 	}
39 | 
40 | 	return Location{
41 | 		row: row
42 | 		col: col
43 | 		row_label: (row + 1).str()
44 | 		col_label: col_to_label(col)
45 | 	}
46 | }
47 | 
48 | pub fn Location.from_encoding(code string) !Location {
49 | 	if code.len < 2 {
50 | 		return error('Invalid location code. Must be at least 2 characters long.')
51 | 	}
52 | 
53 | 	mut column_buffer := strings.new_builder(8)
54 | 	mut row_buffer := strings.new_builder(8)
55 | 
56 | 	for location, ch in code {
57 | 		if ch.is_digit() {
58 | 			row_buffer.write_string(code[location..])
59 | 			break
60 | 		}
61 | 		column_buffer.write_u8(ch)
62 | 	}
63 | 
64 | 	row := row_buffer.str()
65 | 	col := column_buffer.str()
66 | 
67 | 	return Location{
68 | 		row: row.int() - 1
69 | 		col: label_to_col(col)
70 | 		row_label: row
71 | 		col_label: col
72 | 	}
73 | }
74 | 


--------------------------------------------------------------------------------
/spec/04_1MB_file/one_mb_test.v:
--------------------------------------------------------------------------------
 1 | import xlsx { Location }
 2 | import os
 3 | 
 4 | fn test_large() ! {
 5 | 	path := os.join_path(os.dir(@FILE), 'Free_Test_Data_1MB_XLSX.xlsx')
 6 | 
 7 | 	document := xlsx.Document.from_file(path)!
 8 | 
 9 | 	sheet := document.sheets[1]
10 | 	assert sheet.rows.len == 28385
11 | 
12 | 	part_data := xlsx.DataFrame{
13 | 		raw_data: [
14 | 			['141', 'Felisaas', 'Female', '62', '21/05/2026', 'France'],
15 | 			['142', 'Demetas', 'Female', '63', '15/10/2028', 'France'],
16 | 			['143', 'Jeromyw', 'Female', '64', '16/08/2027', 'United States'],
17 | 			['144', 'Rashid', 'Female', '65', '21/05/2026', 'United States'],
18 | 			['145', 'Dett', 'Male', '18', '21/05/2015', 'Great Britain'],
19 | 			['146', 'Nern', 'Female', '19', '15/10/2017', 'France'],
20 | 			['147', 'Kallsie', 'Male', '20', '16/08/2016', 'France'],
21 | 			['148', 'Siuau', 'Female', '21', '21/05/2015', 'Great Britain'],
22 | 			['149', 'Shennice', 'Male', '22', '21/05/2016', 'France'],
23 | 			['150', 'Chasse', 'Female', '23', '15/10/2018', 'France'],
24 | 			['151', 'Tommye', 'Male', '24', '16/08/2017', 'United States'],
25 | 			['152', 'Dorcast', 'Female', '25', '21/05/2016', 'United States'],
26 | 			['153', 'Angelee', 'Male', '26', '21/05/2017', 'Great Britain'],
27 | 			['154', 'Willoom', 'Female', '27', '15/10/2019', 'France'],
28 | 			['155', 'Waeston', 'Male', '28', '16/08/2018', 'Great Britain'],
29 | 			['156', 'Rosma', 'Female', '29', '21/05/2017', 'France'],
30 | 			['157', 'Felisaas', 'Male', '30', '21/05/2018', 'France'],
31 | 			['158', 'Demetas', 'Female', '31', '15/10/2020', 'Great Britain'],
32 | 			['159', 'Jeromyw', 'Female', '32', '16/08/2019', 'France'],
33 | 		]
34 | 	}
35 | 	extracted_data := sheet.get_data(Location.from_encoding('A142')!, Location.from_encoding('F160')!)!
36 | 	assert part_data == extracted_data
37 | }
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # xlsx
  2 | 
  3 | ## Description
  4 | 
  5 | A package in pure V for reading and writing (soon) Excel files in the XLSX format.
  6 | 
  7 | ## Roadmap
  8 | 
  9 | - [x] Read XLSX files.
 10 | - [ ] Write XLSX files.
 11 | 
 12 | ## Installation
 13 | 
 14 | ```bash
 15 | v install https://github.com/hungrybluedev/xlsx
 16 | ```
 17 | 
 18 | ## Usage
 19 | 
 20 | ### Reading XLSX files
 21 | 
 22 | Take the `data.xlsx` file from the `examples/01_marksheet` directory for this example.
 23 | 
 24 | ```v
 25 | import xlsx
 26 | 
 27 | fn main() {
 28 | 	workbook := xlsx.Document.from_file('path/to/data.xlsx')!
 29 | 	println('[info] Successfully loaded workbook with ${workbook.sheets.len} worksheets.')
 30 | 
 31 | 	println('\nAvailable sheets:')
 32 | 	// sheets are stored as a map, so we can iterate over the keys.
 33 | 	for index, key in workbook.sheets.keys() {
 34 | 		println('${index + 1}: "${key}"')
 35 | 	}
 36 | 
 37 | 	// Excel uses 1-based indexing for sheets.
 38 | 	sheet1 := workbook.sheets[1]
 39 | 
 40 | 	// Note that the Cell struct is able to the CellType.
 41 | 	// So we can have an idea of what to expect before getting all
 42 | 	// the data as a dataset with just string data.
 43 | 	dataset := sheet1.get_all_data()!
 44 | 
 45 | 	count := dataset.row_count()
 46 | 
 47 | 	println('\n[info] Sheet 1 has ${count} rows.')
 48 | 
 49 | 	headers := dataset.raw_data[0]
 50 | 
 51 | 	println('\nThe headers are:')
 52 | 	for index, header in headers {
 53 | 		println('${index + 1}. ${header}')
 54 | 	}
 55 | 
 56 | 	println('\nThe student names are:')
 57 | 
 58 | 	for index in 1 .. count {
 59 | 		row := dataset.raw_data[index]
 60 | 		// All data is stored as strings, so we need to convert it to the appropriate type.
 61 | 		roll := row[0].int()
 62 | 		name := row[1] + ' ' + row[2]
 63 | 		println('${roll:02d}. ${name}')
 64 | 	}
 65 | }
 66 | ```
 67 | 
 68 | Remember to replace `'path/to/data.xlsx'` with the actual path to the file.
 69 | 
 70 | After you are done, run the program:
 71 | 
 72 | ```bash
 73 | v run marksheet.v
 74 | ```
 75 | 
 76 | You should see the following output:
 77 | 
 78 | ```plaintext
 79 | [info] Successfully loaded workbook with 1 worksheets.
 80 | 
 81 | Available sheets:
 82 | 1: "1"
 83 | 
 84 | [info] Sheet 1 has 11 rows.
 85 | 
 86 | The headers are:
 87 | 1. Roll Number
 88 | 2. First Name
 89 | 3. Last Name
 90 | 4. Physics
 91 | 5. Chemistry
 92 | 6. Biology
 93 | 7. Mathematics
 94 | 8. Total
 95 | 9. Percentage
 96 | 
 97 | The student names are:
 98 | 01. Priya Patel
 99 | 02. Kwame Nkosi
100 | 03. Mei Chen
101 | 04. Aisha Adekunle
102 | 05. Javed Khan
103 | 06. Mei-Ling Wong
104 | 07. Oluwafemi Adeyemi
105 | 08. Yuki Takahashi
106 | 09. Rashid Al-Mansoori
107 | 10. Sanya Verma
108 | ```
109 | 
110 | Try running the example on other XLSX files to see how it works.
111 | Modify the example to suit your needs.
112 | 
113 | ### Writing XLSX files
114 | 
115 | _Coming soon!_
116 | 
117 | ## Get Involved
118 | 
119 | - It is a good idea to have examples files ready for testing.
120 | Ideally, the test files should be as small as possible.
121 | 
122 | - If it is a feature request, please provide a detailed description
123 | of the feature and how it should work.
124 | 
125 | ### On GitHub
126 | 
127 | 1. Create issues for bugs you find or features you want to see.
128 | 2. Fork the repository and create pull requests for contributions.
129 | 
130 | ### On Discord
131 | 
132 | 1. Join the V Discord server: https://discord.gg/vlang
133 | 2. Write in the `#xlsx` channel about your ideas and what you want to do.
134 | 
135 | ## License
136 | 
137 | This project is licensed under the MIT License. See [LICENSE](LICENSE) for more details.
138 | 
139 | ## Support
140 | 
141 | If you like this project, please consider supporting me on [GitHub Sponsors](https://github.com/sponsors/hungrybluedev).
142 | 
143 | ## Resources
144 | 
145 | 1. [Excel specifications and limits.](https://support.microsoft.com/en-us/office/excel-specifications-and-limits-1672b34d-7043-467e-8e27-269d656771c3)
146 | 2. [Test Data for sample XLSX files.](https://freetestdata.com/document-files/xlsx/)
147 | 


--------------------------------------------------------------------------------
/src/parser.v:
--------------------------------------------------------------------------------
  1 | module xlsx
  2 | 
  3 | import os
  4 | import compress.szip
  5 | import rand
  6 | import encoding.xml
  7 | 
  8 | fn create_temporary_directory() string {
  9 | 	for {
 10 | 		location := os.join_path(os.temp_dir(), 'xlsx-${rand.hex(10)}')
 11 | 		if os.exists(location) {
 12 | 			continue
 13 | 		}
 14 | 		os.mkdir(location) or { continue }
 15 | 		return location
 16 | 	}
 17 | 	// Should not reach here
 18 | 	return ''
 19 | }
 20 | 
 21 | fn load_shared_strings(path string, shared_strings_path string) ![]string {
 22 | 	mut shared_strings := []string{}
 23 | 
 24 | 	if !os.exists(shared_strings_path) {
 25 | 		return shared_strings
 26 | 	}
 27 | 
 28 | 	strings_doc := xml.XMLDocument.from_file(shared_strings_path) or {
 29 | 		return error('Failed to parse shared strings file of excel file: ${path}')
 30 | 	}
 31 | 
 32 | 	all_defined_strings := strings_doc.get_elements_by_tag('si')
 33 | 	for definition in all_defined_strings {
 34 | 		t_element := definition.children[0]
 35 | 		if t_element !is xml.XMLNode || (t_element as xml.XMLNode).name != 't' {
 36 | 			return error('Invalid shared string definition: ${definition}')
 37 | 		}
 38 | 
 39 | 		content := (t_element as xml.XMLNode).children[0]
 40 | 		if content !is string {
 41 | 			return error('Invalid shared string definition: ${definition}')
 42 | 		}
 43 | 		shared_strings << (content as string)
 44 | 	}
 45 | 
 46 | 	return shared_strings
 47 | }
 48 | 
 49 | fn load_worksheets_metadata(path string, worksheets_file_path string) !map[int]string {
 50 | 	if !os.exists(worksheets_file_path) {
 51 | 		return error('Worksheets file does not exist: ${path}')
 52 | 	}
 53 | 	worksheets_doc := xml.XMLDocument.from_file(worksheets_file_path) or {
 54 | 		return error('Failed to parse worksheets file of excel file: ${path}')
 55 | 	}
 56 | 
 57 | 	worksheets := worksheets_doc.get_elements_by_tag('sheet')
 58 | 	mut worksheets_metadata := map[int]string{}
 59 | 
 60 | 	for worksheet in worksheets {
 61 | 		worksheets_metadata[worksheet.attributes['sheetId'].int()] = worksheet.attributes['name']
 62 | 	}
 63 | 	return worksheets_metadata
 64 | }
 65 | 
 66 | pub fn Document.from_file(path string) !Document {
 67 | 	// Fail if the file does not exist.
 68 | 	if !os.exists(path) {
 69 | 		return error('File does not exist: ${path}')
 70 | 	}
 71 | 	// First, we extract the ZIP file into a temporary directory.
 72 | 	location := create_temporary_directory()
 73 | 
 74 | 	szip.extract_zip_to_dir(path, location) or {
 75 | 		return error('Failed to extract information from file: ${path}\nError:\n${err}')
 76 | 	}
 77 | 
 78 | 	// Then we list the files in the "xl" directory.
 79 | 	xl_path := os.join_path(location, 'xl')
 80 | 
 81 | 	// Load the strings from the shared strings file, if it exists.
 82 | 	shared_strings_path := os.join_path(xl_path, 'sharedStrings.xml')
 83 | 	shared_strings := load_shared_strings(path, shared_strings_path)!
 84 | 
 85 | 	// Load the sheets metadata from the workbook file.
 86 | 	worksheets_file_path := os.join_path(xl_path, 'workbook.xml')
 87 | 	sheet_metadata := load_worksheets_metadata(path, worksheets_file_path)!
 88 | 
 89 | 	// Finally, we can load the sheets.
 90 | 	all_sheet_paths := os.ls(os.join_path(xl_path, 'worksheets'))!
 91 | 
 92 | 	mut sheet_map := map[int]Sheet{}
 93 | 
 94 | 	for sheet_file in all_sheet_paths {
 95 | 		sheet_path := os.join_path(xl_path, 'worksheets', sheet_file)
 96 | 		sheet_id := sheet_file.all_after('sheet').all_before('.xml').int()
 97 | 		sheet_name := sheet_metadata[sheet_id] or {
 98 | 			return error('Failed to find sheet name for sheet ID: ${sheet_id}')
 99 | 		}
100 | 
101 | 		sheet_doc := xml.XMLDocument.from_file(sheet_path) or {
102 | 			return error('Failed to parse sheet file: ${sheet_path}')
103 | 		}
104 | 
105 | 		sheet := Sheet.from_doc(sheet_name, sheet_doc, shared_strings) or {
106 | 			return error('Failed to parse sheet file: ${sheet_path}')
107 | 		}
108 | 
109 | 		sheet_map[sheet_id] = sheet
110 | 	}
111 | 
112 | 	return Document{
113 | 		shared_strings: shared_strings
114 | 		sheets: sheet_map
115 | 	}
116 | }
117 | 
118 | fn Sheet.from_doc(name string, doc xml.XMLDocument, shared_strings []string) !Sheet {
119 | 	dimension_tags := doc.get_elements_by_tag('dimension')
120 | 	if dimension_tags.len != 1 {
121 | 		return error('Expected exactly one dimension tag.')
122 | 	}
123 | 	dimension_string := dimension_tags[0].attributes['ref'] or {
124 | 		return error('Dimension does not include location.')
125 | 	}
126 | 	dimension_parts := dimension_string.split(':')
127 | 	top_left := Location.from_encoding(dimension_parts[0])!
128 | 	bottom_right_code := if dimension_parts.len == 2 {
129 | 		dimension_parts[1]
130 | 	} else {
131 | 		dimension_parts[0]
132 | 	}
133 | 	mut bottom_right := Location.from_encoding(bottom_right_code)!
134 | 
135 | 	row_tags := doc.get_elements_by_tag('row')
136 | 
137 | 	mut rows := []Row{}
138 | 
139 | 	row_loop: for row in row_tags {
140 | 		// Get the location of the row.
141 | 		row_label := row.attributes['r'] or { return error('Row does not include location.') }
142 | 		row_index := row_label.int() - 1
143 | 
144 | 		span_string := row.attributes['spans'] or { '0:0' }
145 | 
146 | 		span := span_string.split(':').map(it.int())
147 | 		cell_count := span[1] - span[0] + 1
148 | 
149 | 		mut cells := []Cell{cap: cell_count}
150 | 
151 | 		for child in row.children {
152 | 			match child {
153 | 				xml.XMLNode {
154 | 					// First, we check if the cell is empty
155 | 					if child.children.len == 0 {
156 | 						bottom_right = Location.from_cartesian(row_index - 1, bottom_right.col)!
157 | 						break row_loop
158 | 					}
159 | 					matching_tags := child.children.filter(it is xml.XMLNode && it.name == 'v').map(it as xml.XMLNode)
160 | 					if matching_tags.len > 1 {
161 | 						return error('Expected only one value: ${child}')
162 | 					}
163 | 					value_tag := matching_tags[0]
164 | 
165 | 					cell_type := CellType.from_code(child.attributes['t'] or { 'n' })!
166 | 					value := if cell_type == .string_type {
167 | 						shared_strings[(value_tag.children[0] as string).int()]
168 | 					} else {
169 | 						value_tag.children[0] as string
170 | 					}
171 | 
172 | 					location_string := child.attributes['r'] or {
173 | 						return error('Cell does not include location.')
174 | 					}
175 | 
176 | 					cells << Cell{
177 | 						value: value
178 | 						cell_type: cell_type
179 | 						location: Location.from_encoding(location_string)!
180 | 					}
181 | 				}
182 | 				else {
183 | 					return error('Invalid cell of row: ${child}')
184 | 				}
185 | 			}
186 | 		}
187 | 
188 | 		rows << Row{
189 | 			row_index: row_index
190 | 			row_label: row_label
191 | 			cells: cells
192 | 		}
193 | 	}
194 | 	return Sheet{
195 | 		name: name
196 | 		rows: rows
197 | 		top_left: top_left
198 | 		bottom_right: bottom_right
199 | 	}
200 | }
201 | 


--------------------------------------------------------------------------------
/src/cotent_types_test.v:
--------------------------------------------------------------------------------
  1 | module main
  2 | 
  3 | import xlsx
  4 | import time
  5 | 
  6 | fn test_empty() ! {
  7 | 	empty_xml := '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Default Extension="xml" ContentType="application/xml"/><Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/><Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/><Override PartName="/xl/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/><Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/></Types>'
  8 | 	content_types := xlsx.ContentTypes.parse(empty_xml)!
  9 | 	expected_types := xlsx.ContentTypes{
 10 | 		defaults: [
 11 | 			xlsx.DefaultContentType{
 12 | 				extension: 'rels'
 13 | 				content_type: 'application/vnd.openxmlformats-package.relationships+xml'
 14 | 			},
 15 | 			xlsx.DefaultContentType{
 16 | 				extension: 'xml'
 17 | 				content_type: 'application/xml'
 18 | 			},
 19 | 		]
 20 | 		overrides: [
 21 | 			xlsx.OverrideContentType{
 22 | 				part_name: '/xl/workbook.xml'
 23 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'
 24 | 			},
 25 | 			xlsx.OverrideContentType{
 26 | 				part_name: '/xl/worksheets/sheet1.xml'
 27 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'
 28 | 			},
 29 | 			xlsx.OverrideContentType{
 30 | 				part_name: '/xl/theme/theme1.xml'
 31 | 				content_type: 'application/vnd.openxmlformats-officedocument.theme+xml'
 32 | 			},
 33 | 			xlsx.OverrideContentType{
 34 | 				part_name: '/xl/styles.xml'
 35 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'
 36 | 			},
 37 | 			xlsx.OverrideContentType{
 38 | 				part_name: '/docProps/core.xml'
 39 | 				content_type: 'application/vnd.openxmlformats-package.core-properties+xml'
 40 | 			},
 41 | 			xlsx.OverrideContentType{
 42 | 				part_name: '/docProps/app.xml'
 43 | 				content_type: 'application/vnd.openxmlformats-officedocument.extended-properties+xml'
 44 | 			},
 45 | 		]
 46 | 	}
 47 | 
 48 | 	assert content_types == expected_types
 49 | 	assert content_types.str() == empty_xml
 50 | }
 51 | 
 52 | fn test_sample_data() {
 53 | 	data_contents := '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types"><Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/><Default Extension="xml" ContentType="application/xml"/><Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/><Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/><Override PartName="/xl/worksheets/sheet2.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/><Override PartName="/xl/theme/theme1.xml" ContentType="application/vnd.openxmlformats-officedocument.theme+xml"/><Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/><Override PartName="/xl/sharedStrings.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"/><Override PartName="/docProps/core.xml" ContentType="application/vnd.openxmlformats-package.core-properties+xml"/><Override PartName="/docProps/app.xml" ContentType="application/vnd.openxmlformats-officedocument.extended-properties+xml"/></Types>'
 54 | 
 55 | 	content_types := xlsx.ContentTypes.parse(data_contents)!
 56 | 	expected_types := xlsx.ContentTypes{
 57 | 		defaults: [
 58 | 			xlsx.DefaultContentType{
 59 | 				extension: 'rels'
 60 | 				content_type: 'application/vnd.openxmlformats-package.relationships+xml'
 61 | 			},
 62 | 			xlsx.DefaultContentType{
 63 | 				extension: 'xml'
 64 | 				content_type: 'application/xml'
 65 | 			},
 66 | 		]
 67 | 		overrides: [
 68 | 			xlsx.OverrideContentType{
 69 | 				part_name: '/xl/workbook.xml'
 70 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml'
 71 | 			},
 72 | 			xlsx.OverrideContentType{
 73 | 				part_name: '/xl/worksheets/sheet1.xml'
 74 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'
 75 | 			},
 76 | 			xlsx.OverrideContentType{
 77 | 				part_name: '/xl/worksheets/sheet2.xml'
 78 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml'
 79 | 			},
 80 | 			xlsx.OverrideContentType{
 81 | 				part_name: '/xl/theme/theme1.xml'
 82 | 				content_type: 'application/vnd.openxmlformats-officedocument.theme+xml'
 83 | 			},
 84 | 			xlsx.OverrideContentType{
 85 | 				part_name: '/xl/styles.xml'
 86 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml'
 87 | 			},
 88 | 			xlsx.OverrideContentType{
 89 | 				part_name: '/xl/sharedStrings.xml'
 90 | 				content_type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml'
 91 | 			},
 92 | 			xlsx.OverrideContentType{
 93 | 				part_name: '/docProps/core.xml'
 94 | 				content_type: 'application/vnd.openxmlformats-package.core-properties+xml'
 95 | 			},
 96 | 			xlsx.OverrideContentType{
 97 | 				part_name: '/docProps/app.xml'
 98 | 				content_type: 'application/vnd.openxmlformats-officedocument.extended-properties+xml'
 99 | 			},
100 | 		]
101 | 	}
102 | 
103 | 	assert content_types == expected_types
104 | 	assert content_types.str() == data_contents
105 | }
106 | 
107 | const core_properties_dataset = [
108 | 	xlsx.CoreProperties{
109 | 		created_by: 'Subhomoy Haldar'
110 | 		modified_by: 'Subhomoy Haldar'
111 | 		created_at: time.parse_iso8601('2024-02-10T10:24:19Z') or { panic('Failed to parse time.') }
112 | 		modified_at: time.parse_iso8601('2024-02-10T10:24:36Z') or {
113 | 			panic('Failed to parse time.')
114 | 		}
115 | 	},
116 | 	xlsx.CoreProperties{
117 | 		created_by: 'Person A'
118 | 		modified_by: 'Person B'
119 | 		created_at: time.parse_iso8601('2024-02-10T10:24:19Z') or { panic('Failed to parse time.') }
120 | 		modified_at: time.parse_iso8601('2024-02-15T12:08:10Z') or {
121 | 			panic('Failed to parse time.')
122 | 		}
123 | 	},
124 | ]
125 | 
126 | fn test_core_properties() {
127 | 	for data in core_properties_dataset {
128 | 		time_creation := data.created_at.ymmdd() + 'T' + data.created_at.hhmmss() + 'Z'
129 | 		time_modified := data.modified_at.ymmdd() + 'T' + data.modified_at.hhmmss() + 'Z'
130 | 
131 | 		core_content := '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"xmlns:dc="http://purl.org/dc/elements/1.1/"xmlns:dcterms="http://purl.org/dc/terms/"xmlns:dcmitype="http://purl.org/dc/dcmitype/"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><dc:creator>${data.created_by}</dc:creator><cp:lastModifiedBy>${data.modified_by}</cp:lastModifiedBy><dcterms:created xsi:type="dcterms:W3CDTF">${time_creation}</dcterms:created><dcterms:modified xsi:type="dcterms:W3CDTF">${time_modified}</dcterms:modified></cp:coreProperties>'
132 | 
133 | 		core_properties := xlsx.CoreProperties.parse(core_content)!
134 | 		assert core_properties == data
135 | 		assert core_properties.str() == core_content
136 | 	}
137 | }
138 | 
139 | fn test_app_properties() {
140 | 	app_content := '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><Application>Microsoft Excel</Application><DocSecurity>0</DocSecurity><ScaleCrop>false</ScaleCrop><HeadingPairs><vt:vector size="2" baseType="variant"><vt:variant><vt:lpstr>Worksheets</vt:lpstr></vt:variant><vt:variant><vt:i4>2</vt:i4></vt:variant></vt:vector></HeadingPairs><TitlesOfParts><vt:vector size="2" baseType="lpstr"><vt:lpstr>Sample Weather Info</vt:lpstr><vt:lpstr>Sample Altitude Info</vt:lpstr></vt:vector></TitlesOfParts><Company></Company><LinksUpToDate>false</LinksUpToDate><SharedDoc>false</SharedDoc><HyperlinksChanged>false</HyperlinksChanged><AppVersion>16.0300</AppVersion></Properties>'
141 | 
142 | 	app_properties := xlsx.AppProperties.parse(app_content)!
143 | 	expected_properties := xlsx.AppProperties{
144 | 		application: 'Microsoft Excel'
145 | 		doc_security: '0'
146 | 		scale_crop: false
147 | 		heading_pairs: [
148 | 			xlsx.HeadingPair{
149 | 				name: 'Worksheets'
150 | 				count: 2
151 | 			},
152 | 		]
153 | 		titles_of_parts: [
154 | 			xlsx.TitlesOfParts{'Sample Weather Info'},
155 | 			xlsx.TitlesOfParts{'Sample Altitude Info'},
156 | 		]
157 | 		company: ''
158 | 		links_up_to_date: false
159 | 		shared_doc: false
160 | 		hyperlinks_changed: false
161 | 		app_version: '16.0300'
162 | 	}
163 | 
164 | 	assert app_properties == expected_properties
165 | 	assert app_properties.str() == app_content
166 | }
167 | 


--------------------------------------------------------------------------------
/src/content_types.v:
--------------------------------------------------------------------------------
  1 | module xlsx
  2 | 
  3 | import encoding.xml
  4 | import strings
  5 | import time
  6 | 
  7 | pub struct DefaultContentType {
  8 | pub:
  9 | 	extension    string
 10 | 	content_type string
 11 | }
 12 | 
 13 | pub fn (default DefaultContentType) str() string {
 14 | 	return '<Default Extension="${default.extension}" ContentType="${default.content_type}"/>'
 15 | }
 16 | 
 17 | pub struct OverrideContentType {
 18 | pub:
 19 | 	part_name    string
 20 | 	content_type string
 21 | }
 22 | 
 23 | pub fn (override OverrideContentType) str() string {
 24 | 	return '<Override PartName="${override.part_name}" ContentType="${override.content_type}"/>'
 25 | }
 26 | 
 27 | pub struct ContentTypes {
 28 | pub:
 29 | 	defaults  []DefaultContentType
 30 | 	overrides []OverrideContentType
 31 | }
 32 | 
 33 | pub fn (content_type ContentTypes) str() string {
 34 | 	mut result := strings.new_builder(128)
 35 | 
 36 | 	result.write_string('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
 37 | 	result.write_string('<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">')
 38 | 
 39 | 	for default in content_type.defaults {
 40 | 		result.write_string(default.str())
 41 | 	}
 42 | 	for override in content_type.overrides {
 43 | 		result.write_string(override.str())
 44 | 	}
 45 | 
 46 | 	result.write_string('</Types>')
 47 | 
 48 | 	return result.str()
 49 | }
 50 | 
 51 | pub fn ContentTypes.parse(content string) !ContentTypes {
 52 | 	mut defaults := []DefaultContentType{}
 53 | 	mut overrides := []OverrideContentType{}
 54 | 
 55 | 	doc := xml.XMLDocument.from_string(content) or {
 56 | 		return error('Failed to parse content types XML.')
 57 | 	}
 58 | 
 59 | 	content_types_node := doc.get_elements_by_tag('Types')
 60 | 	if content_types_node.len != 1 {
 61 | 		return error('Invalid content types XML. Expected a single <Types> element.')
 62 | 	}
 63 | 
 64 | 	default_tags := content_types_node[0].get_elements_by_tag('Default')
 65 | 	if default_tags.len < 2 {
 66 | 		return error('Invalid content types XML. Expected at least two <Default> elements.')
 67 | 	}
 68 | 	for tag in default_tags {
 69 | 		if 'Extension' !in tag.attributes {
 70 | 			return error("Invalid content types XML. Expected an 'Extension' attribute in <Default> element.")
 71 | 		}
 72 | 		if 'ContentType' !in tag.attributes {
 73 | 			return error("Invalid content types XML. Expected a 'ContentType' attribute in <Default> element.")
 74 | 		}
 75 | 		defaults << DefaultContentType{tag.attributes['Extension'], tag.attributes['ContentType']}
 76 | 	}
 77 | 
 78 | 	override_tags := content_types_node[0].get_elements_by_tag('Override')
 79 | 	for tag in override_tags {
 80 | 		if 'PartName' !in tag.attributes {
 81 | 			return error("Invalid content types XML. Expected a 'PartName' attribute in <Override> element.")
 82 | 		}
 83 | 		if 'ContentType' !in tag.attributes {
 84 | 			return error("Invalid content types XML. Expected a 'ContentType' attribute in <Override> element.")
 85 | 		}
 86 | 		overrides << OverrideContentType{tag.attributes['PartName'], tag.attributes['ContentType']}
 87 | 	}
 88 | 
 89 | 	return ContentTypes{defaults, overrides}
 90 | }
 91 | 
 92 | pub struct CoreProperties {
 93 | pub:
 94 | 	created_by  string
 95 | 	modified_by string
 96 | 	created_at  time.Time
 97 | 	modified_at time.Time
 98 | }
 99 | 
100 | pub fn (props CoreProperties) str() string {
101 | 	time_creation := props.created_at.ymmdd() + 'T' + props.created_at.hhmmss() + 'Z'
102 | 	time_modified := props.modified_at.ymmdd() + 'T' + props.modified_at.hhmmss() + 'Z'
103 | 	return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"xmlns:dc="http://purl.org/dc/elements/1.1/"xmlns:dcterms="http://purl.org/dc/terms/"xmlns:dcmitype="http://purl.org/dc/dcmitype/"xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><dc:creator>${props.created_by}</dc:creator><cp:lastModifiedBy>${props.modified_by}</cp:lastModifiedBy><dcterms:created xsi:type="dcterms:W3CDTF">${time_creation}</dcterms:created><dcterms:modified xsi:type="dcterms:W3CDTF">${time_modified}</dcterms:modified></cp:coreProperties>'
104 | }
105 | 
106 | fn extract_first_element_by_tag(node xml.XMLNode, tag string) !xml.XMLNode {
107 | 	tags := node.get_elements_by_tag(tag)
108 | 	if tags.len != 1 {
109 | 		return error('Invalid core properties XML. Expected a single <${tag}> element.')
110 | 	}
111 | 	return tags[0]
112 | }
113 | 
114 | fn extract_first_child_as_string(node xml.XMLNode) !string {
115 | 	if node.children.len != 1 {
116 | 		return error('Invalid core properties XML. Expected a single child in <${node.name}> element.')
117 | 	}
118 | 	if node.children[0] !is string {
119 | 		return error('Invalid core properties XML. Expected a string child in <${node.name}> element.')
120 | 	}
121 | 	return node.children[0] as string
122 | }
123 | 
124 | pub fn CoreProperties.parse(content string) !CoreProperties {
125 | 	doc := xml.XMLDocument.from_string(content) or {
126 | 		return error('Failed to parse core properties XML.')
127 | 	}
128 | 
129 | 	core_properties_nodes := doc.get_elements_by_tag('cp:coreProperties')
130 | 	if core_properties_nodes.len != 1 {
131 | 		return error('Invalid core properties XML. Expected a single <cp:coreProperties> element.')
132 | 	}
133 | 	core_properties_node := core_properties_nodes[0]
134 | 
135 | 	mut created_by := ''
136 | 	mut modified_by := ''
137 | 	mut created_at := time.Time{}
138 | 	mut modified_at := time.Time{}
139 | 
140 | 	creator_tag := extract_first_element_by_tag(core_properties_node, 'dc:creator')!
141 | 	created_by = extract_first_child_as_string(creator_tag)!
142 | 
143 | 	modified_by_tag := extract_first_element_by_tag(core_properties_node, 'cp:lastModifiedBy')!
144 | 	modified_by = extract_first_child_as_string(modified_by_tag)!
145 | 
146 | 	created_at_tag := extract_first_element_by_tag(core_properties_node, 'dcterms:created')!
147 | 	created_at = time.parse_iso8601(extract_first_child_as_string(created_at_tag)!) or {
148 | 		return error('Invalid core properties XML. Failed to parse created time.')
149 | 	}
150 | 
151 | 	modified_at_tag := extract_first_element_by_tag(core_properties_node, 'dcterms:modified')!
152 | 	modified_at = time.parse_iso8601(extract_first_child_as_string(modified_at_tag)!) or {
153 | 		return error('Invalid core properties XML. Failed to parse modified time.')
154 | 	}
155 | 
156 | 	if created_at > modified_at {
157 | 		return error('Invalid core properties XML. Created time is newer than modified time.')
158 | 	}
159 | 
160 | 	return CoreProperties{created_by, modified_by, created_at, modified_at}
161 | }
162 | 
163 | pub struct HeadingPair {
164 | 	name  string
165 | 	count int
166 | }
167 | 
168 | pub fn (pair HeadingPair) str() string {
169 | 	return '<vt:vector size="2" baseType="variant"><vt:variant><vt:lpstr>${pair.name}</vt:lpstr></vt:variant><vt:variant><vt:i4>${pair.count}</vt:i4></vt:variant></vt:vector>'
170 | }
171 | 
172 | fn HeadingPair.parse(node xml.XMLNode) ![]HeadingPair {
173 | 	mut pairs := []HeadingPair{}
174 | 
175 | 	vector_tags := node.get_elements_by_tag('vt:vector')
176 | 	for vector_tag in vector_tags {
177 | 		variant_tags := vector_tag.get_elements_by_tag('vt:variant')
178 | 		if variant_tags.len != 2 {
179 | 			return error('Invalid app properties XML. Expected two <vt:variant> elements.')
180 | 		}
181 | 
182 | 		name_tag := variant_tags[0].get_elements_by_tag('vt:lpstr')
183 | 		if name_tag.len != 1 {
184 | 			return error('Invalid app properties XML. Expected a single <vt:lpstr> element.')
185 | 		}
186 | 		name := name_tag[0].children[0] as string
187 | 
188 | 		count_tag := variant_tags[1].get_elements_by_tag('vt:i4')
189 | 		if count_tag.len != 1 {
190 | 			return error('Invalid app properties XML. Expected a single <vt:i4> element.')
191 | 		}
192 | 		count_text := count_tag[0].children[0] as string
193 | 		count := count_text.int()
194 | 
195 | 		pairs << HeadingPair{name, count}
196 | 	}
197 | 
198 | 	return pairs
199 | }
200 | 
201 | fn encode_heading_pairs(pairs []HeadingPair) string {
202 | 	mut result := strings.new_builder(256)
203 | 
204 | 	result.write_string('<HeadingPairs>')
205 | 	for pair in pairs {
206 | 		result.write_string(pair.str())
207 | 	}
208 | 	result.write_string('</HeadingPairs>')
209 | 
210 | 	return result.str()
211 | }
212 | 
213 | pub struct TitlesOfParts {
214 | 	entity string
215 | }
216 | 
217 | pub fn (title TitlesOfParts) str() string {
218 | 	return '<vt:lpstr>${title.entity}</vt:lpstr>'
219 | }
220 | 
221 | fn encode_titles_of_parts(titles []TitlesOfParts) string {
222 | 	mut result := strings.new_builder(128)
223 | 
224 | 	result.write_string('<TitlesOfParts><vt:vector size="${titles.len}" baseType="lpstr">')
225 | 	for title in titles {
226 | 		result.write_string(title.str())
227 | 	}
228 | 	result.write_string('</vt:vector></TitlesOfParts>')
229 | 
230 | 	return result.str()
231 | }
232 | 
233 | fn TitlesOfParts.parse(node xml.XMLNode) ![]TitlesOfParts {
234 | 	mut titles := []TitlesOfParts{}
235 | 
236 | 	lpstr_tags := node.get_elements_by_tag('vt:lpstr')
237 | 	for tag in lpstr_tags {
238 | 		titles << TitlesOfParts{tag.children[0] as string}
239 | 	}
240 | 
241 | 	return titles
242 | }
243 | 
244 | pub struct AppProperties {
245 | 	application        string
246 | 	doc_security       string
247 | 	scale_crop         bool
248 | 	links_up_to_date   bool
249 | 	shared_doc         bool
250 | 	hyperlinks_changed bool
251 | 	app_version        string
252 | 	company            string
253 | 	heading_pairs      []HeadingPair
254 | 	titles_of_parts    []TitlesOfParts
255 | }
256 | 
257 | pub fn (prop AppProperties) str() string {
258 | 	return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"xmlns:vt="http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"><Application>${prop.application}</Application><DocSecurity>${prop.doc_security}</DocSecurity><ScaleCrop>${prop.scale_crop}</ScaleCrop>${encode_heading_pairs(prop.heading_pairs)}${encode_titles_of_parts(prop.titles_of_parts)}<Company>${prop.company}</Company><LinksUpToDate>${prop.links_up_to_date}</LinksUpToDate><SharedDoc>${prop.shared_doc}</SharedDoc><HyperlinksChanged>${prop.hyperlinks_changed}</HyperlinksChanged><AppVersion>${prop.app_version}</AppVersion></Properties>'
259 | }
260 | 
261 | pub fn AppProperties.parse(content string) !AppProperties {
262 | 	doc := xml.XMLDocument.from_string(content) or {
263 | 		return error('Failed to parse app properties XML.')
264 | 	}
265 | 
266 | 	properties_nodes := doc.get_elements_by_tag('Properties')
267 | 	if properties_nodes.len != 1 {
268 | 		return error('Invalid app properties XML. Expected a single <Properties> element.')
269 | 	}
270 | 
271 | 	properties_node := properties_nodes[0]
272 | 
273 | 	application_tag := extract_first_element_by_tag(properties_node, 'Application')!
274 | 	application := extract_first_child_as_string(application_tag)!
275 | 
276 | 	doc_security_tag := extract_first_element_by_tag(properties_node, 'DocSecurity')!
277 | 	doc_security := extract_first_child_as_string(doc_security_tag)!
278 | 	if doc_security != '0' && doc_security != '1' {
279 | 		return error('Invalid app properties XML. Expected a "0" or "1" value for <DocSecurity>.')
280 | 	}
281 | 
282 | 	scale_crop_tag := extract_first_element_by_tag(properties_node, 'ScaleCrop')!
283 | 	scale_crop_text := extract_first_child_as_string(scale_crop_tag)!
284 | 	scale_crop := scale_crop_text == 'true'
285 | 
286 | 	links_up_to_date_tag := extract_first_element_by_tag(properties_node, 'LinksUpToDate')!
287 | 	links_up_to_date_text := extract_first_child_as_string(links_up_to_date_tag)!
288 | 	links_up_to_date := links_up_to_date_text == 'true'
289 | 
290 | 	shared_doc_tag := extract_first_element_by_tag(properties_node, 'SharedDoc')!
291 | 	shared_doc_text := extract_first_child_as_string(shared_doc_tag)!
292 | 	shared_doc := shared_doc_text == 'true'
293 | 
294 | 	hyperlinks_changed_tag := extract_first_element_by_tag(properties_node, 'HyperlinksChanged')!
295 | 	hyperlinks_changed_text := extract_first_child_as_string(hyperlinks_changed_tag)!
296 | 	hyperlinks_changed := hyperlinks_changed_text == 'true'
297 | 
298 | 	app_version_tag := extract_first_element_by_tag(properties_node, 'AppVersion')!
299 | 	app_version := extract_first_child_as_string(app_version_tag)!
300 | 
301 | 	company_tag := extract_first_element_by_tag(properties_node, 'Company')!
302 | 	company := extract_first_child_as_string(company_tag) or { '' }
303 | 
304 | 	heading_pairs_tag := extract_first_element_by_tag(properties_node, 'HeadingPairs')!
305 | 	heading_pairs := HeadingPair.parse(heading_pairs_tag) or {
306 | 		return error('Invalid app properties XML. Failed to parse heading pairs.\n${err}')
307 | 	}
308 | 
309 | 	titles_of_parts_tag := extract_first_element_by_tag(properties_node, 'TitlesOfParts')!
310 | 	titles_of_parts := TitlesOfParts.parse(titles_of_parts_tag) or {
311 | 		return error('Invalid app properties XML. Failed to parse titles of parts.\n${err}')
312 | 	}
313 | 
314 | 	return AppProperties{
315 | 		application: application
316 | 		doc_security: doc_security
317 | 		scale_crop: scale_crop
318 | 		links_up_to_date: links_up_to_date
319 | 		shared_doc: shared_doc
320 | 		hyperlinks_changed: hyperlinks_changed
321 | 		app_version: app_version
322 | 		company: company
323 | 		heading_pairs: heading_pairs
324 | 		titles_of_parts: titles_of_parts
325 | 	}
326 | }
327 | 


--------------------------------------------------------------------------------