├── dbg
├── .gitignore
├── Workbook1.xls
├── .travis.yml
├── ieee754.c
├── ummap.h
├── myerr.h
├── Makefile
├── xls2txt.h
├── ummap.c
├── list.h
├── cp.c
├── ole.c
├── README.md
└── xls2txt.c
/dbg:
--------------------------------------------------------------------------------
1 | r
2 | bt
3 | q
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | xls2txt
3 |
--------------------------------------------------------------------------------
/Workbook1.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hroptatyr/xls2txt/HEAD/Workbook1.xls
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 |
3 | sudo: false
4 |
5 | os:
6 | - osx
7 |
8 | osx_image: xcode8.3
9 |
10 | compiler:
11 | - clang
12 |
13 | script:
14 | - make
15 | - make check
16 |
17 | ## whitelist
18 | branches:
19 | only:
20 | - bld/macos
21 | - master
22 |
23 | notifications:
24 | email:
25 | - devel@fresse.org
26 |
27 | deploy:
28 | provider: releases
29 | api_key:
30 | secure: djHDB3+NuGRXkIiXksrgWUBme4nq1RV1V2NX2WrwL4FUgYcUXuGGKjPt4VhMl2wnEy1yCe4Ao3z7TEmcKgkpAqVKnSpe1qbTit7pixfDNeZsmDjgEM042twFpQG/0IFX14jlRqHD7BBV/eADdkg04icld4aL1aOsJzuey+sPgt8=
31 | file: xls2txt
32 | skip_cleanup: true
33 | on:
34 | repo: hroptatyr/xls2txt
35 | branch: bld/macos
36 |
--------------------------------------------------------------------------------
/ieee754.c:
--------------------------------------------------------------------------------
1 | #define _ISOC99_SOURCE
2 | #include
3 | #include "xls2txt.h"
4 |
5 | #ifndef __i386__
6 |
7 | double ieee754(u64 v)
8 | {
9 | int s, e;
10 | double r;
11 |
12 | s = v>>52;
13 | v &= 0x000FFFFFFFFFFFFFull;
14 | e = s & 0x7FF;
15 | if(!e)
16 | goto denorm;
17 | if(e < 0x7FF) {
18 | v += 0x0010000000000000ull, e--;
19 | denorm:
20 | r = ldexp(v, e - 0x3FF - 52 + 1);
21 | } else if(v) {
22 | r = NAN; s ^= 0x800;
23 | } else
24 | r = INFINITY;
25 | if(s & 0x800)
26 | r = -r;
27 | return r;
28 | }
29 |
30 | #else
31 |
32 | double ieee754(u64 v)
33 | {
34 | union {
35 | u64 v;
36 | double d;
37 | } u;
38 | u.v = v;
39 | return u.d;
40 | }
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/ummap.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2005 Jan Bobrowski
3 | *
4 | * This library is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU Lesser General Public
6 | * License version 2.1 as published by the Free Software Foundation.
7 | */
8 |
9 | #include "list.h"
10 |
#ifndef container_of
#include <stddef.h>	/* offsetof */
/* Given pointer P to member M embedded in an object of type T,
 * yield a pointer to the enclosing T. */
#define container_of(P,T,M) ((T*)((char*)(P)-offsetof(T,M)))
#endif
15 |
16 | struct ummap { /* one on-demand mapping handed to um_map()/um_unmap() */
17 | list_t list; /* linkage; um_map() adds it to its list of mappings */
18 | void *addr; /* start of the region; set by um_map() from mmap() */
19 | int size; /* length passed to mmap()/munmap(); read, not written, by um_map() */
20 | int (*handler)(struct ummap *, void *); /* called by the fault handler with the page-aligned address; a negative result stops the handlers being re-installed */
21 | };
22 |
23 | extern unsigned um_page_sc, um_page_sz;
24 |
25 | int um_map(struct ummap *um);
26 | void um_unmap(struct ummap *um);
27 | int um_access_page(void *p);
28 |
--------------------------------------------------------------------------------
/myerr.h:
--------------------------------------------------------------------------------
1 | /*
2 | * fake up a quick myerr.h:
3 | *
4 | * void err(int eval, const char *fmt, ...);
5 | * void errx(int eval, const char *fmt, ...);
6 | * void warnx(const char *fmt, ...);
7 | */
8 | #include
9 | #include
10 | #include
11 |
/*
 * err(eval, fmt, ...): print "xls2txt: <message>: <strerror(errno)>" to
 * stderr, then exit with status eval.
 *
 * errno is captured before the first fprintf() because stdio calls may
 * modify it; otherwise the reported reason could belong to the fprintf.
 * The expansion stays a plain brace block, as before, so existing call
 * sites keep compiling unchanged.
 */
#define err(eval, fmt, ...) { \
	int myerr_saved_errno_ = errno; \
	(void)fprintf(stderr, "xls2txt: "fmt": ", ##__VA_ARGS__); \
	(void)fprintf(stderr, "%s\n", strerror(myerr_saved_errno_)); \
	exit(eval); }

/* errx(eval, fmt, ...): print "xls2txt: <message>" to stderr, then exit
 * with status eval. errno is not consulted. */
#define errx(eval, fmt, ...) { \
	(void)fprintf(stderr, "xls2txt: "); \
	(void)fprintf(stderr, fmt"\n", ##__VA_ARGS__); \
	exit(eval); }

/* warnx(fmt, ...): print "xls2txt: <message>" to stderr and continue. */
#define warnx(fmt, ...) \
	(void)fprintf(stderr, "xls2txt: " fmt "\n", ##__VA_ARGS__)
24 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# gmake

NAME = xls2txt
VERSION = 0.15
BINDEST = /usr/local/bin
PKG=$(NAME)-$(VERSION)
FILES = Makefile xls2txt.[ch] ole.c cp.c ummap.[ch] ieee754.c list.h myerr.h

CFLAGS ?= -O2 -g -Wall
# Libraries belong in LDLIBS: the implicit link rule puts LDFLAGS before
# the object files and LDLIBS after them, and -lm must follow the objects
# that use it.
LDLIBS = -lm

# linked by make's implicit rule from the listed objects
xls2txt: xls2txt.o ole.o cp.o ummap.o ieee754.o

xls2txt.o: xls2txt.c xls2txt.h
	$(CC) $(CFLAGS) -DVERSION=$(VERSION) -c $< -o $@

install: xls2txt
	install -s $< $(BINDEST)

# FILES holds glob patterns such as xls2txt.[ch]; filter/basename turn the
# C sources among them into the matching object names
clean:
	rm -f xls2txt $(addsuffix .o,$(basename $(filter %.c %.[ch],$(FILES))))

# build the tarball through a temporary symlink so paths start with $(PKG)/
dist:
	ln -s . $(PKG)
	tar czf $(PKG).tar.gz --group=root --owner=root $(addprefix $(PKG)/, $(FILES)); \
	rm $(PKG)

check: xls2txt
	./$< -l Workbook1.xls
	./$< Workbook1.xls

.PHONY: install clean dist check
33 |
--------------------------------------------------------------------------------
/xls2txt.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "myerr.h"
6 |
/* Fixed-size integer aliases used throughout the program. */
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef signed int s32;
typedef unsigned long
#ifndef __LP64__
long
#endif
u64;

/*
 * Little-endian accessors: gNN(p) loads an NN-bit value from p,
 * p16(p, v) stores a 16-bit value at p.
 */
#ifdef __i386__
#define g16(P) (*(const u16*)(P))
#define g32(P) (*(const u32*)(P))
#define g64(P) (*(const u64*)(P))
#define p16(P,V) (*(u16*)(P)=(V))
#else
static inline u16 g16(const void *p)
{
	const u8 *b = p;
	return (u16)(b[0] | b[1]<<8);
}

static inline u32 g32(const void *p)
{
	const u8 *b = p;
	/* widen before shifting: g16() promotes to int, and shifting a value
	 * >= 0x8000 left by 16 would overflow a signed int (undefined) */
	return (u32)g16(b) | (u32)g16(b+2)<<16;
}

static inline u64 g64(const void *p)
{
	const u8 *b = p;
	return (u64)g32(b) | (u64)g32(b+4)<<32;
}

static inline void p16(void *p, u16 v)
{
	u8 *b = p;
	b[0] = (u8)v;
	b[1] = (u8)(v>>8);
}
#endif
28 |
/* Number of elements in array T (T must be an array, not a pointer).
 * Arguments are parenthesised so any array-valued expression works. */
#define elemof(T) (sizeof(T)/sizeof(*(T)))
/* Pointer one past the last element of array T. */
#define endof(T) ((T)+elemof(T))
31 |
32 | typedef struct {
33 | u8 *ptr;
34 | unsigned len;
35 | } meml_t;
36 |
37 | double ieee754(u64);
38 |
39 | int ole_open(char *name);
40 | meml_t get_workbook();
41 |
42 | int find_charset(char *name);
43 | void set_charset(int n); // output charset
44 | u8 *print_uni(u8 *p, int l, u8 f);
45 | void set_codepage(int n); // sheet codepage
46 | u8 *print_cp_str(u8 *p, int l);
47 |
--------------------------------------------------------------------------------
/ummap.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2005 Jan Bobrowski
3 | *
4 | * This library is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU Lesser General Public
6 | * License version 2.1 as published by the Free Software Foundation.
7 | */
8 |
9 | /* These procedures allow the user to employ virtual memory to map
10 | * arbitrary data to memory. The data can then be computed on-demand
11 | * instead of preparing it on start.
12 | */
13 |
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include // ffs
19 | #include "myerr.h"
20 | #include "ummap.h"
21 |
22 | unsigned um_page_sz, um_page_sc;
23 |
24 | static void um_sig(int n, siginfo_t *i, void *c);
25 | static struct sigaction um_sa;
26 | static LIST(maps);
27 |
28 | static void um_init()
29 | {
30 | um_page_sz = getpagesize();
31 | um_page_sc = ffs(um_page_sz) - 1;
32 |
33 | um_sa.sa_sigaction = um_sig;
34 | um_sa.sa_flags = SA_SIGINFO|SA_RESETHAND;
35 | }
36 |
37 | static void um_sig(int n, siginfo_t *i, void *c)
38 | {
39 | struct ummap *um;
40 | unsigned long o;
41 |
42 | if(i->si_code == SEGV_ACCERR
43 | #ifdef __OpenBSD__ // XXX others too?
44 | //#if #system(bsd)
45 | || i->si_code == SEGV_MAPERR
46 | #endif
47 | #ifdef __APPLE__
48 | || i->si_code == SEGV_MAPERR
49 | #endif
50 | ) {
51 | list_t *l;
52 | for(l=maps.next; l!=&maps; l=l->next) {
53 | um = list_item(l, struct ummap, list);
54 | o = (char*)i->si_addr - (char*)um->addr;
55 | if(o < um->size)
56 | goto found;
57 | }
58 | }
59 | return;
60 |
61 | found:
62 | if(um->handler(um, (char*)um->addr + (o & -um_page_sz)) >= 0) {
63 | sigaction(SIGSEGV, &um_sa, 0);
64 | sigaction(SIGBUS, &um_sa, 0);
65 | }
66 | }
67 |
68 | int um_access_page(void *p)
69 | {
70 | #if 0
71 | return (int)mmap(
72 | p, um_page_sz,
73 | PROT_READ|PROT_WRITE,
74 | MAP_PRIVATE|MAP_ANON|MAP_FIXED,
75 | -1, 0) == MAP_FAILED ? -1 : 0;
76 | #else
77 | return mprotect(p, um_page_sz, PROT_READ|PROT_WRITE);
78 | #endif
79 | }
80 |
81 | int um_map(struct ummap *um)
82 | {
83 | void *p;
84 | int v;
85 |
86 | if(!um_page_sz)
87 | um_init();
88 |
89 | p = mmap(0, um->size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
90 | if(p==MAP_FAILED)
91 | return -1;
92 | um->addr = p;
93 |
94 | v = 0;
95 | v += sigaction(SIGSEGV, &um_sa, 0);
96 | v += sigaction(SIGBUS, &um_sa, 0);
97 | if(v>=0) list_add(&maps, &um->list);
98 | else munmap(p, um->size);
99 | return v;
100 | }
101 |
102 | void um_unmap(struct ummap *um)
103 | {
104 | munmap(um->addr, um->size);
105 | }
106 |
--------------------------------------------------------------------------------
/list.h:
--------------------------------------------------------------------------------
1 | /* list.h by Jan Bobrowski. Inspired by list.h from Linux */
2 |
3 | #ifndef LIST_H
4 | #define LIST_H
5 |
#include <stddef.h>	/* offsetof */

/* Node of a circular doubly-linked list; a list head is a node too. */
typedef struct list {
	struct list *next, *prev;
} list_t;

/* make b the successor of a */
static inline void list_link(struct list *a, struct list *b)
{
	a->next = b;
	b->prev = a;
}

/* insert item right after head (front of the list) */
static inline void list_add(struct list *head, struct list *item)
{
	struct list *first = head->next;
	list_link(head, item);
	list_link(item, first);
}

/* insert item right before head (end of the list) */
static inline void list_add_end(struct list *head, struct list *item)
{
	struct list *last = head->prev;
	list_link(item, head);
	list_link(last, item);
}

/* unlink item; returns the node that followed it */
static inline list_t *list_del(struct list *item)
{
	struct list *prev = item->prev, *next = item->next;
	list_link(prev, next);
	return next;
}

/* make head an empty list */
static inline void list_init(struct list *head)
{
	list_link(head, head);
}

/* delete item from one list and add it to another */
static inline void list_del_add(list_t *head, list_t *item)
{
	list_t *prev = item->prev, *next = item->next;
	list_link(prev, next);
	next = head->next;
	list_link(head, item);
	list_link(item, next);
}

/* delete item from one list and append it to another */
static inline void list_del_add_end(list_t *head, list_t *item)
{
	list_t *prev = item->prev, *next = item->next;
	list_link(prev, next);
	prev = head->prev;
	list_link(item, head);
	item->prev = prev;
	prev->next = item;
}

/* unlink item and leave it linked to itself */
static inline void list_del_init(struct list *item)
{
	struct list *prev = item->prev, *next = item->next;
	list_link(item, item);
	list_link(prev, next);
}

/* splice the two circular lists containing a and b into one */
static inline void list_join(struct list *a, struct list *b)
{
	list_t *ae = a->prev;
	list_t *be = b->prev;
	b->prev = ae;
	a->prev = be;
	ae->next = b;
	be->next = a;
}

/* non-zero when head has no items */
static inline int list_empty(struct list *head)
{
	return head->next == head;
}

/* define an empty list head named L */
#define LIST(L) struct list L = {&L, &L}

/* from a pointer L to the list member M, recover the enclosing object of
 * type T; offsetof replaces the former null-pointer expression cast to long */
#define list_entry(L, T, M) ((T*)((char*)(L) - offsetof(T, M)))
#define list_item(L, T, M) ((T*)((char*)(L) - offsetof(T, M)))

#define list_first(H, T, M) list_item((H)->next, T, M)
#define list_last(H, T, M) list_item((H)->prev, T, M)
/* GNU C */
#define list_next(O, M) list_item((O)->M.next, __typeof__(*(O)), M)
#define list_prev(O, M) list_item((O)->M.prev, __typeof__(*(O)), M)

/* remove first element and return it */
static inline struct list *list_get(struct list *head)
{
	struct list *item = head->next;
	struct list *next = item->next;
	list_link(head, next);
	return item;
}

/* remove first element, initialize and return it */
static inline struct list *list_get_init(struct list *head)
{
	struct list *item = head->next;
	struct list *next = item->next;
	list_link(item, item);
	list_link(head, next);
	return item;
}

#define list_get_entry(H, T, M) list_item(list_get((H)), T, M)
#define list_get_init_entry(H, T, M) list_item(list_get_init((H)), T, M)
#define list_get_item(H, T, M) list_item(list_get((H)), T, M)
#define list_get_init_item(H, T, M) list_item(list_get_init((H)), T, M)
130 |
131 | #endif
132 |
--------------------------------------------------------------------------------
/cp.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2005-2007 Jan Bobrowski
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | */
8 |
9 | #include "xls2txt.h"
10 | #include
11 |
12 | static u8 uni2cs[0x2E0-0xA0];
13 | static u8 *cs = 0;
14 | static char badchar = '?';
15 |
16 | static u8 fallbacks[] = " "
17 | " !cL\1Y|\4<\1-\6'\6>\3?AAAAAA\1CEEEEIIII\1NOOOOO\1OUUUUY\2aa"
18 | "aaaa\1ceeeeiiii\1nooooo\1ouuuuy\1yAaAaAaCcCcCcCcDdDdEeEeEeEe"
19 | "EeGgGgGgGgHhHhIiIiIiIiIi\2JjKk\1LlLlLlLlLlNnNnNnn\2OoOoOo\2R"
20 | "rRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzsbBBb\3Cc\2Dd\4FfG"
21 | "\3IKkl\2NnOOo\2Pp\5tTtTUu\1VYyZz\26AaIiOoUuUuUuUuUu\1AaAa\2G"
22 | "gGgKkOoOo\2j\3Gg\2NnAa\2OoAaAaEeEeIiIiOoOoRrRrUuUuSsTt\2HhNd"
23 | "\2ZzAaEeOoOoOoOoYy";
24 |
25 | static u8 latin2[] = {160,
26 | 0xA0,3,0xA4,2,0xA7,0xA8,4,0xAD,2,0xB0,3,0xB4,3,0xB8,8,0xC1,0xC2,
27 | 1,0xC4,2,0xC7,1,0xC9,1,0xCB,1,0xCD,0xCE,4,0xD3,0xD4,1,0xD6,0xD7,
28 | 2,0xDA,1,0xDC,0xDD,1,0xDF,1,0xE1,0xE2,1,0xE4,2,0xE7,1,0xE9,1,
29 | 0xEB,1,0xED,0xEE,4,0xF3,0xF4,1,0xF6,0xF7,2,0xFA,1,0xFC,0xFD,4,
30 | 0xC3,0xE3,0xA1,0xB1,0xC6,0xE6,4,0xC8,0xE8,0xCF,0xEF,0xD0,0xF0,6,
31 | 0xCA,0xEA,0xCC,0xEC,29,0xC5,0xE5,2,0xA5,0xB5,2,0xA3,0xB3,0xD1,
32 | 0xF1,2,0xD2,0xF2,7,0xD5,0xF5,2,0xC0,0xE0,2,0xD8,0xF8,0xA6,0xB6,2,
33 | 0xAA,0xBA,0xA9,0xB9,0xDE,0xFE,0xAB,0xBB,8,0xD9,0xF9,0xDB,0xFB,7,
34 | 0xAC,0xBC,0xAF,0xBF,0xAE,0xBE,159,159,10,0xB7,16,0xA2,0xFF,1,
35 | 0xB2,1,0xBD,0
36 | };
37 |
/*
 * Map an output charset name to its index:
 * "utf8" -> 0, "asc" -> 1, "iso1" -> 2, "iso2" -> 3.
 * Returns -1 for any other name.
 */
int find_charset(char *name)
{
	static const char *const known[] = {"utf8", "asc", "iso1", "iso2"};
	const size_t len = strlen(name);
	int idx;

	if(len < 3 || len > 4)
		return -1;

	for(idx = 0; idx < 4; idx++) {
		if(strcmp(known[idx], name) == 0)
			return idx;
	}
	return -1;
}
49 |
50 | static void expand(u8 *s)
51 | {
52 | u8 *d = uni2cs;
53 | u8 m = *s++;
54 | do {
55 | u8 c = *s++;
56 | if(c=endof(uni2cs)) break;
58 | else *d++ = c;
59 | } while(*s);
60 | }
61 |
62 | void set_charset(int n)
63 | {
64 | cs = 0;
65 | if(n==0) // utf8
66 | return;
67 |
68 | // memset(uni2cs, 0, sizeof uni2cs);
69 | expand(fallbacks);
70 |
71 | switch(n) {
72 | int u;
73 | case 1: // ascii
74 | break;
75 | case 2: // latin 1
76 | for(u=0x00A0; u<0x0100; u++)
77 | uni2cs[u-0xA0] = u;
78 | break;
79 | case 3: // latin 2
80 | expand(latin2);
81 | break;
82 | }
83 | cs = uni2cs;
84 | }
85 |
86 | static void print_uni_char(u16 u)
87 | {
88 | unsigned v = u;
89 | if(v<0x00A0) {
90 | if(v<0x20 || v>=0x7F)
91 | v = v==10 ? ' ' : badchar;
92 | } else if(cs) {
93 | v -= 0xA0;
94 | if(v >= sizeof uni2cs || !(v = cs[v]))
95 | v = badchar;
96 | } else {
97 | v = v>>6 | 0xC0;
98 | if(u >= 0x800) {
99 | putchar(u>>12 | 0xE0);
100 | v = v&077 | 0x80;
101 | }
102 | putchar(v);
103 | v = u&077 | 0x80;
104 | }
105 | putchar(v);
106 | }
107 |
108 | u8 *print_uni(u8 *p, int l, u8 f)
109 | {
110 | if(f&1)
111 | while(--l >= 0) {
112 | print_uni_char(g16(p));
113 | p += 2;
114 | }
115 | else
116 | while(--l >= 0)
117 | print_uni_char(*p++);
118 | return p;
119 | }
120 |
121 | // codepage
122 |
123 | static u16 *cp = 0;
124 |
125 | static u16 cp1250[128] = {
126 | 0,0,0x201A,0,0x201E,0x2026,0x2020,0x2021,0,0x2030,0x0160,0x2039,
127 | 0x015A,0x0164,0x017D,0x0179,0,0x2018,0x2019,0x201C,0x201D,0x2022,
128 | 0x2013,0x2014,0,0x2122,0x0161,0x203A,0x015B,0x0165,0x017E,0x017A,
129 | 0x00A0,0x02C7,0x02D8,0x0141,0x00A4,0x0104,0x00A6,0x00A7,0x00A8,
130 | 0x00A9,0x015E,0x00AB,0x00AC,0x00AD,0x00AE,0x017B,0x00B0,0x00B1,
131 | 0x02DB,0x0142,0x00B4,0x00B5,0x00B6,0x00B7,0x00B8,0x0105,0x015F,
132 | 0x00BB,0x013D,0x02DD,0x013E,0x017C,0,0x00C1,0x00C2,0x0102,0x00C4,
133 | 0x0139,0x0106,0x00C7,0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,
134 | 0x00CE,0x010E,0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,
135 | 0x00D7,0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
136 | 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,0x010D,
137 | 0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,0x0111,0x0144,
138 | 0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,0x0159,0x016F,0x00FA,
139 | 0x0171,0x00FC,0x00FD,0x0163,0x02D9,
140 | };
141 |
142 | static u16 cp1252[128] = {
143 | 0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6,
144 | 0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,0x0090,0x2018,
145 | 0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,0x02DC,0x2122,0x0161,
146 | 0x203A,0x0153,0x009D,0x017E,0x0178,0x00A0,0x00A1,0x00A2,0x00A3,
147 | 0x00A4,0x00A5,0x00A6,0x00A7,0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,
148 | 0x00AD,0x00AE,0x00AF,0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,
149 | 0x00B6,0x00B7,0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,
150 | 0x00BF,0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
151 | 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,0x00D0,
152 | 0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,0x00D8,0x00D9,
153 | 0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,0x00E0,0x00E1,0x00E2,
154 | 0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,0x00E8,0x00E9,0x00EA,0x00EB,
155 | 0x00EC,0x00ED,0x00EE,0x00EF,0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,
156 | 0x00F5,0x00F6,0x00F7,0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,
157 | 0x00FE,0x00FF,
158 | };
159 |
160 | static u16 cp1200[128]; // not initialized
161 |
162 | void set_codepage(int n)
163 | {
164 | if(n==1200) {
165 | int i;
166 | for(i=0x80; i<=0xFF; i++) cp1200[i-0x80] = i;
167 | cp = cp1200;
168 | } else if(n==1250 || n==0x8001) cp = cp1250;
169 | else if(n==1252) cp = cp1252;
170 | else if(n!=0x16F) warnx("%d: Codepage not supported", n);
171 | }
172 |
173 | u8 *print_cp_str(u8 *p, int l)
174 | {
175 | u8 *e = p + l;
176 | while(p
3 | *
4 | * This program is free software; you can redistribute it and/or
5 | * modify it under the terms of the GNU General Public License
6 | * version 2 as published by the Free Software Foundation.
7 | */
8 |
9 | /*
10 | * Based on information from sc.openoffice.org/compdocfileformat.pdf
11 | */
12 |
13 | #include "xls2txt.h"
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include "ummap.h"
19 |
20 | #define BADSEC (-5)
21 |
22 | struct stream_kind { /* per-sector-size parameters and accessors (ole.large_sec / ole.small_sec) */
23 | unsigned secsc; /* shift count for this sector size; sat_get_lg() uses secsc-2 for 4-byte table entries */
24 | unsigned secsz; /* sector size in bytes; str_seek() advances stream positions by this */
25 | u32 maxsec; /* upper bound a sector id is tested against by SID_OK() */
26 | s32 (*sat_get)(struct stream_kind *sk, u32 n); /* allocation-table lookup for sector n; str_seek() uses the result as the next sector of a chain */
27 | u8 *(*sec_ptr)(struct stream_kind *sk, u32 n); /* pointer to the data of sector n */
28 | };
29 |
30 | struct stream { /* a sector chain plus a cursor on its current sector; initialised by str_open(), moved by str_seek() */
31 | struct stream_kind *kind; /* sector kind whose sat_get/sec_ptr are used for this chain */
32 | s32 start; /* id of the first sector */
33 | s32 c_sec; /* id of the current sector */
34 | unsigned c_pos; /* stream offset at which the current sector begins */
35 | u8 *c_ptr; /* data of the current sector, as returned by kind->sec_ptr() */
36 | };
37 |
38 | struct ole { /* state of the one open compound file */
39 | meml_t map; /* file contents; sec_ptr_lg() computes sector addresses from map.ptr */
40 | int fd; /* descriptor returned by open() in ole_open() */
41 | char *name; /* file name, used as prefix in oleerr() messages */
42 |
43 | s32 root; /* sector id read from header offset 48; find_slot() starts scanning here */
44 | unsigned sec_tshld; /* read from header offset 56; get_workbook() uses small sectors for streams shorter than this */
45 | struct stream ssat; /* stream read by sat_get_sm() for small-sector table lookups */
46 | struct stream container; /* stream read by sec_ptr_sm() to locate small-sector data */
47 |
48 | s32 msat[109]; /* ids of the first table sectors, indexed directly by sat_get_lg() */
49 | s32 msat_start; /* first sector of the chain sat_get_lg() walks for table sectors beyond msat[] */
50 | // s32 msat_size;
51 |
52 | struct stream_kind large_sec; /* accessors sat_get_lg/sec_ptr_lg */
53 | struct stream_kind small_sec; /* accessors sat_get_sm/sec_ptr_sm */
54 | } ole;
55 |
56 | #define oleerr(S) errx(1, "%s: %s", ole.name, S);
57 | #define oleerrf(F,A...) errx(1, "%s: " F, ole.name, A);
58 |
59 | static meml_t mmap_fd(int fd) {
60 | struct stat st;
61 | meml_t m;
62 | if(fstat(fd, &st)<0) err(1, "fstat");
63 | m.ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, ole.fd, 0);
64 | if(m.ptr==MAP_FAILED) err(1, "mmap");
65 | m.len = st.st_size;
66 | return m;
67 | }
68 |
69 | static s32 sat_get_lg(struct stream_kind *sk, u32 n);
70 | static u8 *sec_ptr_lg(struct stream_kind *sk, u32 n);
71 | static s32 sat_get_sm(struct stream_kind *sk, u32 n);
72 | static u8 *sec_ptr_sm(struct stream_kind *sk, u32 n);
73 |
74 | int ole_open(char *name)
75 | {
76 | u8 h[0x200];
77 | int v;
78 |
79 | ole.name = name;
80 | v = open(name, O_RDONLY);
81 | if(v<0) err(1, "%s", name);
82 | ole.fd = v;
83 |
84 | v = read(ole.fd, h, sizeof h);
85 | if(vsecsc = g16(h+30);
111 | sk->secsz = 1<secsc;
112 | sk->maxsec = g32(h+44) << ole.large_sec.secsc-2;
113 | sk->sat_get = sat_get_lg;
114 | sk->sec_ptr = sec_ptr_lg;
115 | }
116 |
117 | ole.sec_tshld = g32(h+56);
118 | {
119 | struct stream_kind *sk = &ole.small_sec;
120 | sk->secsc = g16(h+32);
121 | sk->secsz = 1<secsc;
122 | sk->maxsec = g32(h+64) << ole.large_sec.secsc-2;
123 | sk->sat_get = sat_get_sm;
124 | sk->sec_ptr = sec_ptr_sm;
125 | }
126 |
127 | ole.ssat.start = g32(h+60);
128 |
129 | ole.root = g32(h+48);
130 | if(ole.root < 0)
131 | oleerr("There's no root stream");
132 |
133 | return 1;
134 | }
135 |
136 | static void str_open(struct stream *str, struct stream_kind *sk, s32 start)
137 | {
138 | str->start = start;
139 | str->c_sec = start;
140 | str->c_pos = 0;
141 | str->kind = sk;
142 | str->c_ptr = sk->sec_ptr(sk, start);
143 | }
144 |
145 | #define SID_OK(K,N) ((u32)(N)<=(K)->maxsec)
146 | #define SID_GET(P,I) ((s32)g32((s32*)(P)+(I)))
147 |
148 | static s32 sat_get_lg(struct stream_kind *sk, u32 n) /* allocation-table entry n for large sectors, or BADSEC if a table sector id fails SID_OK() */
149 | {
150 | unsigned m, maxsecidx;
151 | s32 b;
152 |
153 | maxsecidx = (1 << sk->secsc-2) - 1; /* index of the last 4-byte slot in a sector */
154 | m = n >> sk->secsc-2; n &= maxsecidx; /* m: which table sector holds the entry; n: slot inside it */
155 | if(m < elemof(ole.msat))
156 | b = ole.msat[m]; /* the first table sectors are listed directly in ole.msat[] */
157 | else {
158 | u8 *p;
159 | b = ole.msat_start; /* the rest are listed in a chain of sectors starting here */
160 | m -= elemof(ole.msat);
161 | for(;;) {
162 | if(!SID_OK(sk, b))
163 | return BADSEC;
164 | p = sk->sec_ptr(sk, b);
165 | if(m < maxsecidx) /* wanted id is in this sector */
166 | break;
167 | b = SID_GET(p, maxsecidx); /* last slot holds the id of the next sector in the chain */
168 | m -= maxsecidx; /* each chained sector contributes maxsecidx ids */
169 | }
170 | b = SID_GET(p, m);
171 | }
172 | if(SID_OK(sk, b)) { /* b is the table sector holding entry n */
173 | u8 *p = sk->sec_ptr(sk, b);
174 | return SID_GET(p, n);
175 | }
176 | return BADSEC;
177 | }
178 |
179 | static int str_seek(struct stream *str, unsigned o);
180 |
181 | static u8 *sec_ptr_lg(struct stream_kind *sk, u32 n)
182 | {
183 | return ole.map.ptr + (n<secsc);
184 | }
185 |
186 | static s32 sat_get_sm(struct stream_kind *sk, u32 n)
187 | {
188 | int o = str_seek(&ole.ssat, 4*n);
189 | if(o<0) return BADSEC;
190 | return g32(ole.ssat.c_ptr + o);
191 | }
192 |
193 | static u8 *sec_ptr_sm(struct stream_kind *sk, u32 n)
194 | {
195 | int o = str_seek(&ole.container, n<secsc);
196 | if(o<0) oleerr("small sector not found");
197 | return ole.container.c_ptr + o;
198 | }
199 |
200 | static int str_seek(struct stream *str, unsigned o) /* move the cursor to the sector containing stream offset o; returns o's offset within that sector, or -1 if the chain ends first */
201 | {
202 | struct stream_kind *sk = str->kind;
203 | unsigned e = str->c_pos + sk->secsz; /* stream offset just past the current sector */
204 | s32 b = str->c_sec;
205 |
206 | if(o < e) {
207 | if(o >= str->c_pos) /* already inside the current sector */
208 | goto ret;
209 | e = sk->secsz; /* target lies before the cursor: restart from the first sector */
210 | b = str->start;
211 | if(o < e) goto found;
212 | }
213 | do { /* follow the chain one sector at a time until e passes o */
214 | b = sk->sat_get(sk, b);
215 | if(!SID_OK(sk, b)) return -1;
216 | e += sk->secsz;
217 | } while(o >= e);
218 |
219 | found:
220 | str->c_sec = b;
221 | str->c_pos = e - sk->secsz; /* start offset of the sector just reached */
222 | str->c_ptr = sk->sec_ptr(sk, b);
223 | ret:
224 | return o - str->c_pos;
225 | }
226 |
227 | static void open_small_streams()
228 | {
229 | struct stream_kind *sk = &ole.large_sec;
230 | u8 *p = sec_ptr_lg(sk, ole.root);
231 |
232 | if(!SID_OK(sk, ole.ssat.start) ||
233 | !SID_OK(sk, g32(p+0x74))) oleerr("Small sector storage empty");
234 |
235 | str_open(&ole.container, &ole.large_sec, g32(p+0x74));
236 | str_open(&ole.ssat, &ole.large_sec, ole.ssat.start);
237 | }
238 |
239 | static struct ummap wbk_um;
240 | static struct stream wbk_str;
241 |
242 | /* this is executed by the signal handler */
243 | static int str_get_page(struct ummap *um, u8 *d) /* fill the page at d with the matching bytes of wbk_str; returns 0, or a negative value if str_seek() or um_access_page() fails */
244 | {
245 | struct stream_kind *sk = wbk_str.kind;
246 | int n, c, l;
247 | u8 *s;
248 |
249 | n = str_seek(&wbk_str, d - (u8*)um->addr); /* offset of the page within the mapping is used as the stream offset */
250 | if(n<0) return n;
251 |
252 | sk = wbk_str.kind;
253 | c = sk->secsz - n; /* bytes left in the current sector from the seek position */
254 | s = wbk_str.c_ptr + n;
255 |
256 | n = um_access_page(d); /* make the page accessible before writing into it */
257 | if(n<0) return n;
258 |
259 | l = um_page_sz - c; /* bytes still missing after the current sector is used up */
260 | if(l <= 0) { /* the current sector covers the whole page */
261 | memcpy(d, s, um_page_sz);
262 | return 0;
263 | }
264 | memcpy(d, s, c);
265 | d += c;
266 |
267 | for(;;) { /* continue with the following sectors of the chain */
268 | s32 b = sk->sat_get(sk, wbk_str.c_sec);
269 | if(!SID_OK(sk, b)) return 0; /* chain ended: the rest of the page is left as it is */
270 | s = sk->sec_ptr(sk, b);
271 | wbk_str.c_sec = b;
272 | wbk_str.c_pos += sk->secsz;
273 | wbk_str.c_ptr = s;
274 |
275 | if(l <= sk->secsz) break; /* this sector holds the final part */
276 | l -= sk->secsz;
277 | memcpy(d, s, sk->secsz);
278 | d += sk->secsz;
279 | }
280 | memcpy(d, s, l);
281 |
282 | return 0;
283 | }
284 |
285 | static u8 *find_slot(char *name)
286 | {
287 | struct stream_kind * const sk = &ole.large_sec;
288 | s32 b;
289 | u8 *p, *e;
290 | u16 l;
291 |
292 | b = ole.root;
293 | p = sk->sec_ptr(sk, b);
294 | l = 2*(strlen(name) + 1);
295 | e = p + sk->secsz;
296 | for(;;) {
297 | if(p[0x42]==2 && g16(p+0x40)==l) {
298 | unsigned i = 0;
299 | for(;; i++) {
300 | if(2*i >= l)
301 | return p; // found
302 | if(p[2*i] != (u8)name[i] || p[2*i+1])
303 | break;
304 | }
305 | }
306 | p += 0x80;
307 | if(p < e) continue;
308 |
309 | b = sk->sat_get(sk, b);
310 | if(!SID_OK(sk, b)) break;
311 | p = sk->sec_ptr(sk, b);
312 | e = p + sk->secsz;
313 | }
314 | return 0;
315 | }
316 |
317 | meml_t get_workbook()
318 | {
319 | struct stream_kind *sk;
320 | u32 len, sid;
321 | u8 *p;
322 |
323 | if(!ole.map.ptr)
324 | return mmap_fd(ole.fd);
325 |
326 | p = find_slot("Workbook");
327 | if(!p) {
328 | p = find_slot("Book");
329 | if(!p)
330 | oleerr("No Workbook found");
331 | }
332 |
333 | sid = g32(p+0x74);
334 | len = g32(p+0x78);
335 |
336 | sk = &ole.large_sec;
337 | if(len < ole.sec_tshld) {
338 | if(!ole.container.c_ptr)
339 | open_small_streams();
340 | sk = &ole.small_sec;
341 | }
342 |
343 | if(!SID_OK(sk, sid))
344 | oleerr("Stream is empty");
345 |
346 | str_open(&wbk_str, sk, sid);
347 |
348 | wbk_um.size = len;
349 | wbk_um.handler = (int(*)(struct ummap*,void*))str_get_page;
350 |
351 | if(um_map(&wbk_um) < 0)
352 | err(1, "um_map");
353 |
354 | return (meml_t){wbk_um.addr, wbk_um.size};
355 | }
356 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | XLS2TXT
2 |
3 | Converting Excel to Text, Simplifying Complexity
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | ## Table of Contents
19 |
20 | - [ Overview](#-overview)
21 | - [ Features](#-features)
22 | - [ Project Structure](#-project-structure)
23 | - [ Project Index](#-project-index)
24 | - [ Getting Started](#-getting-started)
25 | - [ Prerequisites](#-prerequisites)
26 | - [ Installation](#-installation)
27 | - [ Usage](#-usage)
28 | - [ Testing](#-testing)
29 | - [ Contributing](#-contributing)
30 | - [ License](#-license)
31 | - [ Acknowledgments](#-acknowledgments)
32 |
33 | ---
34 |
35 | ## Overview
36 |
37 | xls2txt converts Microsoft Excel `.xls` files to plain text. It opens the OLE2 compound document, locates the `Workbook` (or older `Book`) stream, and writes the result as text in UTF-8, ASCII, ISO-8859-1 or ISO-8859-2. The stream is exposed through an on-demand memory mapping, so its pages are filled in only when they are first accessed.
38 |
39 | ---
40 |
41 | ## Features
42 |
43 | | | Feature | Summary |
44 | | :--- | :---: | :--- |
45 | | ⚙️ | **Architecture** | - Modular design with multiple components working together to achieve a common goal.
- Scalable architecture, as indicated by the project's structure and use of virtual memory mapping.
- Unified interface for mapping and unmapping memory regions through `ummap.h`.
|
46 | | 🔩 | **Code Quality** | - High-quality code with proper error handling mechanisms, such as those defined in `myerr.h`.
- Efficient numerical computations using IEEE 754 double precision floating point numbers conversion in `ieee754.c`.
- Robust framework for managing complex data structures through the dynamic linked list implementation in `list.h`.
|
47 | | 📄 | **Documentation** | - Primary language is C, with a focus on documentation and transparency, as indicated by open-source licenses and references to external documentation.
- Clear explanations of error handling mechanisms and code functionality through comments and documentation files.
- Use of standard formats for data exchange, such as the Excel file format referenced in `sc.openoffice.org/excelfileformat.pdf`.
|
48 | | 🔌 | **Integrations** | - Integration with other components, such as debug logs and character encoding conversions, to facilitate data-driven decision-making and improve overall system reliability.
- Use of virtual memory mapping to enable on-demand computation instead of preparation at start-up.
- Connection to the Excel file format through `xls2txt.c`, which reads and interprets Excel file structures and converts relevant data to plain text format.
|
49 | | 🤖 | **Artificial Intelligence** | - No explicit use of AI or machine learning algorithms in the provided codebase, but the project's focus on data conversion and exchange may involve AI-powered tools in the broader context.
- No references to AI-related libraries or frameworks in the codebase.
|
50 | | 📈 | **Performance** | - Efficient numerical computations using IEEE 754 double precision floating point numbers conversion in `ieee754.c`.
- Robust framework for managing complex data structures through the dynamic linked list implementation in `list.h`.
- Use of virtual memory mapping to enable on-demand computation instead of preparation at start-up.
|
51 |
52 | ---
53 |
54 | ## Project Structure
55 |
56 | ```sh
57 | └── xls2txt/
58 | ├── Makefile
59 | ├── Workbook1.xls
60 | ├── cp.c
61 | ├── dbg
62 | ├── ieee754.c
63 | ├── list.h
64 | ├── myerr.h
65 | ├── ole.c
66 | ├── ummap.c
67 | ├── ummap.h
68 | ├── xls2txt.c
69 | └── xls2txt.h
70 | ```
71 |
72 |
73 | ### Project Index
74 |
75 | XLS2TXT/
76 |
77 | __root__
78 |
79 |
80 |
81 | | xls2txt.h |
82 | - Analyzes the xls2txt.h file, revealing its purpose as a foundational component of the project's overall architecture - It provides essential data types and macros to facilitate memory management, data conversion, and string manipulation within the codebase - The file serves as a crucial bridge between low-level system interactions and higher-level application logic, enabling efficient processing of various data formats and character encodings. |
83 |
84 |
85 | | ummap.c |
86 | - The ummap.c file enables the use of virtual memory mapping arbitrary data to memory, allowing on-demand computation instead of preparation at start-up - It provides a mechanism for managing mapped pages and handling segmentation faults and bus errors - The code achieves efficient memory management and error handling, making it an essential component of the project's overall architecture. |
87 |
88 |
89 | | dbg |
90 | - A three-line command script (`r`, `bt`, `q`), apparently intended to be fed to a debugger such as gdb to run the program, print a backtrace and quit - It is not listed in the Makefile's `FILES` and takes no part in the build. |
91 |
92 |
93 | | Makefile |
94 | - The Makefile serves as the backbone of the project's build process, orchestrating the compilation and installation of various components - It ensures that the executable is built from source files, installed in a designated directory, and cleaned up upon request - The file also facilitates distribution and verification of the software package - Overall, it streamlines the development workflow, enabling efficient management of dependencies and output. |
95 |
96 |
97 | | ummap.h |
98 | - **Summary**
99 |
100 | The ummap.h file serves as a core component of the project's memory management system, providing a unified interface for mapping and unmapping memory regions - It enables efficient access control and tracking of mapped pages, facilitating secure memory allocation and deallocation within the system. |
101 |
102 |
103 | | xls2txt.c |
104 | - **Summary**
105 |
106 | The `xls2txt.c` file is a critical component of the project's overall architecture - It serves as a bridge between Microsoft Excel files and plain text formats, enabling data conversion and export.
107 |
108 | In essence, this code achieves the following:
109 |
110 | * Reads and interprets Excel file structures
111 | * Converts relevant data to plain text format
112 | * Generates human-readable output
113 |
114 | By integrating with other components of the project, `xls2txt.c` plays a vital role in facilitating data exchange between different systems - Its functionality is crucial for the overall success of the project, which aims to provide a robust and efficient solution for converting Excel files to various formats.
115 |
116 | **Additional Context**
117 |
118 | The project's structure suggests that it is designed to be modular and scalable, with multiple components working together to achieve a common goal - The inclusion of open-source licenses and references to external documentation (e.g., `sc.openoffice.org/excelfileformat.pdf`) indicates a commitment to transparency and community involvement.
119 |
120 | Overall, the `xls2txt.c` file is a key component of the project's architecture, enabling data conversion and export while adhering to open-source principles. |
121 |
122 |
123 | | ieee754.c |
124 | - Converts IEEE 754 double precision floating point numbers to a standard format - Achieves this by handling various edge cases such as denormalized and infinity values, while also considering different architectures (x86 and others) - The function is designed to be portable and efficient, allowing it to be used throughout the codebase for accurate numerical computations. |
125 |
126 |
127 | | myerr.h |
128 | - Document the error handling mechanism in the project's core functionality - The provided myerr.h file defines three macros to handle errors and warnings in a centralized manner - These macros, err, errx, and warnx, ensure that error messages are printed to stderr along with the corresponding system error code, facilitating easier debugging and error reporting within the xls2txt application. |
129 |
130 |
131 | | cp.c |
132 | - The provided C code snippet appears to be part of a larger program that handles character encoding conversions - The `set_codepage` function sets the current code page based on the input value, and the `print_cp_str` function prints a string using the specified code page - However, the `cp1200` array is not initialized, which may cause issues when used. |
133 |
134 |
135 | | ole.c |
136 | - The `get_workbook` function retrieves the workbook data from the file - It first checks if a map is already available and returns its address if so - If not, it maps a new ummap structure to the file using `um_map` - The `str_get_page` function is used as the handler for the mapped pages. |
137 |
138 |
139 | | list.h |
140 | - The provided list.h file serves as the foundation for a dynamic linked list data structure, enabling efficient insertion, deletion, and manipulation of nodes within the list - It facilitates operations such as adding items to the end or beginning of the list, removing specific elements, and checking for emptiness - The code provides a robust framework for managing complex data structures in various applications. |
141 |
142 |
143 |
144 |
145 |
146 |
147 | ---
148 | ## Getting Started
149 |
150 | ### Prerequisites
151 |
152 | Before getting started with xls2txt, ensure your runtime environment meets the following requirements:
153 |
154 | - **Programming Language:** C
155 |
156 |
157 | ### Installation
158 |
159 | Install xls2txt using one of the following methods:
160 |
161 | **Build from source:**
162 |
163 | 1. Clone the xls2txt repository:
164 | ```sh
165 | ❯ git clone https://github.com/hroptatyr/xls2txt
166 | ```
167 |
168 | 2. Navigate to the project directory:
169 | ```sh
170 | ❯ cd xls2txt
171 | ```
172 |
173 | 3. Build the project:
174 |
175 | make
176 |
177 |
178 |
179 | ### Usage
180 | Run xls2txt using the following command:
181 | xls2txt [-C cs] [-n sheetnum|-A] [-f] file.xls [X:X]
182 |
183 | ### Testing
184 | Run the test suite using the following command:
185 | make check
186 |
187 | ---
188 |
189 | ## Contributing
190 |
191 | - **💬 [Join the Discussions](https://github.com/hroptatyr/xls2txt/discussions)**: Share your insights, provide feedback, or ask questions.
192 | - **🐛 [Report Issues](https://github.com/hroptatyr/xls2txt/issues)**: Submit bugs found or log feature requests for the `xls2txt` project.
193 | - **💡 [Submit Pull Requests](https://github.com/hroptatyr/xls2txt/blob/main/CONTRIBUTING.md)**: Review open PRs, and submit your own PRs.
194 |
195 |
196 | Contributing Guidelines
197 |
198 | 1. **Fork the Repository**: Start by forking the project repository to your github account.
199 | 2. **Clone Locally**: Clone the forked repository to your local machine using a git client.
200 | ```sh
201 | git clone https://github.com/hroptatyr/xls2txt
202 | ```
203 | 3. **Create a New Branch**: Always work on a new branch, giving it a descriptive name.
204 | ```sh
205 | git checkout -b new-feature-x
206 | ```
207 | 4. **Make Your Changes**: Develop and test your changes locally.
208 | 5. **Commit Your Changes**: Commit with a clear message describing your updates.
209 | ```sh
210 | git commit -m 'Implemented new feature x.'
211 | ```
212 | 6. **Push to github**: Push the changes to your forked repository.
213 | ```sh
214 | git push origin new-feature-x
215 | ```
216 | 7. **Submit a Pull Request**: Create a PR against the original project repository. Clearly describe the changes and their motivations.
217 | 8. **Review**: Once your PR is reviewed and approved, it will be merged into the main branch. Congratulations on your contribution!
218 |
219 |
220 |
221 | Contributor Graph
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 | ---
231 |
232 | ## License
233 |
234 | This project is licensed under the GNU General Public License, version 2, as stated in the header of `xls2txt.c`.
235 |
236 | ---
237 |
238 | ## Acknowledgments
239 |
240 | - List any resources, contributors, inspiration, etc. here.
241 |
242 | ---
243 |
--------------------------------------------------------------------------------
/xls2txt.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2005-2011 Jan Bobrowski
3 | * Copyright (c) 2011-2017 Sebastian Freundt
4 | *
5 | * This program is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU General Public License
7 | * version 2 as published by the Free Software Foundation.
8 | */
9 |
10 | /*
11 | * Based on information from sc.openoffice.org/excelfileformat.pdf
12 | * Some bugs spotted by Sebastian Freundt were fixed
13 | */
14 |
15 | #include "xls2txt.h"
16 | #include
17 | #ifdef linux
18 | # include
19 | #endif /* linux */
20 | #include
21 | #include
22 |
/* Abort with a "Truncated" message tagged with the source line of the failed check. */
#define TRUNC errx(1, "Truncated &%d", __LINE__)
/*
 * Abort with message T, or "Format error" when T is empty, tagged with the
 * source line.  T must be a string literal (or nothing) because it is pasted
 * next to other literals.  The expansion already ends in ';', and several
 * call sites rely on that by omitting their own semicolon.
 */
#define BADF(T) errx(1, *T""?T" &%d":"Format error &%d", __LINE__);
25 |
/* Command-line options and the output cursor, shared by the whole file. */
struct g {
	unsigned all:1;		// -A: print every sheet
	unsigned sel:1;		// -n: a sheet was selected explicitly
	unsigned nofmt:1;	// -f: print numbers with plain "%f"
	unsigned titles:1;	// set together with -A: print sheet names
	unsigned biff2ok:1;	// -d: accept BIFF2 input
	int nr;			// sheet number
	int row, col;		// current pos
	unsigned top, bottom, left, right;	// selected cell range, bounds inclusive
} g;
36 |
/* One entry of the shared-string index filled in by read_sst(). */
struct sst {
	u8 *ptr, *rend;	// ptr: start of the string (its 16-bit length field); rend: end of the record that ptr lies in
};
40 |
/* Simplified number format, as consumed by print_fmt(). */
struct fmt {
	unsigned type:8; // 0:default, 1:num, 2:exponent, 3:date, 4:time, 5:date-time
	unsigned arg:8;  // precision for types 1 and 2; also handed to print_time() for 3-5
};
45 |
/*
 * Growable array of fixed-size elements.  A zero-filled struct with esize
 * set is a valid empty table.
 */
struct tab {
	void *tab;	/* element storage; NULL until the first allocation */
	int nelem;	/* number of elements in use */
	int aelem;	/* number of elements allocated */
	int esize;	/* size of one element in bytes */
};

/* Address of element n; performs no bounds check. */
static inline void *tab_ptr(struct tab *tab, unsigned n)
{
	return (char *)tab->tab + (size_t)n * tab->esize;
}

/* Typed lvalue access to element N of table S holding elements of type T. */
#define TAB(S,T,N) (*(T*)((char*)(S).tab + (N) * sizeof(T)))

/*
 * Return a pointer to element n, growing the table when n is past the end.
 *
 * When the table grows, every new element -- the gap between the old end and
 * n, and element n itself -- is initialised from dflt (or zero-filled when
 * dflt is NULL), so the returned slot never holds indeterminate bytes.
 * Existing elements are left untouched.  Exits through err() if memory
 * cannot be obtained.
 */
void *tab_alloc(struct tab *tab, unsigned n, const void *dflt)
{
	unsigned char *p;
	unsigned fill;

	if (n < (unsigned)tab->nelem)
		return tab_ptr(tab, n);

	if (n >= (unsigned)tab->aelem) {
		/* round the capacity up to the next multiple of 16 above n */
		size_t cap = ((size_t)n + 16) & ~(size_t)15;
		void *mem = realloc(tab->tab, cap * (size_t)tab->esize);

		if (!mem)
			err(1, "realloc");
		tab->tab = mem;
		tab->aelem = (int)cap;
	}

	p = tab_ptr(tab, tab->nelem);
	for (fill = n - tab->nelem + 1; fill; fill--) {
		if (dflt)
			memcpy(p, dflt, tab->esize);
		else
			memset(p, 0, tab->esize);
		p += tab->esize;
	}
	tab->nelem = n + 1;

	/* p is one past element n after the fill loop */
	return p - tab->esize;
}
79 |
/* State of the workbook stream currently being converted. */
struct xls {
	meml_t map;	// workbook stream; code in this file uses map.ptr and map.len
	u8 *end;	// map.ptr + map.len, set in main()
	u8 *shptr;	// NOTE(review): no use of this field is visible in this file
/*
 first byte pair of BOF : version field -> dialect

 00:* BIFF2
 02:* BIFF3
 04:* BIFF4
 08:0500 BIFF5 (and BIFF7)
 08:0600 BIFF8

 08:0000 BIFF5
 08:0200 BIFF2
 08:0300 BIFF3
 08:0400 BIFF4
*/
	enum {BIFF2=2,BIFF3=3,BIFF4=4,BIFF5=5,BIFF8=6} biffv;	// dialect chosen by check_biffv()
	unsigned e1904;	// first byte of the DATEMODE record; non-zero selects the 1904 date system in print_time()

	struct sst *sst;	// shared-string index with nsst entries, built by read_sst()
	unsigned nsst;

	struct tab fmt;		// struct fmt per number-format index
	struct tab xf_ptr;	// u8* to each XF record, in the order they were read
	struct tab xf_fmt;	// cache of const struct fmt* per XF index, filled by fmt_from_xf()
};

/* The single workbook handled by this process. */
static struct xls x;
108 |
109 | void check_biffv(u8 *p)
110 | {
111 | int v;
112 | if(p[0]!=9)
113 | errx(1, "Format not recognized");
114 | switch(p[1]) {
115 | case 0:
116 | biff2:
117 | if (!g.biff2ok) {
118 | errx(1, "Format not supported (BIFF2), try with -d");
119 | }
120 | v = BIFF2; goto ok;
121 | case 2: v = BIFF3; goto ok;
122 | case 4: v = BIFF4; goto ok;
123 | case 8: break;
124 | default:
125 | nsupp:
126 | errx(1, "Format not supported");
127 | }
128 | switch(p[5]) {
129 | case 0: v = BIFF5; break;
130 | case 2: goto biff2;
131 | default:
132 | v = p[5];
133 | if(vBIFF8)
134 | goto nsupp;
135 | }
136 | ok:
137 | x.biffv = v;
138 | }
139 |
140 | static u8 *print_str(u8 *p, int l)
141 | {
142 | if(x.biffv < BIFF8) {
143 | p = print_cp_str(p, l);
144 | } else {
145 | u8 f = *p++;
146 | int a=0;
147 | if(f&8) {a += 4*g16(p); p += 2;}
148 | if(f&4) {a += g32(p); p += 4;}
149 | p = a + print_uni(p, l, f);
150 | }
151 | return p;
152 | }
153 |
154 | static void print_sst(int n)
155 | {
156 | u8 *p, *re, f;
157 | unsigned l;
158 |
159 | if(n<0 || n>=x.nsst)
160 | BADF("Wrong string index");
161 |
162 | p = x.sst[n].ptr;
163 | re = x.sst[n].rend;
164 | l = g16(p); f = p[2]; p += 3;
165 | p += (f&8 ? 2 : 0) + (f&4 ? 4 : 0);
166 | for(;;) {
167 | int s = re - p;
168 | f &= 1;
169 | if(l <= s>>f)
170 | break;
171 |
172 | if(re[0] != 0x3C) // CONTINUE
173 | BADF("String truncated");
174 |
175 | l -= s>>f;
176 | if(s&f)
177 | BADF("String cut at the middle of a char");
178 | print_uni(p, s>>f, f);
179 |
180 | p = re + 4;
181 | re = p + g16(re+2);
182 | f = *p++;
183 | }
184 | print_uni(p, l, f);
185 | }
186 |
187 | static u8 *read_sst(u8 *p, u8 *re, u8 *fe)
188 | {
189 | unsigned nsst;
190 |
191 | x.nsst = g32(p+4);
192 | if(!x.nsst)
193 | return re;
194 |
195 | x.sst = calloc(x.nsst, sizeof *x.sst);
196 | if(!x.sst) err(1, "calloc");
197 |
198 | p += 8;
199 |
200 | for(nsst = 0;;) {
201 | unsigned l, a;
202 | u8 f;
203 |
204 | if(re-p < 3)
205 | BADF("String table truncated");
206 |
207 | x.sst[nsst].ptr = p;
208 | x.sst[nsst].rend = re;
209 | if(++nsst == x.nsst)
210 | break;
211 |
212 | l = g16(p);
213 | f = p[2]; p += 3;
214 | a = 0;
215 | if(f&8) {a = 4*g16(p); p += 2;}
216 | if(f&4) {a += g32(p); p += 4;}
217 | // fmt_assert(p= l<>f;
226 | if(re[0] != 0x3C) // CONTINUE
227 | BADF("String truncated");
228 | p = re + 4;
229 | re = p + g16(re+2);
230 | // fmt_assert(re < fe);
231 | f = *p++;
232 | }
233 | p += l< a) break;
237 | a -= s;
238 |
239 | if(re[0] != 0x3C) // CONTINUE
240 | BADF("String truncated");
241 | p = re + 4;
242 | re = p + g16(re+2);
243 | // fmt_assert(re < fe);
244 | }
245 | p += a;
246 | }
247 | return re;
248 | }
249 |
250 | static const struct fmt default_fmt;
251 | static const u8 *null_ptr;
252 |
253 | static void xls_init_struc()
254 | {
255 | static u8 t[] = {0,0x10,0x12,0x10,0x12,0x10,0x10,0x12,0x12,0x12,0x14,
256 | 0x22,0,0,0x30,0x30,0x30,0x30,0x40,0x40,0x40,0x40,0x50,0,0,0,
257 | 0,0,0,0,0,0,0,0,0,0,0,0x10,0x10,0x12,0x12,0x10,0x10,0x12,0x12,
258 | 0x40,0x40,0x40,0x21};
259 | struct fmt *tab;
260 | int i;
261 |
262 | x.fmt.esize = sizeof default_fmt;
263 | x.fmt.nelem = 0;
264 | x.xf_ptr.esize = sizeof null_ptr;
265 | x.xf_ptr.nelem = 0;
266 | x.xf_fmt.esize = sizeof null_ptr;
267 | x.xf_fmt.nelem = 0;
268 | x.e1904 = 0;
269 |
270 | tab_alloc(&x.fmt, elemof(t)-1, &default_fmt);
271 | tab = x.fmt.tab;
272 | for (i=0; i < elemof(t); i++) {
273 | tab[i].type = t[i] >> 4;
274 | tab[i].arg = t[i] & 0xf;
275 | }
276 | }
277 |
278 | static void
279 | getstr(u16 *d, u8 *p, int l)
280 | {
281 | int v = 0;
282 | if (x.biffv >= BIFF8) {
283 | v = *p++ & 1;
284 | }
285 | // XXX
286 | if (v) {
287 | while (--l>=0) {
288 | d[l] = g16(p+2*l);
289 | }
290 | } else {
291 | while (--l>=0) {
292 | d[l] = p[l];
293 | }
294 | }
295 | return;
296 | }
297 |
298 | static void
299 | parse_fmt(struct fmt *f, u16 *p, int l)
300 | {
301 | u16 *e = p + l;
302 | u16 *q, *d;
303 |
304 | f->type = 0;
305 | f->arg = 0;
306 |
307 | if (e == p) {
308 | return;
309 | }
310 | q = p;
311 | while (*q=='[') {
312 | do {
313 | if (++q == e) {
314 | return;
315 | }
316 | } while(*q != ']');
317 | if (++q == e) {
318 | return;
319 | }
320 | }
321 | if (*p == 'Y' || *p == 'M' || *p == 'D' || *p == 'd' || *p == 'm') {
322 | f->type = 5;
323 | return;
324 | }
325 | if (*p == 'h') {
326 | f->type = 4;
327 | return;
328 | }
329 |
330 | p = q;
331 | d = 0;
332 | for (;;) {
333 | if (*q == '.') {
334 | d = q;
335 | break;
336 | }
337 | if (*q>=128 || !strchr("0#?, ", *q) || ++q==e) {
338 | break;
339 | }
340 | }
341 | if (!d) {
342 | if(p!=q && (q==e || *q!='/')) {
343 | // f->arg = 0;
344 | f->type = 1;
345 | }
346 | return;
347 | }
348 | while (++q < e) {
349 | if (*q != '0' && *q != '#') {
350 | break;
351 | }
352 | }
353 |
354 | f->arg = q - d - 1;
355 | f->type = 1;
356 | return;
357 | }
358 |
359 | static void
360 | set_fmt(u8 *p)
361 | {
362 | u8 *q;
363 | int n, l;
364 | struct fmt *fmt;
365 | u16 t[128];
366 |
367 | q = p+1;
368 | if (x.biffv >= BIFF4) {
369 | q += 2;
370 | }
371 | n = x.biffv < BIFF5 ? x.fmt.nelem : g16(p);
372 | l = q[-1];
373 | if (x.biffv >= BIFF8) {
374 | l = g16(p+2), q++;
375 | }
376 |
377 | if (l > elemof(t)) {
378 | return;
379 | }
380 |
381 | getstr(t, q, l);
382 | fmt = (struct fmt*)tab_alloc(&x.fmt, n, &default_fmt);
383 | parse_fmt(fmt, t, l);
384 | return;
385 | }
386 |
/*
 * Find the number format for XF index xf and cache it in x.xf_fmt under the
 * index that was asked for.
 *
 * Returns &default_fmt, after a warning and without caching, when the index
 * is out of range or no XF record is stored for it.  Also returns
 * &default_fmt (cached) when the record names no usable format.
 */
static const struct fmt*
fmt_from_xf(int xf)
{
	const struct fmt *fmt = &default_fmt;
	int n, st, ua, org_xf;
	u8 *p;

	if (xf >= x.xf_ptr.nelem) {
bad_xf:
		warnx("Strange XF index %u -- ignored", xf);
		return fmt;
	}

	org_xf = xf;

again:
	p = TAB(x.xf_ptr, u8*, xf);
	if (!p) {
		goto bad_xf;
	}

	/* n: format index; st/ua: offsets of two flag bytes, which differ per dialect */
	if (x.biffv < BIFF5) {
		/* 0x02 */
		n = p[1];
		st = 2;
		ua = x.biffv < BIFF4 ? 3 : 5;
	} else {
		/* 0xE0 */
		n = g16(p+2);
		st = 4;
		ua = x.biffv < BIFF8 ? 7 : 9;
	}
	/* from here on st and ua hold the flag bytes themselves */
	st = p[st];
	ua = p[ua];

	if (!((st ^ ua) & 4) && (st + ua != 1)) {
		/* format not present */
		if (!(st & 4) || xf!=org_xf) {
			/* not a style or loop */
			p += x.biffv!=BIFF4 ? 4 : 2;
			/* retry with the XF index stored in this record -- presumably its parent; verify against the format spec */
			xf = g16(p) >> 4;
			if (xf!=org_xf && xf < x.xf_ptr.nelem) {
				goto again;
			}
		}
	} else if (n < x.fmt.nelem) {
		fmt = &TAB(x.fmt, struct fmt, n);
	}

	/* remember the answer so print_fmt() can skip this lookup next time */
	*(const struct fmt**)tab_alloc(&x.xf_fmt, org_xf, &null_ptr) = fmt;
	return fmt;
}
439 |
440 | static void print_time(int m, int f, double v);
441 |
442 | static void
443 | print_fmt(const u8 *xfp, double v)
444 | {
445 | const struct fmt *f;
446 | unsigned xf;
447 |
448 | if (g.nofmt) {
449 | printf("%f", v);
450 | return;
451 | }
452 |
453 | if (x.biffv == BIFF2) {
454 | int n = xfp[1] & 63;
455 | f = &default_fmt;
456 | if (n < x.fmt.nelem) {
457 | f = &TAB(x.fmt, struct fmt, n);
458 | }
459 | goto have_fmt;
460 | }
461 |
462 | xf = g16(xfp);
463 | if (xf < x.xf_fmt.nelem) {
464 | f = TAB(x.xf_fmt, struct fmt*, xf);
465 | if (f) {
466 | goto have_fmt;
467 | }
468 | }
469 | f = fmt_from_xf(xf);
470 | have_fmt:
471 |
472 | switch (f->type) {
473 | case 0:
474 | if (ceil(v) == v) {
475 | printf("%.f", v);
476 | break;
477 | }
478 | default:
479 | printf("%f", v);
480 | break;
481 | case 1:
482 | printf("%.*f", f->arg, v);
483 | break;
484 | case 2:
485 | printf("%.*E", f->arg, v);
486 | break;
487 | case 3:
488 | case 4:
489 | case 5:
490 | print_time(f->type-2, f->arg, v);
491 | break;
492 | }
493 | return;
494 | }
495 |
496 | static void
497 | print_time(int m, int f, double v)
498 | {
499 | int d;
500 | time_t t;
501 | struct tm *tm;
502 |
503 | d = v;
504 | v -= d;
505 | if (x.e1904) {
506 | d += 4*365;
507 | } else if (d <= 60) {
508 | d++;
509 | }
510 | d -= 25569;
511 |
512 | t = d*24*60*60 + (unsigned)(v*24*60*60);
513 | tm = gmtime(&t);
514 | if (!tm) {
515 | printf("#BAD"); // XXX
516 | return;
517 | }
518 | if (m==3 && !f && !v) {
519 | m = 1;
520 | }
521 | if (m&1) {
522 | printf("%04u-%02u-%02u",
523 | tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
524 | if (m==1) {
525 | return;
526 | }
527 | printf(" ");
528 | }
529 | printf("%2u:%02u:%02u", tm->tm_hour, tm->tm_min, tm->tm_sec);
530 | return;
531 | }
532 |
533 | static void
534 | print_rk(const u8 *xfp, u32 rk)
535 | {
536 | double v;
537 | if (rk & 2) {
538 | v = (s32)rk>>2;
539 | } else {
540 | v = ieee754((u64)(rk&~3) << 32);
541 | }
542 | if (rk & 1) {
543 | v /= 100;
544 | }
545 | print_fmt(xfp, v);
546 | return;
547 | }
548 |
/* Cursor over the record stream in x.map. */
struct rr {
	int o, l, id;	// o: offset of the next record; l: body length of the current one; id: low byte of its record id
};

/*
 * Read the 4-byte record header at offset rr.o, set rr.l and rr.id, point P
 * at the record body and advance rr.o past the body.  Exits through TRUNC
 * when the header or body does not fit in the mapping.
 *
 * Only the low byte of the record id is stored; callers read the high byte
 * as P[-3].  Requires a local `struct rr rr`.  Expands to several
 * statements, so it cannot be the sole body of an unbraced if/else.
 */
#define GETRR(P) \
 if (4 > x.map.len-rr.o) { \
 TRUNC; \
 } \
 rr.l = g16(x.map.ptr+rr.o+2); \
 rr.id = x.map.ptr[rr.o]; \
 rr.o += 4; \
 if (rr.l > x.map.len-rr.o) { \
 TRUNC; \
 } \
 (P) = x.map.ptr + rr.o; \
 rr.o += rr.l;

/* Exit unless the current record body is at least L bytes long. */
#define EXPLEN(L) if(rr.l < (L)) errx(1, "Record too short &%d", __LINE__);
567 |
568 | static int
569 | skip_substream(int o)
570 | {
571 | struct rr rr;
572 | int d = 1;
573 | rr.o = o;
574 | for (;;) {
575 | u8 *p, sv;
576 | GETRR(p)
577 | sv = p[-3];
578 | switch(rr.id) {
579 | case 0x09:
580 | if (sv<0x10) {
581 | d++;
582 | }
583 | break;
584 | case 0x0A:
585 | if (!sv && !--d) {
586 | return rr.o;
587 | }
588 | }
589 | }
590 | TRUNC;
591 | }
592 |
/*
 * Scan records from offset o and collect workbook-wide settings: code page,
 * shared strings, number formats, XF records and the date mode.  Scanning
 * stops at the first cell record or at the EOF record.
 *
 * Returns the offset of SHEET record number g.nr (counted from 0), or 0 when
 * no such record was seen before scanning stopped.
 */
static int
read_init_rr(int o)
{
	struct rr rr;
	int sh, nr;
	u8 *p;

	xls_init_struc();
	rr.o = o;
	nr = g.nr; sh = 0;

	for (;;) {
		GETRR(p)

		switch(rr.id) {
		case 0x42: // CODEPAGE
			set_codepage(g16(p));
			break;
		case 0xFC: // SST
			/* read_sst() returns the end of the last record it consumed */
			rr.o = read_sst(p, x.map.ptr+rr.o, x.end) - x.map.ptr;
			break;
		case 0x1E: // FORMAT
			set_fmt(p);
			break;
		case 0x43:
		case 0xE0: // XF
			/* append: XF records are indexed by their order in the file */
			*(u8**)tab_alloc(&x.xf_ptr, x.xf_ptr.nelem, &null_ptr) = p;
			break;
		case 0x04: // LABEL
		case 0x03: // NUMBER
		case 0x06: // FORMULA
		case 0x07: // STRING
		case 0x7E: // RK
			/* first cell record: the settings section is over */
			return sh;
		case 0x09: // BOF
			if (p[-3]>=0x10) {
				break;
			}
			rr.o = skip_substream(rr.o);
			break;
		case 0x0A: // EOF
			if (p[-3]) {
				break;
			}
			return sh;
		case 0x85: // SHEET
			if(!nr--) {
				/* offset of the record header, 4 bytes before the body */
				sh = p - 4 - x.map.ptr;
			}
			break;
		case 0x22: // DATEMODE
			x.e1904 = p[0];
			break;
		}
	}
}
649 |
650 | int to_cell(int r, int c)
651 | {
652 | if(r < g.top || r > g.bottom) {
653 | g.row = r;
654 | return 0;
655 | }
656 | if(g.row < g.top)
657 | g.row = g.top;
658 | if(g.row < r) {
659 | g.col = 0;
660 | do {
661 | putchar('\n');
662 | g.row++;
663 | } while(g.row < r);
664 | }
665 | if(c < g.left || c > g.right) {
666 | g.col = c;
667 | return 0;
668 | }
669 | if(g.col < g.left)
670 | g.col = g.left;
671 | while(g.col < c) {
672 | putchar('\t');
673 | g.col++;
674 | }
675 | return 1;
676 | }
677 |
678 | static int to_cell_p(u8 *p) {return to_cell(g16(p), g16(p+2));}
679 |
680 | static inline int to_nx_cell() {return to_cell(g.row, g.col+1);}
681 |
682 | void print_sheet(int o, u8 *name, int nr)
683 | {
684 | struct rr rr;
685 | u8 pvrec;
686 |
687 | if(g.titles) {
688 | if(nr) putchar('\f');
689 | if(name) print_str(name+1, *name);
690 | putchar('\n');
691 | }
692 |
693 | rr.o = o;
694 | g.col = g.row = 0;
695 | pvrec = 0;
696 |
697 | for(;;) {
698 | u8 *p;
699 |
700 | GETRR(p)
701 | if (rr.id == 0x0A && !p[-3]) {
702 | // EOF
703 | break;
704 | }
705 |
706 | switch(rr.id) {
707 | case 0x09: // BOF
708 | if (p[-3]>=0x10) {
709 | break;
710 | }
711 | rr.o = skip_substream(rr.o);
712 | break;
713 | case 0x04: // LABEL
714 | if (to_cell_p(p)) {
715 | print_str(p+8, x.biffv==BIFF2 ? p[7] : g16(p+6));
716 | }
717 | break;
718 | case 0xFD: // LABELSST
719 | if (to_cell_p(p)) {
720 | print_sst(g32(p+6));
721 | }
722 | break;
723 | case 0x7E: // RK
724 | if (to_cell_p(p)) {
725 | print_rk(p+4, g32(p+6));
726 | }
727 | break;
728 | case 0xBD: { // MULRK
729 | u8 *q = p + rr.l - 11;
730 | int f = to_cell_p(p);
731 | for(;;) {
732 | p += 6;
733 | if (f) {
734 | print_rk(p-2, g32(p));
735 | }
736 | if (p>=q) {
737 | break;
738 | }
739 | f = to_nx_cell();
740 | }
741 | } break;
742 | case 0x02: // INTEGER
743 | if (to_cell_p(p)) {
744 | print_fmt(p+4, g16(p+7));
745 | }
746 | break;
747 | case 0x03: // NUMBER
748 | if(!to_cell_p(p)) {
749 | break;
750 | }
751 | number:
752 | print_fmt(p+4, ieee754(g64(x.biffv==BIFF2 ? p+7 : p+6)));
753 | break;
754 | case 0x06: // FORMULA
755 | if(!to_cell_p(p)) {
756 | pvrec = 0;
757 | break;
758 | }
759 | if (x.biffv==BIFF2 || g16(p+6+6) != 0xFFFF) {
760 | pvrec = 0;
761 | goto number;
762 | }
763 | // p[6] == 0: STRING follows
764 | if (p[6] == 1) {
765 | printf("%s", p[6+2] ? "true" : "false");
766 | }
767 | break;
768 | case 0x07: // STRING
769 | if (pvrec==0x06) {
770 | print_str(p+2, g16(p));
771 | }
772 | break;
773 | case 0xD6: // RSTRING
774 | if (to_cell_p(p)) {
775 | print_str(p+8, g16(p+6));
776 | }
777 | break;
778 | }
779 | pvrec = rr.id;
780 |
781 | if(g.row > g.bottom) {
782 | break;
783 | }
784 | }
785 | putchar('\n');
786 | }
787 |
/*
 * Print the selected sheet, or all sheets with -A, of the workbook in x.map.
 *
 * The 16-bit value at offset 2 of the first record's body picks the layout:
 * 0x10 is a lone sheet, 5 a globals section followed by sheets (BIFF5+),
 * 0x100 a BIFF4 workbook.  Exits through errx() when the requested sheet
 * does not exist or the structure is inconsistent.
 */
void print_xls()
{
	struct rr rr;
	int done;
	u8 *p;

	done = 0;
	rr.o = 0;
	GETRR(p)

	switch(g16(p+2)) {
	case 0x10: // single sheet
		if(g.nr) goto not_found;
		read_init_rr(rr.o);
		print_sheet(rr.o, 0, 0);
		return;
	case 0x100: goto workbook;
	case 5: goto globals;
	default:
		BADF("Bad content");
	}

	/* BIFF5+ */
globals:
	/* returns the offset of the selected SHEET record, 0 if there is none */
	rr.o = read_init_rr(rr.o);
	if(!rr.o)
		goto not_found;
	for(;;) {
		u32 o;
		GETRR(p)
		if(rr.id != 0x85) // SHEET
			break;
		o = rr.o;	/* remember where the next SHEET record starts */
		rr.o = g32(p);	/* stream offset of this sheet's BOF */
		if(rr.o >= x.map.len)
			TRUNC;
		/* the sheet must lie after the record that points to it */
		if(rr.o <= p-x.map.ptr)
			BADF( );
		/* only entries whose byte 4 is zero are printed */
		if(p[4]==0) {
			u8 *q;
			GETRR(q)
			if(rr.id != 0x09) BADF( );
			print_sheet(rr.o, p+6, done++);
			if(!g.all) break;
		} else if(g.sel)
			goto not_found;
		rr.o = o;
	}
	return;

	/* BIFF4W */
workbook:
	/* look for the SHEETOFFSET record, picking up the code page on the way */
	for(;;) {
		GETRR(p)
		switch(rr.id) {
			u32 o;
		case 0x42: // CODEPAGE
			EXPLEN(2)
			set_codepage(g16(p));
			break;
		case 0x8E: // SHEETOFFSET
			EXPLEN(4)
			o = g32(p);
			if(o >= x.map.len) TRUNC;
			rr.o = o;
			goto found;
		case 0x0A: // EOF
			if(p[-3]) break;
			BADF( );
		}
	}
found:
	GETRR(p)
	if(rr.id != 0x8F) // SHEETHDR
		goto not_found;
	EXPLEN(5)
	for(;;) {
		/* the header's first field is added to rr.o to find the next header */
		u32 o = g32(p);
		if(o >= x.map.len-rr.o)
			TRUNC;
		o += rr.o;
		if(!g.nr--) {
			u8 *name = p+4;
			GETRR(p)
			if(rr.id != 0x09) // BOF
				BADF( )
			if(g16(p+2) == 0x10) {
				read_init_rr(rr.o);
				print_sheet(rr.o, name, done++);
				if(!g.all)
					break;
			} else if(g.sel)
				errx(1, "Not a sheet");
			/* make the next header match as well: used by -A and after skipping a non-sheet */
			g.nr = 0;
		}
		rr.o = o;
		GETRR(p)
		if(rr.id != 0x8F) {
			if(!done)
				goto not_found;
			break;
		}
	}
	return;

not_found:
	errx(1, "No such sheet");
}
896 |
897 | void list_xls()
898 | {
899 | struct rr rr;
900 | u8 *p;
901 | int nr;
902 |
903 | rr.o = 0;
904 | GETRR(p)
905 | if(rr.id != 0x09) // BOF
906 | BADF( );
907 | switch(g16(p+2)) {
908 | case 0x10:
909 | printf("Single sheet\n");
910 | return;
911 | case 5:
912 | case 0x100:
913 | break;
914 | default:
915 | printf("Unknown contents\n");
916 | return;
917 | }
918 |
919 | nr = 0;
920 | for(;;) {
921 | GETRR(p)
922 | switch(rr.id) {
923 | u8 *q;
924 | char *k;
925 | case 0x0A: // EOF
926 | if(p[-3]) break;
927 | return;
928 | case 0x09: // BOF
929 | if(p[-3]>=0x10) break;
930 | rr.o = skip_substream(rr.o);
931 | break;
932 | case 0x42: // CODEPAGE
933 | set_codepage(g16(p));
934 | break;
935 | case 0x85: // SHEET
936 | k = "sheet";
937 | q = p;
938 | if(x.biffv > BIFF4) {
939 | switch(q[5]) {
940 | case 0: break;
941 | case 2: k="chart"; break;
942 | case 6: k="vbasic"; break;
943 | default: k=""; break;
944 | }
945 | q += 6;
946 | }
947 | printf("%2u. %-8s ", nr++, k);
948 | print_str(q+1, q[0]);
949 | putchar('\n');
950 | break;
951 | }
952 | }
953 | }
954 |
/*
 * Parse a cell reference such as "B7", "AA" or "12" starting at s.
 *
 * Upper-case letters, if present, give the column and are stored in *c as a
 * zero-based index using spreadsheet numbering (A=0 ... Z=25, AA=26,
 * AB=27, ...).  Digits, if present, give the one-based row, which is stored
 * zero-based in *r; a literal row 0 wraps around to UINT_MAX.  A part that
 * is absent leaves its output untouched.
 *
 * Returns a pointer to the first character that was not consumed.
 */
static char *parse_cell(char *s, unsigned *r, unsigned *c)
{
	unsigned a = *s - 'A';

	if (a < 26) {
		unsigned v = a;

		for (;;) {
			a = *++s - 'A';
			if (a >= 26)
				break;
			/*
			 * Column labels have no zero digit: "AA" follows "Z".
			 * Plain base 26 would map "AA" back onto column 0.
			 */
			v = 26*(v + 1) + a;
		}
		*c = v;
	}

	/* a is now the current character minus '0' */
	a += 'A' - '0';
	if (a < 10) {
		unsigned v = a;

		for (;;) {
			a = *++s - '0';
			if (a >= 10)
				break;
			v = 10*v + a;
		}
		*r = v - 1;
	}
	return s;
}
979 |
980 | void parse_range(char *s)
981 | {
982 | s = parse_cell(s, &g.top, &g.left);
983 | if(!*s) return;
984 | if(*s==':') {
985 | s = parse_cell(s+1, &g.bottom, &g.right);
986 | if(!*s) return;
987 | }
988 | errx(1, "unexpected char '%c' in cell range", *s);
989 | }
990 |
/*
 * Entry point: parse the options, open the workbook, detect its dialect and
 * either list its sheets (-l) or print the selected sheet(s).
 *
 * Returns 1 after printing the usage text, 0 otherwise; fatal problems exit
 * from inside the helpers.
 */
int main(int argc, char *argv[])
{
	char o=0;	/* 'l' when sheets are to be listed, not printed */

	for(;;) switch(getopt(argc, argv, "n:AlC:a12P:fdhV?-")) {
		int n;
	case -1: goto endopt;
	case 'n': g.sel=1; g.nr = atoi(optarg); break;
	case 'A': g.sel=0; g.all=1; g.titles=1; break;
	case 'l': o = 'l'; break;
	case 'C':
		n = find_charset(optarg);
		if(n<0) warnx("%s: Unknown charset", optarg);
		/* NOTE(review): set_charset() is still called with the negative value -- confirm it copes */
		set_charset(n);
		break;
	case 'a': set_charset(1); break;
	case '1': set_charset(2); break;
	case '2': set_charset(3); break;
	case 'P':
		n = atoi(optarg);
		if(n) set_codepage(n);
		break;
	case 'f': g.nofmt = 1; break;
	case 'd': g.biff2ok = 1; break;
	case '?':
		/* unknown options are skipped; only a literal -? asks for help */
		if(optopt!='?') break;
		/* fall through */
	case '-':
	case 'h':
	case 'V':
#define _STR(T) #T
#define STR(T) _STR(T)
		printf("xls2txt " STR(VERSION) " / "
		 "Copyright 2011 Jan Bobrowski / GPL\n");
		/* jumps into the switch below to print the usage text */
		goto usage;
	}
endopt:

	/* default range: everything up to row/column 0xFFFF */
	g.right = g.bottom = 0xFFFF;
	switch(argc-optind) {
	default:
usage:
		printf(
"usage: xls2txt [-C cs] [-n sheetnum|-A] [-f] file.xls [X:X]\n"
" xls2txt [-C cs] -l file.xls\n"
" X:X\tcell range (eg. A1:C5, D2:E)\n"
" -l\tlist sheets\n"
" -n num\tselect sheet\n"
" -A\tall sheets (\\f separated)\n"
" -C cs\toutput charset (utf8 asc iso1 iso2), utf8 is default\n"
" -f\tdon't try to format numbers\n"
" -a\tascii output (same as -C asc)\n"
		);
		return 1;
	case 1: break;
	case 2: parse_range(argv[argc-1]);
		break;
	}

	ole_open(argv[optind]);
	x.map = get_workbook();
	x.end = x.map.ptr + x.map.len;
	check_biffv(x.map.ptr);
	if(o)
		list_xls();
	else
		print_xls();

	return 0;
}
1060 |
--------------------------------------------------------------------------------