├── dbg ├── .gitignore ├── Workbook1.xls ├── .travis.yml ├── ieee754.c ├── ummap.h ├── myerr.h ├── Makefile ├── xls2txt.h ├── ummap.c ├── list.h ├── cp.c ├── ole.c ├── README.md └── xls2txt.c /dbg: -------------------------------------------------------------------------------- 1 | r 2 | bt 3 | q 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | xls2txt 3 | -------------------------------------------------------------------------------- /Workbook1.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hroptatyr/xls2txt/HEAD/Workbook1.xls -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | os: 6 | - osx 7 | 8 | osx_image: xcode8.3 9 | 10 | compiler: 11 | - clang 12 | 13 | script: 14 | - make 15 | - make check 16 | 17 | ## whitelist 18 | branches: 19 | only: 20 | - bld/macos 21 | - master 22 | 23 | notifications: 24 | email: 25 | - devel@fresse.org 26 | 27 | deploy: 28 | provider: releases 29 | api_key: 30 | secure: djHDB3+NuGRXkIiXksrgWUBme4nq1RV1V2NX2WrwL4FUgYcUXuGGKjPt4VhMl2wnEy1yCe4Ao3z7TEmcKgkpAqVKnSpe1qbTit7pixfDNeZsmDjgEM042twFpQG/0IFX14jlRqHD7BBV/eADdkg04icld4aL1aOsJzuey+sPgt8= 31 | file: xls2txt 32 | skip_cleanup: true 33 | on: 34 | repo: hroptatyr/xls2txt 35 | branch: bld/macos 36 | -------------------------------------------------------------------------------- /ieee754.c: -------------------------------------------------------------------------------- 1 | #define _ISOC99_SOURCE 2 | #include 3 | #include "xls2txt.h" 4 | 5 | #ifndef __i386__ 6 | 7 | double ieee754(u64 v) 8 | { 9 | int s, e; 10 | double r; 11 | 12 | s = v>>52; 13 | v &= 0x000FFFFFFFFFFFFFull; 14 | e = s & 0x7FF; 15 | if(!e) 
16 | goto denorm; 17 | if(e < 0x7FF) { 18 | v += 0x0010000000000000ull, e--; 19 | denorm: 20 | r = ldexp(v, e - 0x3FF - 52 + 1); 21 | } else if(v) { 22 | r = NAN; s ^= 0x800; 23 | } else 24 | r = INFINITY; 25 | if(s & 0x800) 26 | r = -r; 27 | return r; 28 | } 29 | 30 | #else 31 | 32 | double ieee754(u64 v) 33 | { 34 | union { 35 | u64 v; 36 | double d; 37 | } u; 38 | u.v = v; 39 | return u.d; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /ummap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005 Jan Bobrowski 3 | * 4 | * This library is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License version 2.1 as published by the Free Software Foundation. 7 | */ 8 | 9 | #include "list.h" 10 | 11 | #ifndef container_of 12 | #include 13 | #define container_of(P,T,M) ((T*)((char*)(P)-offsetof(T,M))) 14 | #endif 15 | 16 | struct ummap { 17 | list_t list; 18 | void *addr; 19 | int size; 20 | int (*handler)(struct ummap *, void *); 21 | }; 22 | 23 | extern unsigned um_page_sc, um_page_sz; 24 | 25 | int um_map(struct ummap *um); 26 | void um_unmap(struct ummap *um); 27 | int um_access_page(void *p); 28 | -------------------------------------------------------------------------------- /myerr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fake up a quick myerr.h: 3 | * 4 | * void err(int eval, const char *fmt, ...); 5 | * void errx(int eval, const char *fmt, ...); 6 | * void warnx(const char *fmt, ...); 7 | */ 8 | #include 9 | #include 10 | #include 11 | 12 | #define err(eval, fmt, ...) { \ 13 | (void)fprintf(stderr, "xls2txt: "fmt": ", ##__VA_ARGS__); \ 14 | (void)fprintf(stderr, "%s\n", strerror(errno)); \ 15 | exit(eval); } 16 | 17 | #define errx(eval, fmt, ...) 
{ \ 18 | (void)fprintf(stderr, "xls2txt: "); \ 19 | (void)fprintf(stderr, fmt"\n", ##__VA_ARGS__); \ 20 | exit(eval); } 21 | 22 | #define warnx(fmt, ...) \ 23 | (void)fprintf(stderr, "xls2txt: " fmt "\n", ##__VA_ARGS__) 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # gmake 2 | 3 | NAME = xls2txt 4 | VERSION = 0.15 5 | BINDEST = /usr/local/bin 6 | PKG=$(NAME)-$(VERSION) 7 | FILES = Makefile xls2txt.[ch] ole.c cp.c ummap.[ch] ieee754.c list.h myerr.h 8 | 9 | CFLAGS ?= -O2 -g -Wall 10 | LDFLAGS = -lm 11 | 12 | xls2txt: xls2txt.o ole.o cp.o ummap.o ieee754.o 13 | 14 | xls2txt.o: xls2txt.c xls2txt.h 15 | $(CC) $(CFLAGS) -DVERSION=$(VERSION) -c $< -o $@ 16 | 17 | install: xls2txt 18 | install -s $< $(BINDEST) 19 | 20 | clean: 21 | rm -f xls2txt $(addsuffix .o,$(basename $(filter %.c %.[ch],$(FILES)))) 22 | 23 | dist: 24 | ln -s . $(PKG) 25 | tar czf $(PKG).tar.gz --group=root --owner=root $(addprefix $(PKG)/, $(FILES)); \ 26 | rm $(PKG) 27 | 28 | check: xls2txt 29 | ./$< -l Workbook1.xls 30 | ./$< Workbook1.xls 31 | 32 | .PHONY: install clean dist check 33 | -------------------------------------------------------------------------------- /xls2txt.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "myerr.h" 6 | 7 | typedef unsigned char u8; 8 | typedef unsigned short u16; 9 | typedef unsigned int u32; 10 | typedef signed int s32; 11 | typedef unsigned long 12 | #ifndef __LP64__ 13 | long 14 | #endif 15 | u64; 16 | 17 | #ifdef __i386__ 18 | #define g16(P) (*(const u16*)(P)) 19 | #define g32(P) (*(const u32*)(P)) 20 | #define g64(P) (*(const u64*)(P)) 21 | #define p16(P,V) (*(u16*)(P)=(V)) 22 | #else 23 | static inline u16 g16(const void *p) {return ((const u8*)p)[0] | ((const u8*)p)[1]<<8;} 24 | static inline u32 g32(const void *p) {return 
g16(p) | g16((const u8*)p+2)<<16;} 25 | static inline u64 g64(const void *p) {return g32(p) | (u64)g32((const u8*)p+4)<<32;} 26 | static inline void p16(void *p, u16 v) {((u8*)p)[0]=v; ((u8*)p)[1]=v>>8;} 27 | #endif 28 | 29 | #define elemof(T) (sizeof T/sizeof*T) 30 | #define endof(T) (T+elemof(T)) 31 | 32 | typedef struct { 33 | u8 *ptr; 34 | unsigned len; 35 | } meml_t; 36 | 37 | double ieee754(u64); 38 | 39 | int ole_open(char *name); 40 | meml_t get_workbook(); 41 | 42 | int find_charset(char *name); 43 | void set_charset(int n); // output charset 44 | u8 *print_uni(u8 *p, int l, u8 f); 45 | void set_codepage(int n); // sheet codepage 46 | u8 *print_cp_str(u8 *p, int l); 47 | -------------------------------------------------------------------------------- /ummap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005 Jan Bobrowski 3 | * 4 | * This library is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU Lesser General Public 6 | * License version 2.1 as published by the Free Software Foundation. 7 | */ 8 | 9 | /* These procedures allow the user to employ virtual memory to map 10 | * arbitrary data to memory. The data can then be computed on-demand 11 | * instead of preparing it on start. 
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include // ffs 19 | #include "myerr.h" 20 | #include "ummap.h" 21 | 22 | unsigned um_page_sz, um_page_sc; 23 | 24 | static void um_sig(int n, siginfo_t *i, void *c); 25 | static struct sigaction um_sa; 26 | static LIST(maps); 27 | 28 | static void um_init() 29 | { 30 | um_page_sz = getpagesize(); 31 | um_page_sc = ffs(um_page_sz) - 1; 32 | 33 | um_sa.sa_sigaction = um_sig; 34 | um_sa.sa_flags = SA_SIGINFO|SA_RESETHAND; 35 | } 36 | 37 | static void um_sig(int n, siginfo_t *i, void *c) 38 | { 39 | struct ummap *um; 40 | unsigned long o; 41 | 42 | if(i->si_code == SEGV_ACCERR 43 | #ifdef __OpenBSD__ // XXX others too? 44 | //#if #system(bsd) 45 | || i->si_code == SEGV_MAPERR 46 | #endif 47 | #ifdef __APPLE__ 48 | || i->si_code == SEGV_MAPERR 49 | #endif 50 | ) { 51 | list_t *l; 52 | for(l=maps.next; l!=&maps; l=l->next) { 53 | um = list_item(l, struct ummap, list); 54 | o = (char*)i->si_addr - (char*)um->addr; 55 | if(o < um->size) 56 | goto found; 57 | } 58 | } 59 | return; 60 | 61 | found: 62 | if(um->handler(um, (char*)um->addr + (o & -um_page_sz)) >= 0) { 63 | sigaction(SIGSEGV, &um_sa, 0); 64 | sigaction(SIGBUS, &um_sa, 0); 65 | } 66 | } 67 | 68 | int um_access_page(void *p) 69 | { 70 | #if 0 71 | return (int)mmap( 72 | p, um_page_sz, 73 | PROT_READ|PROT_WRITE, 74 | MAP_PRIVATE|MAP_ANON|MAP_FIXED, 75 | -1, 0) == MAP_FAILED ? 
-1 : 0; 76 | #else 77 | return mprotect(p, um_page_sz, PROT_READ|PROT_WRITE); 78 | #endif 79 | } 80 | 81 | int um_map(struct ummap *um) 82 | { 83 | void *p; 84 | int v; 85 | 86 | if(!um_page_sz) 87 | um_init(); 88 | 89 | p = mmap(0, um->size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0); 90 | if(p==MAP_FAILED) 91 | return -1; 92 | um->addr = p; 93 | 94 | v = 0; 95 | v += sigaction(SIGSEGV, &um_sa, 0); 96 | v += sigaction(SIGBUS, &um_sa, 0); 97 | if(v>=0) list_add(&maps, &um->list); 98 | else munmap(p, um->size); 99 | return v; 100 | } 101 | 102 | void um_unmap(struct ummap *um) 103 | { 104 | munmap(um->addr, um->size); 105 | } 106 | -------------------------------------------------------------------------------- /list.h: -------------------------------------------------------------------------------- 1 | /* list.h by Jan Bobrowski. Inspired by list.h from Linux */ 2 | 3 | #ifndef LIST_H 4 | #define LIST_H 5 | 6 | typedef struct list { 7 | struct list *next, *prev; 8 | } list_t; 9 | 10 | static inline void list_link(struct list *a, struct list *b) 11 | { 12 | a->next = b; 13 | b->prev = a; 14 | } 15 | 16 | static inline void list_add(struct list *head, struct list *item) 17 | { 18 | struct list *first = head->next; 19 | list_link(head, item); 20 | list_link(item, first); 21 | } 22 | 23 | static inline void list_add_end(struct list *head, struct list *item) 24 | { 25 | struct list *last = head->prev; 26 | list_link(item, head); 27 | list_link(last, item); 28 | } 29 | 30 | static inline list_t *list_del(struct list *item) 31 | { 32 | struct list *prev = item->prev, *next = item->next; 33 | list_link(prev, next); 34 | return next; 35 | } 36 | 37 | static inline void list_init(struct list *head) 38 | { 39 | list_link(head, head); 40 | } 41 | 42 | /* delete item from one list and add it to another */ 43 | static inline void list_del_add(list_t *head, list_t *item) 44 | { 45 | list_t *prev = item->prev, *next = item->next; 46 | list_link(prev, next); 47 | next = head->next; 48 
| list_link(head, item); 49 | list_link(item, next); 50 | } 51 | 52 | /*static inline list_check(list_t *l) 53 | { 54 | list_t *a = l; 55 | list_t *b; 56 | do { 57 | b = a->next; 58 | assert(b->prev == a); 59 | if(a==l) break; 60 | a = b; 61 | } while(1); 62 | }*/ 63 | 64 | static inline void list_del_add_end(list_t *head, list_t *item) 65 | { 66 | list_t *prev = item->prev, *next = item->next; 67 | list_link(prev, next); 68 | prev = head->prev; 69 | list_link(item, head); 70 | item->prev = prev; 71 | prev->next = item; 72 | } 73 | 74 | static inline void list_del_init(struct list *item) 75 | { 76 | struct list *prev = item->prev, *next = item->next; 77 | list_link(item, item); 78 | list_link(prev, next); 79 | } 80 | 81 | static inline void list_join(struct list *a, struct list *b) 82 | { 83 | list_t *ae = a->prev; 84 | list_t *be = b->prev; 85 | b->prev = ae; 86 | a->prev = be; 87 | ae->next = b; 88 | be->next = a; 89 | } 90 | 91 | static inline int list_empty(struct list *head) 92 | { 93 | return head->next == head; 94 | } 95 | 96 | #define LIST(L) struct list L = {&L, &L} 97 | 98 | #define list_entry(L, T, M) ((T*)((char*)(L) - (long)(&((T*)0)->M))) 99 | #define list_item(L, T, M) ((T*)((char*)(L) - (long)(&((T*)0)->M))) 100 | 101 | #define list_first(H, T, M) list_item((H)->next, T, M) 102 | #define list_last(H, T, M) list_item((H)->prev, T, M) 103 | /* GNU C */ 104 | #define list_next(O, M) list_item((O)->M.next, typeof(*(O)), M) 105 | #define list_prev(O, M) list_item((O)->M.prev, typeof(*(O)), M) 106 | 107 | /* remove first element and return it */ 108 | static inline struct list *list_get(struct list *head) 109 | { 110 | struct list *item = head->next; 111 | struct list *next = item->next; 112 | list_link(head, next); 113 | return item; 114 | } 115 | 116 | /* remove first element, initialize and return it */ 117 | static inline struct list *list_get_init(struct list *head) 118 | { 119 | struct list *item = head->next; 120 | struct list *next = item->next; 
121 | list_link(item, item); 122 | list_link(head, next); 123 | return item; 124 | } 125 | 126 | #define list_get_entry(H, T, M) list_item(list_get((H)), T, M) 127 | #define list_get_init_entry(H, T, M) list_item(list_get_init((H)), T, M) 128 | #define list_get_item(H, T, M) list_item(list_get((H)), T, M) 129 | #define list_get_init_item(H, T, M) list_item(list_get_init((H)), T, M) 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /cp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005-2007 Jan Bobrowski 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * version 2 as published by the Free Software Foundation. 7 | */ 8 | 9 | #include "xls2txt.h" 10 | #include 11 | 12 | static u8 uni2cs[0x2E0-0xA0]; 13 | static u8 *cs = 0; 14 | static char badchar = '?'; 15 | 16 | static u8 fallbacks[] = " " 17 | " !cL\1Y|\4<\1-\6'\6>\3?AAAAAA\1CEEEEIIII\1NOOOOO\1OUUUUY\2aa" 18 | "aaaa\1ceeeeiiii\1nooooo\1ouuuuy\1yAaAaAaCcCcCcCcDdDdEeEeEeEe" 19 | "EeGgGgGgGgHhHhIiIiIiIiIi\2JjKk\1LlLlLlLlLlNnNnNnn\2OoOoOo\2R" 20 | "rRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzsbBBb\3Cc\2Dd\4FfG" 21 | "\3IKkl\2NnOOo\2Pp\5tTtTUu\1VYyZz\26AaIiOoUuUuUuUuUu\1AaAa\2G" 22 | "gGgKkOoOo\2j\3Gg\2NnAa\2OoAaAaEeEeIiIiOoOoRrRrUuUuSsTt\2HhNd" 23 | "\2ZzAaEeOoOoOoOoYy"; 24 | 25 | static u8 latin2[] = {160, 26 | 0xA0,3,0xA4,2,0xA7,0xA8,4,0xAD,2,0xB0,3,0xB4,3,0xB8,8,0xC1,0xC2, 27 | 1,0xC4,2,0xC7,1,0xC9,1,0xCB,1,0xCD,0xCE,4,0xD3,0xD4,1,0xD6,0xD7, 28 | 2,0xDA,1,0xDC,0xDD,1,0xDF,1,0xE1,0xE2,1,0xE4,2,0xE7,1,0xE9,1, 29 | 0xEB,1,0xED,0xEE,4,0xF3,0xF4,1,0xF6,0xF7,2,0xFA,1,0xFC,0xFD,4, 30 | 0xC3,0xE3,0xA1,0xB1,0xC6,0xE6,4,0xC8,0xE8,0xCF,0xEF,0xD0,0xF0,6, 31 | 0xCA,0xEA,0xCC,0xEC,29,0xC5,0xE5,2,0xA5,0xB5,2,0xA3,0xB3,0xD1, 32 | 0xF1,2,0xD2,0xF2,7,0xD5,0xF5,2,0xC0,0xE0,2,0xD8,0xF8,0xA6,0xB6,2, 33 | 
0xAA,0xBA,0xA9,0xB9,0xDE,0xFE,0xAB,0xBB,8,0xD9,0xF9,0xDB,0xFB,7, 34 | 0xAC,0xBC,0xAF,0xBF,0xAE,0xBE,159,159,10,0xB7,16,0xA2,0xFF,1, 35 | 0xB2,1,0xBD,0 36 | }; 37 | 38 | int find_charset(char *name) 39 | { 40 | const char names[] = "utf8asc\0iso1iso2"; 41 | int l, p; 42 | l = strlen(name); 43 | if(l<3 || l>4) return -1; 44 | for(p=0; names[p]; p+=4) 45 | if(memcmp(names+p, name, 4)==0) 46 | return p>>2; 47 | return -1; 48 | } 49 | 50 | static void expand(u8 *s) 51 | { 52 | u8 *d = uni2cs; 53 | u8 m = *s++; 54 | do { 55 | u8 c = *s++; 56 | if(c=endof(uni2cs)) break; 58 | else *d++ = c; 59 | } while(*s); 60 | } 61 | 62 | void set_charset(int n) 63 | { 64 | cs = 0; 65 | if(n==0) // utf8 66 | return; 67 | 68 | // memset(uni2cs, 0, sizeof uni2cs); 69 | expand(fallbacks); 70 | 71 | switch(n) { 72 | int u; 73 | case 1: // ascii 74 | break; 75 | case 2: // latin 1 76 | for(u=0x00A0; u<0x0100; u++) 77 | uni2cs[u-0xA0] = u; 78 | break; 79 | case 3: // latin 2 80 | expand(latin2); 81 | break; 82 | } 83 | cs = uni2cs; 84 | } 85 | 86 | static void print_uni_char(u16 u) 87 | { 88 | unsigned v = u; 89 | if(v<0x00A0) { 90 | if(v<0x20 || v>=0x7F) 91 | v = v==10 ? 
' ' : badchar; 92 | } else if(cs) { 93 | v -= 0xA0; 94 | if(v >= sizeof uni2cs || !(v = cs[v])) 95 | v = badchar; 96 | } else { 97 | v = v>>6 | 0xC0; 98 | if(u >= 0x800) { 99 | putchar(u>>12 | 0xE0); 100 | v = v&077 | 0x80; 101 | } 102 | putchar(v); 103 | v = u&077 | 0x80; 104 | } 105 | putchar(v); 106 | } 107 | 108 | u8 *print_uni(u8 *p, int l, u8 f) 109 | { 110 | if(f&1) 111 | while(--l >= 0) { 112 | print_uni_char(g16(p)); 113 | p += 2; 114 | } 115 | else 116 | while(--l >= 0) 117 | print_uni_char(*p++); 118 | return p; 119 | } 120 | 121 | // codepage 122 | 123 | static u16 *cp = 0; 124 | 125 | static u16 cp1250[128] = { 126 | 0,0,0x201A,0,0x201E,0x2026,0x2020,0x2021,0,0x2030,0x0160,0x2039, 127 | 0x015A,0x0164,0x017D,0x0179,0,0x2018,0x2019,0x201C,0x201D,0x2022, 128 | 0x2013,0x2014,0,0x2122,0x0161,0x203A,0x015B,0x0165,0x017E,0x017A, 129 | 0x00A0,0x02C7,0x02D8,0x0141,0x00A4,0x0104,0x00A6,0x00A7,0x00A8, 130 | 0x00A9,0x015E,0x00AB,0x00AC,0x00AD,0x00AE,0x017B,0x00B0,0x00B1, 131 | 0x02DB,0x0142,0x00B4,0x00B5,0x00B6,0x00B7,0x00B8,0x0105,0x015F, 132 | 0x00BB,0x013D,0x02DD,0x013E,0x017C,0,0x00C1,0x00C2,0x0102,0x00C4, 133 | 0x0139,0x0106,0x00C7,0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD, 134 | 0x00CE,0x010E,0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6, 135 | 0x00D7,0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF, 136 | 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,0x010D, 137 | 0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,0x0111,0x0144, 138 | 0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,0x0159,0x016F,0x00FA, 139 | 0x0171,0x00FC,0x00FD,0x0163,0x02D9, 140 | }; 141 | 142 | static u16 cp1252[128] = { 143 | 0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6, 144 | 0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,0x0090,0x2018, 145 | 0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,0x02DC,0x2122,0x0161, 146 | 0x203A,0x0153,0x009D,0x017E,0x0178,0x00A0,0x00A1,0x00A2,0x00A3, 147 | 0x00A4,0x00A5,0x00A6,0x00A7,0x00A8,0x00A9,0x00AA,0x00AB,0x00AC, 148 | 
0x00AD,0x00AE,0x00AF,0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5, 149 | 0x00B6,0x00B7,0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE, 150 | 0x00BF,0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7, 151 | 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,0x00D0, 152 | 0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,0x00D8,0x00D9, 153 | 0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,0x00E0,0x00E1,0x00E2, 154 | 0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,0x00E8,0x00E9,0x00EA,0x00EB, 155 | 0x00EC,0x00ED,0x00EE,0x00EF,0x00F0,0x00F1,0x00F2,0x00F3,0x00F4, 156 | 0x00F5,0x00F6,0x00F7,0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD, 157 | 0x00FE,0x00FF, 158 | }; 159 | 160 | static u16 cp1200[128]; // not initialized 161 | 162 | void set_codepage(int n) 163 | { 164 | if(n==1200) { 165 | int i; 166 | for(i=0x80; i<=0xFF; i++) cp1200[i-0x80] = i; 167 | cp = cp1200; 168 | } else if(n==1250 || n==0x8001) cp = cp1250; 169 | else if(n==1252) cp = cp1252; 170 | else if(n!=0x16F) warnx("%d: Codepage not supported", n); 171 | } 172 | 173 | u8 *print_cp_str(u8 *p, int l) 174 | { 175 | u8 *e = p + l; 176 | while(p 3 | * 4 | * This program is free software; you can redistribute it and/or 5 | * modify it under the terms of the GNU General Public License 6 | * version 2 as published by the Free Software Foundation. 
7 | */ 8 | 9 | /* 10 | * Based on information from sc.openoffice.org/compdocfileformat.pdf 11 | */ 12 | 13 | #include "xls2txt.h" 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "ummap.h" 19 | 20 | #define BADSEC (-5) 21 | 22 | struct stream_kind { 23 | unsigned secsc; 24 | unsigned secsz; 25 | u32 maxsec; 26 | s32 (*sat_get)(struct stream_kind *sk, u32 n); 27 | u8 *(*sec_ptr)(struct stream_kind *sk, u32 n); 28 | }; 29 | 30 | struct stream { 31 | struct stream_kind *kind; 32 | s32 start; 33 | s32 c_sec; 34 | unsigned c_pos; 35 | u8 *c_ptr; 36 | }; 37 | 38 | struct ole { 39 | meml_t map; 40 | int fd; 41 | char *name; 42 | 43 | s32 root; 44 | unsigned sec_tshld; 45 | struct stream ssat; 46 | struct stream container; 47 | 48 | s32 msat[109]; 49 | s32 msat_start; 50 | // s32 msat_size; 51 | 52 | struct stream_kind large_sec; 53 | struct stream_kind small_sec; 54 | } ole; 55 | 56 | #define oleerr(S) errx(1, "%s: %s", ole.name, S); 57 | #define oleerrf(F,A...) errx(1, "%s: " F, ole.name, A); 58 | 59 | static meml_t mmap_fd(int fd) { 60 | struct stat st; 61 | meml_t m; 62 | if(fstat(fd, &st)<0) err(1, "fstat"); 63 | m.ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, ole.fd, 0); 64 | if(m.ptr==MAP_FAILED) err(1, "mmap"); 65 | m.len = st.st_size; 66 | return m; 67 | } 68 | 69 | static s32 sat_get_lg(struct stream_kind *sk, u32 n); 70 | static u8 *sec_ptr_lg(struct stream_kind *sk, u32 n); 71 | static s32 sat_get_sm(struct stream_kind *sk, u32 n); 72 | static u8 *sec_ptr_sm(struct stream_kind *sk, u32 n); 73 | 74 | int ole_open(char *name) 75 | { 76 | u8 h[0x200]; 77 | int v; 78 | 79 | ole.name = name; 80 | v = open(name, O_RDONLY); 81 | if(v<0) err(1, "%s", name); 82 | ole.fd = v; 83 | 84 | v = read(ole.fd, h, sizeof h); 85 | if(vsecsc = g16(h+30); 111 | sk->secsz = 1<secsc; 112 | sk->maxsec = g32(h+44) << ole.large_sec.secsc-2; 113 | sk->sat_get = sat_get_lg; 114 | sk->sec_ptr = sec_ptr_lg; 115 | } 116 | 117 | ole.sec_tshld = g32(h+56); 118 | { 119 | 
struct stream_kind *sk = &ole.small_sec; 120 | sk->secsc = g16(h+32); 121 | sk->secsz = 1<secsc; 122 | sk->maxsec = g32(h+64) << ole.large_sec.secsc-2; 123 | sk->sat_get = sat_get_sm; 124 | sk->sec_ptr = sec_ptr_sm; 125 | } 126 | 127 | ole.ssat.start = g32(h+60); 128 | 129 | ole.root = g32(h+48); 130 | if(ole.root < 0) 131 | oleerr("There's no root stream"); 132 | 133 | return 1; 134 | } 135 | 136 | static void str_open(struct stream *str, struct stream_kind *sk, s32 start) 137 | { 138 | str->start = start; 139 | str->c_sec = start; 140 | str->c_pos = 0; 141 | str->kind = sk; 142 | str->c_ptr = sk->sec_ptr(sk, start); 143 | } 144 | 145 | #define SID_OK(K,N) ((u32)(N)<=(K)->maxsec) 146 | #define SID_GET(P,I) ((s32)g32((s32*)(P)+(I))) 147 | 148 | static s32 sat_get_lg(struct stream_kind *sk, u32 n) 149 | { 150 | unsigned m, maxsecidx; 151 | s32 b; 152 | 153 | maxsecidx = (1 << sk->secsc-2) - 1; 154 | m = n >> sk->secsc-2; n &= maxsecidx; 155 | if(m < elemof(ole.msat)) 156 | b = ole.msat[m]; 157 | else { 158 | u8 *p; 159 | b = ole.msat_start; 160 | m -= elemof(ole.msat); 161 | for(;;) { 162 | if(!SID_OK(sk, b)) 163 | return BADSEC; 164 | p = sk->sec_ptr(sk, b); 165 | if(m < maxsecidx) 166 | break; 167 | b = SID_GET(p, maxsecidx); 168 | m -= maxsecidx; 169 | } 170 | b = SID_GET(p, m); 171 | } 172 | if(SID_OK(sk, b)) { 173 | u8 *p = sk->sec_ptr(sk, b); 174 | return SID_GET(p, n); 175 | } 176 | return BADSEC; 177 | } 178 | 179 | static int str_seek(struct stream *str, unsigned o); 180 | 181 | static u8 *sec_ptr_lg(struct stream_kind *sk, u32 n) 182 | { 183 | return ole.map.ptr + (n<secsc); 184 | } 185 | 186 | static s32 sat_get_sm(struct stream_kind *sk, u32 n) 187 | { 188 | int o = str_seek(&ole.ssat, 4*n); 189 | if(o<0) return BADSEC; 190 | return g32(ole.ssat.c_ptr + o); 191 | } 192 | 193 | static u8 *sec_ptr_sm(struct stream_kind *sk, u32 n) 194 | { 195 | int o = str_seek(&ole.container, n<secsc); 196 | if(o<0) oleerr("small sector not found"); 197 | return 
ole.container.c_ptr + o; 198 | } 199 | 200 | static int str_seek(struct stream *str, unsigned o) 201 | { 202 | struct stream_kind *sk = str->kind; 203 | unsigned e = str->c_pos + sk->secsz; 204 | s32 b = str->c_sec; 205 | 206 | if(o < e) { 207 | if(o >= str->c_pos) 208 | goto ret; 209 | e = sk->secsz; 210 | b = str->start; 211 | if(o < e) goto found; 212 | } 213 | do { 214 | b = sk->sat_get(sk, b); 215 | if(!SID_OK(sk, b)) return -1; 216 | e += sk->secsz; 217 | } while(o >= e); 218 | 219 | found: 220 | str->c_sec = b; 221 | str->c_pos = e - sk->secsz; 222 | str->c_ptr = sk->sec_ptr(sk, b); 223 | ret: 224 | return o - str->c_pos; 225 | } 226 | 227 | static void open_small_streams() 228 | { 229 | struct stream_kind *sk = &ole.large_sec; 230 | u8 *p = sec_ptr_lg(sk, ole.root); 231 | 232 | if(!SID_OK(sk, ole.ssat.start) || 233 | !SID_OK(sk, g32(p+0x74))) oleerr("Small sector storage empty"); 234 | 235 | str_open(&ole.container, &ole.large_sec, g32(p+0x74)); 236 | str_open(&ole.ssat, &ole.large_sec, ole.ssat.start); 237 | } 238 | 239 | static struct ummap wbk_um; 240 | static struct stream wbk_str; 241 | 242 | /* this is executed by the signal handler */ 243 | static int str_get_page(struct ummap *um, u8 *d) 244 | { 245 | struct stream_kind *sk = wbk_str.kind; 246 | int n, c, l; 247 | u8 *s; 248 | 249 | n = str_seek(&wbk_str, d - (u8*)um->addr); 250 | if(n<0) return n; 251 | 252 | sk = wbk_str.kind; 253 | c = sk->secsz - n; 254 | s = wbk_str.c_ptr + n; 255 | 256 | n = um_access_page(d); 257 | if(n<0) return n; 258 | 259 | l = um_page_sz - c; 260 | if(l <= 0) { 261 | memcpy(d, s, um_page_sz); 262 | return 0; 263 | } 264 | memcpy(d, s, c); 265 | d += c; 266 | 267 | for(;;) { 268 | s32 b = sk->sat_get(sk, wbk_str.c_sec); 269 | if(!SID_OK(sk, b)) return 0; 270 | s = sk->sec_ptr(sk, b); 271 | wbk_str.c_sec = b; 272 | wbk_str.c_pos += sk->secsz; 273 | wbk_str.c_ptr = s; 274 | 275 | if(l <= sk->secsz) break; 276 | l -= sk->secsz; 277 | memcpy(d, s, sk->secsz); 278 | d += 
sk->secsz; 279 | } 280 | memcpy(d, s, l); 281 | 282 | return 0; 283 | } 284 | 285 | static u8 *find_slot(char *name) 286 | { 287 | struct stream_kind * const sk = &ole.large_sec; 288 | s32 b; 289 | u8 *p, *e; 290 | u16 l; 291 | 292 | b = ole.root; 293 | p = sk->sec_ptr(sk, b); 294 | l = 2*(strlen(name) + 1); 295 | e = p + sk->secsz; 296 | for(;;) { 297 | if(p[0x42]==2 && g16(p+0x40)==l) { 298 | unsigned i = 0; 299 | for(;; i++) { 300 | if(2*i >= l) 301 | return p; // found 302 | if(p[2*i] != (u8)name[i] || p[2*i+1]) 303 | break; 304 | } 305 | } 306 | p += 0x80; 307 | if(p < e) continue; 308 | 309 | b = sk->sat_get(sk, b); 310 | if(!SID_OK(sk, b)) break; 311 | p = sk->sec_ptr(sk, b); 312 | e = p + sk->secsz; 313 | } 314 | return 0; 315 | } 316 | 317 | meml_t get_workbook() 318 | { 319 | struct stream_kind *sk; 320 | u32 len, sid; 321 | u8 *p; 322 | 323 | if(!ole.map.ptr) 324 | return mmap_fd(ole.fd); 325 | 326 | p = find_slot("Workbook"); 327 | if(!p) { 328 | p = find_slot("Book"); 329 | if(!p) 330 | oleerr("No Workbook found"); 331 | } 332 | 333 | sid = g32(p+0x74); 334 | len = g32(p+0x78); 335 | 336 | sk = &ole.large_sec; 337 | if(len < ole.sec_tshld) { 338 | if(!ole.container.c_ptr) 339 | open_small_streams(); 340 | sk = &ole.small_sec; 341 | } 342 | 343 | if(!SID_OK(sk, sid)) 344 | oleerr("Stream is empty"); 345 | 346 | str_open(&wbk_str, sk, sid); 347 | 348 | wbk_um.size = len; 349 | wbk_um.handler = (int(*)(struct ummap*,void*))str_get_page; 350 | 351 | if(um_map(&wbk_um) < 0) 352 | err(1, "um_map"); 353 | 354 | return (meml_t){wbk_um.addr, wbk_um.size}; 355 | } 356 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

XLS2TXT

2 |

3 | Converting Excel to Text, Simplifying Complexity 4 |

5 |

6 | license 7 | last-commit 8 | repo-top-language 9 | repo-language-count 10 |

11 |

12 |

13 |

14 | 15 |

16 |
17 | 18 | ## Table of Contents 19 | 20 | - [ Overview](#-overview) 21 | - [ Features](#-features) 22 | - [ Project Structure](#-project-structure) 23 | - [ Project Index](#-project-index) 24 | - [ Getting Started](#-getting-started) 25 | - [ Prerequisites](#-prerequisites) 26 | - [ Installation](#-installation) 27 | - [ Usage](#-usage) 28 | - [ Testing](#-testing) 29 | - [ Contributing](#-contributing) 30 | - [ License](#-license) 31 | - [ Acknowledgments](#-acknowledgments) 32 | 33 | --- 34 | 35 | ## Overview 36 | 37 | xls2txt is a powerful tool that converts Microsoft Excel files to plain text formats, enabling seamless data exchange between systems. With its modular architecture and open-source principles, it streamlines data conversion and export, making it an essential component for various applications, from file management to numerical computations. 38 | 39 | --- 40 | 41 | ## Features 42 | 43 | | | Feature | Summary | 44 | | :--- | :---: | :--- | 45 | | ⚙️ | **Architecture** |
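  • Modular design: OLE container parsing (`ole.c`), character-set conversion (`cp.c`), on-demand memory mapping (`ummap.c`), floating-point decoding (`ieee754.c`) and the converter itself (`xls2txt.c`).
  • On-demand loading: the workbook stream is exposed through a virtual memory mapping whose pages are filled in the first time they are accessed (`ummap.c`, `ole.c`).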
  • Unified interface for mapping and unmapping memory regions through `ummap.h`.
| 46 | | 🔩 | **Code Quality** |
  • Errors are reported through the `err`, `errx` and `warnx` macros in `myerr.h`; they write to stderr with an `xls2txt: ` prefix, and `err`/`errx` then exit with the given status.
  • Conversion of 64-bit IEEE 754 bit patterns to `double` in `ieee754.c`.
  • Intrusive doubly linked list helpers (`list_add`, `list_del`, `list_item`, …) in `list.h`.
| 47 | | 📄 | **Documentation** |
  • Primary language is C, with a focus on documentation and transparency, as indicated by open-source licenses and references to external documentation.
  • Clear explanations of error handling mechanisms and code functionality through comments and documentation files.
  • Use of standard formats for data exchange, such as the Excel file format referenced in `sc.openoffice.org/excelfileformat.pdf`.
| 48 | | 🔌 | **Integrations** |
  • Character handling in `cp.c`: output charset selection (`utf8`, `asc`, `iso1`, `iso2`) and sheet codepage decoding (1200, 1250, 1252).
  • Use of virtual memory mapping to enable on-demand computation instead of preparation at start-up.
  • Connection to the Excel file format through `xls2txt.c`, which reads and interprets Excel file structures and converts relevant data to plain text format.
| 49 | | 🤖 | **Artificial Intelligence** |
  • No use of AI or machine learning algorithms in the provided codebase.
  • No references to AI-related libraries or frameworks in the codebase.
| 50 | | 📈 | **Performance** |
  • On i386 the IEEE 754 conversion in `ieee754.c` is a plain union reinterpretation; other targets rebuild the value with `ldexp`.
  • Constant-time insertion and removal through the doubly linked list in `list.h`.
  • Use of virtual memory mapping to enable on-demand computation instead of preparation at start-up.
| 51 | 52 | --- 53 | 54 | ## Project Structure 55 | 56 | ```sh 57 | └── xls2txt/ 58 | ├── Makefile 59 | ├── Workbook1.xls 60 | ├── cp.c 61 | ├── dbg 62 | ├── ieee754.c 63 | ├── list.h 64 | ├── myerr.h 65 | ├── ole.c 66 | ├── ummap.c 67 | ├── ummap.h 68 | ├── xls2txt.c 69 | └── xls2txt.h 70 | ``` 71 | 72 | 73 | ### Project Index 74 |
75 | XLS2TXT/ 76 |
77 | __root__ 78 |
79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 101 | 102 | 103 | 104 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 |
xls2txt.h- Shared header included by the other source files
- It defines the integer typedefs (`u8`, `u16`, `u32`, `s32`, `u64`), the little-endian accessors `g16`/`g32`/`g64`/`p16`, the `elemof`/`endof` array macros and the `meml_t` pointer-plus-length type
- It also declares `ieee754`, `ole_open`, `get_workbook` and the charset and codepage functions implemented in `cp.c`.
ummap.c- The ummap.c file uses virtual memory to map arbitrary data into memory, so the data can be computed on demand instead of being prepared at start-up
- It provides a mechanism for managing mapped pages and handling segmentation faults and bus errors
- The code achieves efficient memory management and error handling, making it an essential component of the project's overall architecture.
dbg- A three-line debugger command script containing `r`, `bt` and `q`
- These match gdb's abbreviations for run, backtrace and quit, so the file presumably runs the program under gdb, prints a backtrace when it stops, and exits
- It is not listed in the Makefile's `FILES` and is not used by any build target.
Makefile- The Makefile serves as the backbone of the project's build process, orchestrating the compilation and installation of various components
- It ensures that the executable is built from source files, installed in a designated directory, and cleaned up upon request
- The file also facilitates distribution and verification of the software package
- Overall, it streamlines the development workflow, enabling efficient management of dependencies and output.
ummap.h- The ummap.h file serves as a core component of the project's memory management system, providing a unified interface for mapping and unmapping memory regions
- It enables efficient access control and tracking of mapped pages, facilitating secure memory allocation and deallocation within the system.
xls2txt.c- **Summary** 105 | 106 | The `xls2txt.c` file is a critical component of the project's overall architecture
- It serves as a bridge between Microsoft Excel files and plain text formats, enabling data conversion and export. 107 | 108 | In essence, this code achieves the following: 109 | 110 | * Reads and interprets Excel file structures 111 | * Converts relevant data to plain text format 112 | * Generates human-readable output 113 | 114 | By integrating with other components of the project, `xls2txt.c` plays a vital role in facilitating data exchange between different systems
- Its functionality is crucial for the overall success of the project, which aims to provide a robust and efficient solution for converting Excel files to various formats. 115 | 116 | **Additional Context** 117 | 118 | The project's structure suggests that it is designed to be modular and scalable, with multiple components working together to achieve a common goal
- The inclusion of open-source licenses and references to external documentation (e.g., `sc.openoffice.org/excelfileformat.pdf`) indicates a commitment to transparency and community involvement. 119 | 120 | Overall, the `xls2txt.c` file is a key component of the project's architecture, enabling data conversion and export while adhering to open-source principles.
ieee754.c- Converts a 64-bit IEEE 754 double precision bit pattern (passed as a `u64`) into a native `double` via the `ieee754` function
- Achieves this by handling various edge cases such as denormalized and infinity values, while also considering different architectures (x86 and others)
- The function is designed to be portable and efficient, allowing it to be used throughout the codebase for accurate numerical computations.
myerr.h- Provides the error handling mechanism used by the project's core functionality
- The provided myerr.h file defines three macros to handle errors and warnings in a centralized manner
- These macros, err, errx, and warnx, ensure that error messages are printed to stderr along with the corresponding system error code, facilitating easier debugging and error reporting within the xls2txt application.
cp.c- The cp.c file handles character encoding conversions for the program's text output
- The `set_codepage` function sets the current code page based on the input value, and the `print_cp_str` function prints a string using the specified code page
- However, the `cp1200` array is not initialized, which may cause issues when used.
ole.c- The `get_workbook` function retrieves the workbook data from the file
- It first checks if a map is already available and returns its address if so
- If not, it maps a new ummap structure to the file using `um_map`
- The `str_get_page` function is used as the handler for the mapped pages.
list.h- The provided list.h file serves as the foundation for a dynamic linked list data structure, enabling efficient insertion, deletion, and manipulation of nodes within the list
- It facilitates operations such as adding items to the end or beginning of the list, removing specific elements, and checking for emptiness
- The code provides a robust framework for managing complex data structures in various applications.
143 |
144 |
145 |
146 | 147 | --- 148 | ## Getting Started 149 | 150 | ### Prerequisites 151 | 152 | Before getting started with xls2txt, ensure your build environment meets the following requirements: 153 | 154 | - **Programming Language:** C (a C compiler and `make`) 155 | 156 | 157 | ### Installation 158 | 159 | Install xls2txt using one of the following methods: 160 | 161 | **Build from source:** 162 | 163 | 1. Clone the xls2txt repository: 164 | ```sh 165 | ❯ git clone https://github.com/hroptatyr/xls2txt 166 | ``` 167 | 168 | 2. Navigate to the project directory: 169 | ```sh 170 | ❯ cd xls2txt 171 | ``` 172 | 173 | 3. Build the project: 174 | 175 | make 176 | 177 | 178 | 179 | ### Usage 180 | Run xls2txt using the following command: 181 | ./xls2txt [-C cs] [-n sheetnum|-A] [-f] file.xls [X:X] 182 | 183 | ### Testing 184 | Run the test suite using the following command: 185 | make check 186 | 187 | --- 188 | 189 | ## Contributing 190 | 191 | - **💬 [Join the Discussions](https://github.com/hroptatyr/xls2txt/discussions)**: Share your insights, provide feedback, or ask questions. 192 | - **🐛 [Report Issues](https://github.com/hroptatyr/xls2txt/issues)**: Submit bugs found or log feature requests for the `xls2txt` project. 193 | - **💡 [Submit Pull Requests](https://github.com/hroptatyr/xls2txt/blob/main/CONTRIBUTING.md)**: Review open PRs, and submit your own PRs. 194 | 195 |
196 | Contributing Guidelines 197 | 198 | 1. **Fork the Repository**: Start by forking the project repository to your github account. 199 | 2. **Clone Locally**: Clone the forked repository to your local machine using a git client. 200 | ```sh 201 | git clone https://github.com/hroptatyr/xls2txt 202 | ``` 203 | 3. **Create a New Branch**: Always work on a new branch, giving it a descriptive name. 204 | ```sh 205 | git checkout -b new-feature-x 206 | ``` 207 | 4. **Make Your Changes**: Develop and test your changes locally. 208 | 5. **Commit Your Changes**: Commit with a clear message describing your updates. 209 | ```sh 210 | git commit -m 'Implemented new feature x.' 211 | ``` 212 | 6. **Push to github**: Push the changes to your forked repository. 213 | ```sh 214 | git push origin new-feature-x 215 | ``` 216 | 7. **Submit a Pull Request**: Create a PR against the original project repository. Clearly describe the changes and their motivations. 217 | 8. **Review**: Once your PR is reviewed and approved, it will be merged into the main branch. Congratulations on your contribution! 218 |
219 | 220 |
221 | Contributor Graph 222 |
223 |

224 | 225 | 226 | 227 |

228 |
229 | 230 | --- 231 | 232 | ## License 233 | 234 | This project is protected under the [SELECT-A-LICENSE](https://choosealicense.com/licenses) License. For more details, refer to the [LICENSE](https://choosealicense.com/licenses/) file. 235 | 236 | --- 237 | 238 | ## Acknowledgments 239 | 240 | - List any resources, contributors, inspiration, etc. here. 241 | 242 | --- 243 | -------------------------------------------------------------------------------- /xls2txt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2005-2011 Jan Bobrowski 3 | * Copyright (c) 2011-2017 Sebastian Freundt 4 | * 5 | * This program is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU General Public License 7 | * version 2 as published by the Free Software Foundation. 8 | */ 9 | 10 | /* 11 | * Based on information from sc.openoffice.org/excelfileformat.pdf 12 | * Some bugs spotted by Sebastian Freundt were fixed 13 | */ 14 | 15 | #include "xls2txt.h" 16 | #include 17 | #ifdef linux 18 | # include 19 | #endif /* linux */ 20 | #include 21 | #include 22 | 23 | #define TRUNC errx(1, "Truncated &%d", __LINE__) 24 | #define BADF(T) errx(1, *T""?T" &%d":"Format error &%d", __LINE__); 25 | 26 | struct g { 27 | unsigned all:1; 28 | unsigned sel:1; // -n 29 | unsigned nofmt:1; 30 | unsigned titles:1; 31 | unsigned biff2ok:1; // -2 32 | int nr; // sheet number 33 | int row, col; // current pos 34 | unsigned top, bottom, left, right; 35 | } g; 36 | 37 | struct sst { 38 | u8 *ptr, *rend; 39 | }; 40 | 41 | struct fmt { 42 | unsigned type:8; // 1:num, 3:date, 4:time, 5:date-time 43 | unsigned arg:8; 44 | }; 45 | 46 | struct tab { 47 | void *tab; 48 | int nelem, aelem, esize; 49 | }; 50 | 51 | static inline void *tab_ptr(struct tab *tab, unsigned n) 52 | { 53 | return (char *)tab->tab + n * tab->esize; 54 | } 55 | 56 | #define TAB(S,T,N) (*(T*)((char*)(S).tab + (N) * sizeof(T))) 57 | 58 | void 
*tab_alloc(struct tab *tab, unsigned n, const void *dflt) 59 | { 60 | u8 *p; 61 | 62 | if(n < tab->nelem) 63 | return tab_ptr(tab, n); 64 | if(n >= tab->aelem) { 65 | int sz = n+16 & ~15; 66 | tab->aelem = sz; 67 | tab->tab = realloc(tab->tab, sz * tab->esize); 68 | if(!tab->tab) err(1, "realloc"); 69 | } 70 | p = tab_ptr(tab, tab->nelem); 71 | n -= tab->nelem; 72 | tab->nelem += n + 1; 73 | if(n) do { 74 | memcpy(p, dflt, tab->esize); 75 | p += tab->esize; 76 | } while(--n); 77 | return p; 78 | } 79 | 80 | struct xls { 81 | meml_t map; 82 | u8 *end; 83 | u8 *shptr; 84 | /* 85 | 00:* BIFF2 86 | 02:* BIFF3 87 | 04:* BIFF4 88 | 08:0500 BIFF5 (and BIFF7) 89 | 08:0600 BIFF8 90 | 91 | 08:0000 BIFF5 92 | 08:0200 BIFF2 93 | 08:0300 BIFF3 94 | 08:0400 BIFF4 95 | */ 96 | enum {BIFF2=2,BIFF3=3,BIFF4=4,BIFF5=5,BIFF8=6} biffv; 97 | unsigned e1904; 98 | 99 | struct sst *sst; 100 | unsigned nsst; 101 | 102 | struct tab fmt; 103 | struct tab xf_ptr; 104 | struct tab xf_fmt; 105 | }; 106 | 107 | static struct xls x; 108 | 109 | void check_biffv(u8 *p) 110 | { 111 | int v; 112 | if(p[0]!=9) 113 | errx(1, "Format not recognized"); 114 | switch(p[1]) { 115 | case 0: 116 | biff2: 117 | if (!g.biff2ok) { 118 | errx(1, "Format not supported (BIFF2), try with -d"); 119 | } 120 | v = BIFF2; goto ok; 121 | case 2: v = BIFF3; goto ok; 122 | case 4: v = BIFF4; goto ok; 123 | case 8: break; 124 | default: 125 | nsupp: 126 | errx(1, "Format not supported"); 127 | } 128 | switch(p[5]) { 129 | case 0: v = BIFF5; break; 130 | case 2: goto biff2; 131 | default: 132 | v = p[5]; 133 | if(vBIFF8) 134 | goto nsupp; 135 | } 136 | ok: 137 | x.biffv = v; 138 | } 139 | 140 | static u8 *print_str(u8 *p, int l) 141 | { 142 | if(x.biffv < BIFF8) { 143 | p = print_cp_str(p, l); 144 | } else { 145 | u8 f = *p++; 146 | int a=0; 147 | if(f&8) {a += 4*g16(p); p += 2;} 148 | if(f&4) {a += g32(p); p += 4;} 149 | p = a + print_uni(p, l, f); 150 | } 151 | return p; 152 | } 153 | 154 | static void print_sst(int n) 155 
| { 156 | u8 *p, *re, f; 157 | unsigned l; 158 | 159 | if(n<0 || n>=x.nsst) 160 | BADF("Wrong string index"); 161 | 162 | p = x.sst[n].ptr; 163 | re = x.sst[n].rend; 164 | l = g16(p); f = p[2]; p += 3; 165 | p += (f&8 ? 2 : 0) + (f&4 ? 4 : 0); 166 | for(;;) { 167 | int s = re - p; 168 | f &= 1; 169 | if(l <= s>>f) 170 | break; 171 | 172 | if(re[0] != 0x3C) // CONTINUE 173 | BADF("String truncated"); 174 | 175 | l -= s>>f; 176 | if(s&f) 177 | BADF("String cut at the middle of a char"); 178 | print_uni(p, s>>f, f); 179 | 180 | p = re + 4; 181 | re = p + g16(re+2); 182 | f = *p++; 183 | } 184 | print_uni(p, l, f); 185 | } 186 | 187 | static u8 *read_sst(u8 *p, u8 *re, u8 *fe) 188 | { 189 | unsigned nsst; 190 | 191 | x.nsst = g32(p+4); 192 | if(!x.nsst) 193 | return re; 194 | 195 | x.sst = calloc(x.nsst, sizeof *x.sst); 196 | if(!x.sst) err(1, "calloc"); 197 | 198 | p += 8; 199 | 200 | for(nsst = 0;;) { 201 | unsigned l, a; 202 | u8 f; 203 | 204 | if(re-p < 3) 205 | BADF("String table truncated"); 206 | 207 | x.sst[nsst].ptr = p; 208 | x.sst[nsst].rend = re; 209 | if(++nsst == x.nsst) 210 | break; 211 | 212 | l = g16(p); 213 | f = p[2]; p += 3; 214 | a = 0; 215 | if(f&8) {a = 4*g16(p); p += 2;} 216 | if(f&4) {a += g32(p); p += 4;} 217 | // fmt_assert(p= l<>f; 226 | if(re[0] != 0x3C) // CONTINUE 227 | BADF("String truncated"); 228 | p = re + 4; 229 | re = p + g16(re+2); 230 | // fmt_assert(re < fe); 231 | f = *p++; 232 | } 233 | p += l< a) break; 237 | a -= s; 238 | 239 | if(re[0] != 0x3C) // CONTINUE 240 | BADF("String truncated"); 241 | p = re + 4; 242 | re = p + g16(re+2); 243 | // fmt_assert(re < fe); 244 | } 245 | p += a; 246 | } 247 | return re; 248 | } 249 | 250 | static const struct fmt default_fmt; 251 | static const u8 *null_ptr; 252 | 253 | static void xls_init_struc() 254 | { 255 | static u8 t[] = {0,0x10,0x12,0x10,0x12,0x10,0x10,0x12,0x12,0x12,0x14, 256 | 0x22,0,0,0x30,0x30,0x30,0x30,0x40,0x40,0x40,0x40,0x50,0,0,0, 257 | 
0,0,0,0,0,0,0,0,0,0,0,0x10,0x10,0x12,0x12,0x10,0x10,0x12,0x12, 258 | 0x40,0x40,0x40,0x21}; 259 | struct fmt *tab; 260 | int i; 261 | 262 | x.fmt.esize = sizeof default_fmt; 263 | x.fmt.nelem = 0; 264 | x.xf_ptr.esize = sizeof null_ptr; 265 | x.xf_ptr.nelem = 0; 266 | x.xf_fmt.esize = sizeof null_ptr; 267 | x.xf_fmt.nelem = 0; 268 | x.e1904 = 0; 269 | 270 | tab_alloc(&x.fmt, elemof(t)-1, &default_fmt); 271 | tab = x.fmt.tab; 272 | for (i=0; i < elemof(t); i++) { 273 | tab[i].type = t[i] >> 4; 274 | tab[i].arg = t[i] & 0xf; 275 | } 276 | } 277 | 278 | static void 279 | getstr(u16 *d, u8 *p, int l) 280 | { 281 | int v = 0; 282 | if (x.biffv >= BIFF8) { 283 | v = *p++ & 1; 284 | } 285 | // XXX 286 | if (v) { 287 | while (--l>=0) { 288 | d[l] = g16(p+2*l); 289 | } 290 | } else { 291 | while (--l>=0) { 292 | d[l] = p[l]; 293 | } 294 | } 295 | return; 296 | } 297 | 298 | static void 299 | parse_fmt(struct fmt *f, u16 *p, int l) 300 | { 301 | u16 *e = p + l; 302 | u16 *q, *d; 303 | 304 | f->type = 0; 305 | f->arg = 0; 306 | 307 | if (e == p) { 308 | return; 309 | } 310 | q = p; 311 | while (*q=='[') { 312 | do { 313 | if (++q == e) { 314 | return; 315 | } 316 | } while(*q != ']'); 317 | if (++q == e) { 318 | return; 319 | } 320 | } 321 | if (*p == 'Y' || *p == 'M' || *p == 'D' || *p == 'd' || *p == 'm') { 322 | f->type = 5; 323 | return; 324 | } 325 | if (*p == 'h') { 326 | f->type = 4; 327 | return; 328 | } 329 | 330 | p = q; 331 | d = 0; 332 | for (;;) { 333 | if (*q == '.') { 334 | d = q; 335 | break; 336 | } 337 | if (*q>=128 || !strchr("0#?, ", *q) || ++q==e) { 338 | break; 339 | } 340 | } 341 | if (!d) { 342 | if(p!=q && (q==e || *q!='/')) { 343 | // f->arg = 0; 344 | f->type = 1; 345 | } 346 | return; 347 | } 348 | while (++q < e) { 349 | if (*q != '0' && *q != '#') { 350 | break; 351 | } 352 | } 353 | 354 | f->arg = q - d - 1; 355 | f->type = 1; 356 | return; 357 | } 358 | 359 | static void 360 | set_fmt(u8 *p) 361 | { 362 | u8 *q; 363 | int n, l; 364 | struct fmt 
*fmt; 365 | u16 t[128]; 366 | 367 | q = p+1; 368 | if (x.biffv >= BIFF4) { 369 | q += 2; 370 | } 371 | n = x.biffv < BIFF5 ? x.fmt.nelem : g16(p); 372 | l = q[-1]; 373 | if (x.biffv >= BIFF8) { 374 | l = g16(p+2), q++; 375 | } 376 | 377 | if (l > elemof(t)) { 378 | return; 379 | } 380 | 381 | getstr(t, q, l); 382 | fmt = (struct fmt*)tab_alloc(&x.fmt, n, &default_fmt); 383 | parse_fmt(fmt, t, l); 384 | return; 385 | } 386 | 387 | static const struct fmt* 388 | fmt_from_xf(int xf) 389 | { 390 | const struct fmt *fmt = &default_fmt; 391 | int n, st, ua, org_xf; 392 | u8 *p; 393 | 394 | if (xf >= x.xf_ptr.nelem) { 395 | bad_xf: 396 | warnx("Strange XF index %u -- ignored", xf); 397 | return fmt; 398 | } 399 | 400 | org_xf = xf; 401 | 402 | again: 403 | p = TAB(x.xf_ptr, u8*, xf); 404 | if (!p) { 405 | goto bad_xf; 406 | } 407 | 408 | if (x.biffv < BIFF5) { 409 | /* 0x02 */ 410 | n = p[1]; 411 | st = 2; 412 | ua = x.biffv < BIFF4 ? 3 : 5; 413 | } else { 414 | /* 0xE0 */ 415 | n = g16(p+2); 416 | st = 4; 417 | ua = x.biffv < BIFF8 ? 7 : 9; 418 | } 419 | st = p[st]; 420 | ua = p[ua]; 421 | 422 | if (!((st ^ ua) & 4) && (st + ua != 1)) { 423 | /* format not present */ 424 | if (!(st & 4) || xf!=org_xf) { 425 | /* not a style or loop */ 426 | p += x.biffv!=BIFF4 ? 
4 : 2; 427 | xf = g16(p) >> 4; 428 | if (xf!=org_xf && xf < x.xf_ptr.nelem) { 429 | goto again; 430 | } 431 | } 432 | } else if (n < x.fmt.nelem) { 433 | fmt = &TAB(x.fmt, struct fmt, n); 434 | } 435 | 436 | *(const struct fmt**)tab_alloc(&x.xf_fmt, org_xf, &null_ptr) = fmt; 437 | return fmt; 438 | } 439 | 440 | static void print_time(int m, int f, double v); 441 | 442 | static void 443 | print_fmt(const u8 *xfp, double v) 444 | { 445 | const struct fmt *f; 446 | unsigned xf; 447 | 448 | if (g.nofmt) { 449 | printf("%f", v); 450 | return; 451 | } 452 | 453 | if (x.biffv == BIFF2) { 454 | int n = xfp[1] & 63; 455 | f = &default_fmt; 456 | if (n < x.fmt.nelem) { 457 | f = &TAB(x.fmt, struct fmt, n); 458 | } 459 | goto have_fmt; 460 | } 461 | 462 | xf = g16(xfp); 463 | if (xf < x.xf_fmt.nelem) { 464 | f = TAB(x.xf_fmt, struct fmt*, xf); 465 | if (f) { 466 | goto have_fmt; 467 | } 468 | } 469 | f = fmt_from_xf(xf); 470 | have_fmt: 471 | 472 | switch (f->type) { 473 | case 0: 474 | if (ceil(v) == v) { 475 | printf("%.f", v); 476 | break; 477 | } 478 | default: 479 | printf("%f", v); 480 | break; 481 | case 1: 482 | printf("%.*f", f->arg, v); 483 | break; 484 | case 2: 485 | printf("%.*E", f->arg, v); 486 | break; 487 | case 3: 488 | case 4: 489 | case 5: 490 | print_time(f->type-2, f->arg, v); 491 | break; 492 | } 493 | return; 494 | } 495 | 496 | static void 497 | print_time(int m, int f, double v) 498 | { 499 | int d; 500 | time_t t; 501 | struct tm *tm; 502 | 503 | d = v; 504 | v -= d; 505 | if (x.e1904) { 506 | d += 4*365; 507 | } else if (d <= 60) { 508 | d++; 509 | } 510 | d -= 25569; 511 | 512 | t = d*24*60*60 + (unsigned)(v*24*60*60); 513 | tm = gmtime(&t); 514 | if (!tm) { 515 | printf("#BAD"); // XXX 516 | return; 517 | } 518 | if (m==3 && !f && !v) { 519 | m = 1; 520 | } 521 | if (m&1) { 522 | printf("%04u-%02u-%02u", 523 | tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday); 524 | if (m==1) { 525 | return; 526 | } 527 | printf(" "); 528 | } 529 | 
printf("%2u:%02u:%02u", tm->tm_hour, tm->tm_min, tm->tm_sec); 530 | return; 531 | } 532 | 533 | static void 534 | print_rk(const u8 *xfp, u32 rk) 535 | { 536 | double v; 537 | if (rk & 2) { 538 | v = (s32)rk>>2; 539 | } else { 540 | v = ieee754((u64)(rk&~3) << 32); 541 | } 542 | if (rk & 1) { 543 | v /= 100; 544 | } 545 | print_fmt(xfp, v); 546 | return; 547 | } 548 | 549 | struct rr { 550 | int o, l, id; 551 | }; 552 | 553 | #define GETRR(P) \ 554 | if (4 > x.map.len-rr.o) { \ 555 | TRUNC; \ 556 | } \ 557 | rr.l = g16(x.map.ptr+rr.o+2); \ 558 | rr.id = x.map.ptr[rr.o]; \ 559 | rr.o += 4; \ 560 | if (rr.l > x.map.len-rr.o) { \ 561 | TRUNC; \ 562 | } \ 563 | (P) = x.map.ptr + rr.o; \ 564 | rr.o += rr.l; 565 | 566 | #define EXPLEN(L) if(rr.l < (L)) errx(1, "Record too short &%d", __LINE__); 567 | 568 | static int 569 | skip_substream(int o) 570 | { 571 | struct rr rr; 572 | int d = 1; 573 | rr.o = o; 574 | for (;;) { 575 | u8 *p, sv; 576 | GETRR(p) 577 | sv = p[-3]; 578 | switch(rr.id) { 579 | case 0x09: 580 | if (sv<0x10) { 581 | d++; 582 | } 583 | break; 584 | case 0x0A: 585 | if (!sv && !--d) { 586 | return rr.o; 587 | } 588 | } 589 | } 590 | TRUNC; 591 | } 592 | 593 | static int 594 | read_init_rr(int o) 595 | { 596 | struct rr rr; 597 | int sh, nr; 598 | u8 *p; 599 | 600 | xls_init_struc(); 601 | rr.o = o; 602 | nr = g.nr; sh = 0; 603 | 604 | for (;;) { 605 | GETRR(p) 606 | 607 | switch(rr.id) { 608 | case 0x42: // CODEPAGE 609 | set_codepage(g16(p)); 610 | break; 611 | case 0xFC: // SST 612 | rr.o = read_sst(p, x.map.ptr+rr.o, x.end) - x.map.ptr; 613 | break; 614 | case 0x1E: // FORMAT 615 | set_fmt(p); 616 | break; 617 | case 0x43: 618 | case 0xE0: // XF 619 | *(u8**)tab_alloc(&x.xf_ptr, x.xf_ptr.nelem, &null_ptr) = p; 620 | break; 621 | case 0x04: // LABEL 622 | case 0x03: // NUMBER 623 | case 0x06: // FORMULA 624 | case 0x07: // STRING 625 | case 0x7E: // RK 626 | return sh; 627 | case 0x09: // BOF 628 | if (p[-3]>=0x10) { 629 | break; 630 | } 631 | rr.o = 
skip_substream(rr.o); 632 | break; 633 | case 0x0A: // EOF 634 | if (p[-3]) { 635 | break; 636 | } 637 | return sh; 638 | case 0x85: // SHEET 639 | if(!nr--) { 640 | sh = p - 4 - x.map.ptr; 641 | } 642 | break; 643 | case 0x22: // DATEMODE 644 | x.e1904 = p[0]; 645 | break; 646 | } 647 | } 648 | } 649 | 650 | int to_cell(int r, int c) 651 | { 652 | if(r < g.top || r > g.bottom) { 653 | g.row = r; 654 | return 0; 655 | } 656 | if(g.row < g.top) 657 | g.row = g.top; 658 | if(g.row < r) { 659 | g.col = 0; 660 | do { 661 | putchar('\n'); 662 | g.row++; 663 | } while(g.row < r); 664 | } 665 | if(c < g.left || c > g.right) { 666 | g.col = c; 667 | return 0; 668 | } 669 | if(g.col < g.left) 670 | g.col = g.left; 671 | while(g.col < c) { 672 | putchar('\t'); 673 | g.col++; 674 | } 675 | return 1; 676 | } 677 | 678 | static int to_cell_p(u8 *p) {return to_cell(g16(p), g16(p+2));} 679 | 680 | static inline int to_nx_cell() {return to_cell(g.row, g.col+1);} 681 | 682 | void print_sheet(int o, u8 *name, int nr) 683 | { 684 | struct rr rr; 685 | u8 pvrec; 686 | 687 | if(g.titles) { 688 | if(nr) putchar('\f'); 689 | if(name) print_str(name+1, *name); 690 | putchar('\n'); 691 | } 692 | 693 | rr.o = o; 694 | g.col = g.row = 0; 695 | pvrec = 0; 696 | 697 | for(;;) { 698 | u8 *p; 699 | 700 | GETRR(p) 701 | if (rr.id == 0x0A && !p[-3]) { 702 | // EOF 703 | break; 704 | } 705 | 706 | switch(rr.id) { 707 | case 0x09: // BOF 708 | if (p[-3]>=0x10) { 709 | break; 710 | } 711 | rr.o = skip_substream(rr.o); 712 | break; 713 | case 0x04: // LABEL 714 | if (to_cell_p(p)) { 715 | print_str(p+8, x.biffv==BIFF2 ? 
p[7] : g16(p+6)); 716 | } 717 | break; 718 | case 0xFD: // LABELSST 719 | if (to_cell_p(p)) { 720 | print_sst(g32(p+6)); 721 | } 722 | break; 723 | case 0x7E: // RK 724 | if (to_cell_p(p)) { 725 | print_rk(p+4, g32(p+6)); 726 | } 727 | break; 728 | case 0xBD: { // MULRK 729 | u8 *q = p + rr.l - 11; 730 | int f = to_cell_p(p); 731 | for(;;) { 732 | p += 6; 733 | if (f) { 734 | print_rk(p-2, g32(p)); 735 | } 736 | if (p>=q) { 737 | break; 738 | } 739 | f = to_nx_cell(); 740 | } 741 | } break; 742 | case 0x02: // INTEGER 743 | if (to_cell_p(p)) { 744 | print_fmt(p+4, g16(p+7)); 745 | } 746 | break; 747 | case 0x03: // NUMBER 748 | if(!to_cell_p(p)) { 749 | break; 750 | } 751 | number: 752 | print_fmt(p+4, ieee754(g64(x.biffv==BIFF2 ? p+7 : p+6))); 753 | break; 754 | case 0x06: // FORMULA 755 | if(!to_cell_p(p)) { 756 | pvrec = 0; 757 | break; 758 | } 759 | if (x.biffv==BIFF2 || g16(p+6+6) != 0xFFFF) { 760 | pvrec = 0; 761 | goto number; 762 | } 763 | // p[6] == 0: STRING follows 764 | if (p[6] == 1) { 765 | printf("%s", p[6+2] ? 
"true" : "false"); 766 | } 767 | break; 768 | case 0x07: // STRING 769 | if (pvrec==0x06) { 770 | print_str(p+2, g16(p)); 771 | } 772 | break; 773 | case 0xD6: // RSTRING 774 | if (to_cell_p(p)) { 775 | print_str(p+8, g16(p+6)); 776 | } 777 | break; 778 | } 779 | pvrec = rr.id; 780 | 781 | if(g.row > g.bottom) { 782 | break; 783 | } 784 | } 785 | putchar('\n'); 786 | } 787 | 788 | void print_xls() 789 | { 790 | struct rr rr; 791 | int done; 792 | u8 *p; 793 | 794 | done = 0; 795 | rr.o = 0; 796 | GETRR(p) 797 | 798 | switch(g16(p+2)) { 799 | case 0x10: // single sheet 800 | if(g.nr) goto not_found; 801 | read_init_rr(rr.o); 802 | print_sheet(rr.o, 0, 0); 803 | return; 804 | case 0x100: goto workbook; 805 | case 5: goto globals; 806 | default: 807 | BADF("Bad content"); 808 | } 809 | 810 | /* BIFF5+ */ 811 | globals: 812 | rr.o = read_init_rr(rr.o); 813 | if(!rr.o) 814 | goto not_found; 815 | for(;;) { 816 | u32 o; 817 | GETRR(p) 818 | if(rr.id != 0x85) // SHEET 819 | break; 820 | o = rr.o; 821 | rr.o = g32(p); 822 | if(rr.o >= x.map.len) 823 | TRUNC; 824 | if(rr.o <= p-x.map.ptr) 825 | BADF( ); 826 | if(p[4]==0) { 827 | u8 *q; 828 | GETRR(q) 829 | if(rr.id != 0x09) BADF( ); 830 | print_sheet(rr.o, p+6, done++); 831 | if(!g.all) break; 832 | } else if(g.sel) 833 | goto not_found; 834 | rr.o = o; 835 | } 836 | return; 837 | 838 | /* BIFF4W */ 839 | workbook: 840 | for(;;) { 841 | GETRR(p) 842 | switch(rr.id) { 843 | u32 o; 844 | case 0x42: // CODEPAGE 845 | EXPLEN(2) 846 | set_codepage(g16(p)); 847 | break; 848 | case 0x8E: // SHEETOFFSET 849 | EXPLEN(4) 850 | o = g32(p); 851 | if(o >= x.map.len) TRUNC; 852 | rr.o = o; 853 | goto found; 854 | case 0x0A: // EOF 855 | if(p[-3]) break; 856 | BADF( ); 857 | } 858 | } 859 | found: 860 | GETRR(p) 861 | if(rr.id != 0x8F) // SHEETHDR 862 | goto not_found; 863 | EXPLEN(5) 864 | for(;;) { 865 | u32 o = g32(p); 866 | if(o >= x.map.len-rr.o) 867 | TRUNC; 868 | o += rr.o; 869 | if(!g.nr--) { 870 | u8 *name = p+4; 871 | GETRR(p) 
872 | if(rr.id != 0x09) // BOF 873 | BADF( ) 874 | if(g16(p+2) == 0x10) { 875 | read_init_rr(rr.o); 876 | print_sheet(rr.o, name, done++); 877 | if(!g.all) 878 | break; 879 | } else if(g.sel) 880 | errx(1, "Not a sheet"); 881 | g.nr = 0; 882 | } 883 | rr.o = o; 884 | GETRR(p) 885 | if(rr.id != 0x8F) { 886 | if(!done) 887 | goto not_found; 888 | break; 889 | } 890 | } 891 | return; 892 | 893 | not_found: 894 | errx(1, "No such sheet"); 895 | } 896 | 897 | void list_xls() 898 | { 899 | struct rr rr; 900 | u8 *p; 901 | int nr; 902 | 903 | rr.o = 0; 904 | GETRR(p) 905 | if(rr.id != 0x09) // BOF 906 | BADF( ); 907 | switch(g16(p+2)) { 908 | case 0x10: 909 | printf("Single sheet\n"); 910 | return; 911 | case 5: 912 | case 0x100: 913 | break; 914 | default: 915 | printf("Unknown contents\n"); 916 | return; 917 | } 918 | 919 | nr = 0; 920 | for(;;) { 921 | GETRR(p) 922 | switch(rr.id) { 923 | u8 *q; 924 | char *k; 925 | case 0x0A: // EOF 926 | if(p[-3]) break; 927 | return; 928 | case 0x09: // BOF 929 | if(p[-3]>=0x10) break; 930 | rr.o = skip_substream(rr.o); 931 | break; 932 | case 0x42: // CODEPAGE 933 | set_codepage(g16(p)); 934 | break; 935 | case 0x85: // SHEET 936 | k = "sheet"; 937 | q = p; 938 | if(x.biffv > BIFF4) { 939 | switch(q[5]) { 940 | case 0: break; 941 | case 2: k="chart"; break; 942 | case 6: k="vbasic"; break; 943 | default: k=""; break; 944 | } 945 | q += 6; 946 | } 947 | printf("%2u. 
%-8s ", nr++, k); 948 | print_str(q+1, q[0]); 949 | putchar('\n'); 950 | break; 951 | } 952 | } 953 | } 954 | 955 | static char *parse_cell(char *s, unsigned *r, unsigned *c) 956 | { 957 | unsigned a = *s - 'A'; 958 | if(a < 26) { 959 | unsigned v = a; 960 | for(;;) { 961 | a = *++s - 'A'; 962 | if(a >= 26) break; 963 | v = 26*v + a; 964 | } 965 | *c = v; 966 | } 967 | a += 'A' - '0'; 968 | if(a < 10) { 969 | unsigned v = a; 970 | for(;;) { 971 | a = *++s - '0'; 972 | if(a >= 10) break; 973 | v = 10*v + a; 974 | } 975 | *r = v - 1; 976 | } 977 | return s; 978 | } 979 | 980 | void parse_range(char *s) 981 | { 982 | s = parse_cell(s, &g.top, &g.left); 983 | if(!*s) return; 984 | if(*s==':') { 985 | s = parse_cell(s+1, &g.bottom, &g.right); 986 | if(!*s) return; 987 | } 988 | errx(1, "unexpected char '%c' in cell range", *s); 989 | } 990 | 991 | int main(int argc, char *argv[]) 992 | { 993 | char o=0; 994 | 995 | for(;;) switch(getopt(argc, argv, "n:AlC:a12P:fdhV?-")) { 996 | int n; 997 | case -1: goto endopt; 998 | case 'n': g.sel=1; g.nr = atoi(optarg); break; 999 | case 'A': g.sel=0; g.all=1; g.titles=1; break; 1000 | case 'l': o = 'l'; break; 1001 | case 'C': 1002 | n = find_charset(optarg); 1003 | if(n<0) warnx("%s: Unknown charset", optarg); 1004 | set_charset(n); 1005 | break; 1006 | case 'a': set_charset(1); break; 1007 | case '1': set_charset(2); break; 1008 | case '2': set_charset(3); break; 1009 | case 'P': 1010 | n = atoi(optarg); 1011 | if(n) set_codepage(n); 1012 | break; 1013 | case 'f': g.nofmt = 1; break; 1014 | case 'd': g.biff2ok = 1; break; 1015 | case '?': 1016 | if(optopt!='?') break; 1017 | case '-': 1018 | case 'h': 1019 | case 'V': 1020 | #define _STR(T) #T 1021 | #define STR(T) _STR(T) 1022 | printf("xls2txt " STR(VERSION) " / " 1023 | "Copyright 2011 Jan Bobrowski / GPL\n"); 1024 | goto usage; 1025 | } 1026 | endopt: 1027 | 1028 | g.right = g.bottom = 0xFFFF; 1029 | switch(argc-optind) { 1030 | default: 1031 | usage: 1032 | printf( 1033 | 
"usage: xls2txt [-C cs] [-n sheetnum|-A] [-f] file.xls [X:X]\n" 1034 | " xls2txt [-C cs] -l file.xls\n" 1035 | " X:X\tcell range (eg. A1:C5, D2:E)\n" 1036 | " -l\tlist sheets\n" 1037 | " -n num\tselect sheet\n" 1038 | " -A\tall sheets (\\f separated)\n" 1039 | " -C cs\toutput charset (utf8 asc iso1 iso2), utf8 is default\n" 1040 | " -f\tdon't try to format numbers\n" 1041 | " -a\tascii output (same as -C asc)\n" 1042 | ); 1043 | return 1; 1044 | case 1: break; 1045 | case 2: parse_range(argv[argc-1]); 1046 | break; 1047 | } 1048 | 1049 | ole_open(argv[optind]); 1050 | x.map = get_workbook(); 1051 | x.end = x.map.ptr + x.map.len; 1052 | check_biffv(x.map.ptr); 1053 | if(o) 1054 | list_xls(); 1055 | else 1056 | print_xls(); 1057 | 1058 | return 0; 1059 | } 1060 | --------------------------------------------------------------------------------