├── .dockerignore
├── .github
└── workflows
│ ├── badge.svg
│ ├── cleanup.yml
│ └── timbl.yml
├── .gitignore
├── AUTHORS
├── COPYING
├── Dockerfile
├── MAINTAINERS
├── Makefile.am
├── NEWS
├── README
├── README.md
├── TODO
├── bootstrap.sh
├── build-deps.sh
├── codemeta.json
├── configure.ac
├── demos
├── .gitignore
├── Makefile.am
├── api_test1.cxx
├── api_test2.cxx
├── api_test3.cxx
├── api_test4.cxx
├── api_test5.cxx
├── api_test6.cxx
├── classify.cxx
├── cross_val.test
├── dimin.script
├── dimin.test
├── dimin.train
├── small_1.train
├── small_2.train
├── small_3.train
├── small_4.train
├── small_5.train
└── tse.cxx
├── docs
├── Makefile.am
├── Timbl_6.4_Manual.pdf
├── texfiles
│ ├── Timbl_6.3_API.tex
│ ├── Timbl_6.3_Manual.tex
│ ├── Timbl_6.4_Manual.tex
│ ├── distanceweight-ided.eps
│ ├── fspace.eps
│ ├── fullname.bst
│ ├── fullname.sty
│ ├── ilk.bib
│ ├── mble-method.eps
│ ├── pos-neg.eps
│ └── roc-auc.eps
└── timbl.1
├── include
├── Makefile.am
└── timbl
│ ├── .gitignore
│ ├── BestArray.h
│ ├── Choppers.h
│ ├── Common.h
│ ├── Features.h
│ ├── GetOptClass.h
│ ├── IBtree.h
│ ├── Instance.h
│ ├── MBLClass.h
│ ├── Makefile.am
│ ├── Matrices.h
│ ├── Metrics.h
│ ├── MsgClass.h
│ ├── Options.h
│ ├── Statistics.h
│ ├── StringOps.h
│ ├── Targets.h
│ ├── Testers.h
│ ├── TimblAPI.h
│ ├── TimblExperiment.h
│ ├── Types.h
│ └── neighborSet.h
├── m4
├── .gitignore
├── Makefile.am
└── ac_osx_pkg.m4
├── src
├── .gitignore
├── BestArray.cxx
├── CVExperiment.cxx
├── Choppers.cxx
├── Common.cxx
├── Features.cxx
├── GetOptClass.cxx
├── IBprocs.cxx
├── IBtree.cxx
├── IGExperiment.cxx
├── Instance.cxx
├── LOOExperiment.cxx
├── MBLClass.cxx
├── Makefile.am
├── Metrics.cxx
├── MsgClass.cxx
├── Statistics.cxx
├── StringOps.cxx
├── TRIBLExperiments.cxx
├── Targets.cxx
├── Testers.cxx
├── Timbl.cxx
├── TimblAPI.cxx
├── TimblExperiment.cxx
├── Types.cxx
├── neighborSet.cxx
└── simpletest.cxx
└── timbl.pc.in
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | .cache
3 | .*
4 | _*
5 | *.cache
6 | *.pyc
7 | build
8 | *.egg-info
9 | gource*
10 | *.tar.gz
11 | *.pdf
12 | TODO
13 | *.lock
14 |
--------------------------------------------------------------------------------
/.github/workflows/badge.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
75 |
--------------------------------------------------------------------------------
/.github/workflows/cleanup.yml:
--------------------------------------------------------------------------------
1 | ---
2 | name: Delete old workflow runs
3 | on:
4 | schedule:
5 | - cron: '0 15 14 * *'
6 | # Run monthly, at 15:00 on the 14th day of the month. (testing)
7 |
8 | jobs:
9 | del_runs:
10 | runs-on: ubuntu-latest
11 | permissions:
12 | actions: write
13 | steps:
14 | - name: Delete workflow runs
15 | uses: Mattraks/delete-workflow-runs@v2
16 | with:
17 | token: ${{ github.token }}
18 | repository: ${{ github.repository }}
19 | retain_days: 30
20 | keep_minimum_runs: 6
21 |
--------------------------------------------------------------------------------
/.github/workflows/timbl.yml:
--------------------------------------------------------------------------------
1 | ---
2 | name: C/C++ CI
3 |
4 | on:
5 | schedule:
6 | - cron: "0 20 3 * 5" # runs at 20:00 UTC on the 3rd of each month and also on every Friday (cron ORs day-of-month with day-of-week)
7 | push:
8 | branches:
9 | - master
10 | - develop
11 | paths:
12 | - configure.ac
13 | - 'src/**'
14 | - 'include/**'
15 | - '.github/workflows/*'
16 |
17 | pull_request:
18 | branches: [master]
19 |
20 | jobs:
21 | notification:
22 | runs-on: ubuntu-latest
23 | name: Notifications
24 | steps:
25 | - name: IRC notification of starting the builds
26 | uses: LanguageMachines/ticcactions/irc-init@v1
27 |
28 | build:
29 | runs-on: ${{ matrix.os }}
30 | needs: notification
31 | strategy:
32 | matrix:
33 | os: [ubuntu-latest, macos-latest]
34 | compiler: [g++-12, clang++]
35 |
36 | steps:
37 |
38 | - name: Cancel Previous Runs
39 | uses: styfle/cancel-workflow-action@0.12.1
40 | with:
41 | access_token: ${{ github.token }}
42 |
43 | - uses: actions/checkout@v4.1.1
44 |
45 | - uses: LanguageMachines/ticcactions/cpp-build-env@v1
46 | - uses: LanguageMachines/ticcactions/cpp-dependencies@v1
47 | - uses: LanguageMachines/ticcactions/irc-nick@v1
48 |
49 | - uses: LanguageMachines/ticcactions/cpp-submodule-build@v1
50 | with:
51 | branch: ${{ github.ref_name }}
52 | module: ticcutils
53 |
54 | - uses: LanguageMachines/ticcactions/setup-cppcheck@v1
55 | - name: Static Code-check
56 | if: ${{ env.action_status == '' }}
57 | run: cppcheck ${{ env.cpc_opts }} .
58 |
59 | - uses: LanguageMachines/ticcactions/cpp-safe-build@v1
60 |
61 | - name: Notify IRC of results
62 | uses: LanguageMachines/ticcactions/irc-status@v1
63 | with:
64 | branch: ${{ github.ref_name }}
65 | nickname: ${{ env.nick }}
66 | step: test
67 | status: ${{ env.action_status }}
68 | details: ${{ env.action_details }}
69 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.gz
3 | Makefile
4 | Makefile.in
5 | compile
6 | config.guess
7 | config.h
8 | config.h.in
9 | config.log
10 | config.status
11 | config.sub
12 | configure
13 | INSTALL
14 | aclocal.m4
15 | autom4te.cache/
16 | depcomp
17 | install-sh
18 | libtool
19 | ltmain.sh
20 | missing
21 | stamp-h1
22 | test-driver
23 | timbl.pc
24 | ChangeLog
25 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | TiMBL authors
2 |
3 | Lead programmer:
4 |
5 | Ko van der Sloot
6 |
7 | Code, algorithm, and design contributions by:
8 |
9 | Peter Berck
10 | Antal van den Bosch
11 | Walter Daelemans
12 | Maarten van Gompel
13 | Ton Weijters
14 | Jakub Zavrel
15 |
16 | Contributors:
17 |
18 | People who contributed to Timbl by suggesting improvements, filing bug
19 | reports, asking the right questions etc.:
20 |
21 | Robert Andersson
22 | Vincent Van Asch
23 | Joris Bleys
24 | Johan Bos
25 | Joan Bresnan
26 | Stefan Breuer
27 | Sabine Buchholz
28 | Bertjan Busser
29 | Sander Canisius
30 | Giovanni Cassani
31 | Win Carus
32 | Felix Filoz
33 | Alan Frankel
34 | Sven Hartrumpf
35 | Iris Hendrickx
36 | Lyndon Hiew
37 | Steve Hunt
38 | Valentin Jijkoun
39 | Gunn Inger Lyse
40 | Svetoslav Marinov
41 | Erwin Marsi
42 | Liam McGrath
43 | Jens Nilsson
44 | Ties Kemper
45 | Tom DePlonty
46 | Adam Radziszewski
47 | Albert Russel
48 | Yvan Saeys
49 | Frank Scheelen
50 | Armin Schmidt
51 | Olaf Seibert
52 | Gabriel Skantze
53 | Carline Sporleder
54 | Herman Stehouwer
55 | Erik Tjong Kim Sang
56 | Joseph Turian
57 | Frederik Vaassen
58 | Corne Versloot
59 | Colin Wilson
60 | Linda Yung
61 | Bram Vandekerckhove
62 | Menno van Zaanen
63 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine:latest
2 | #VERSION can be:
3 | # - stable: builds latest stable versions from source (default)
4 | # - distro: uses packages as provided by Alpine Linux (may be slightly out of date)
5 | # - development: latest development version (git master/main branch)
6 | ARG VERSION="stable"
7 | LABEL org.opencontainers.image.authors="Maarten van Gompel "
8 | LABEL description="timbl - tilburg memory-based learner"
9 |
10 | RUN mkdir -p /data
11 | RUN mkdir -p /usr/src/timbl
12 | COPY . /usr/src/timbl
13 |
14 | RUN if [ "$VERSION" = "distro" ]; then \
15 | rm -Rf /usr/src/timbl &&\
16 | echo -e "----------------------------------------------------------\nNOTE: Installing latest release as provided by Alpine package manager.\nThis version may diverge from the one in the git master tree or even from the latest release on github!\nFor development, build with --build-arg VERSION=development.\n----------------------------------------------------------\n" &&\
17 | apk update && apk add timbl; \
18 | else \
19 | PACKAGES="libbz2 icu-libs libxml2 libgomp libstdc++" &&\
20 | BUILD_PACKAGES="build-base autoconf-archive autoconf automake libtool bzip2-dev icu-dev libxml2-dev git" &&\
21 | apk add $PACKAGES $BUILD_PACKAGES &&\
22 | cd /usr/src/ && ./timbl/build-deps.sh &&\
23 | cd timbl && sh ./bootstrap.sh && ./configure && make && make install &&\
24 | apk del $BUILD_PACKAGES && rm -Rf /usr/src; \
25 | fi
26 |
27 | WORKDIR /
28 |
29 | ENTRYPOINT [ "timbl" ]
30 |
--------------------------------------------------------------------------------
/MAINTAINERS:
--------------------------------------------------------------------------------
1 | Maarten van Gompel (KNAW Humanities Cluster)
2 | Ko van der Sloot
3 |
--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | ACLOCAL_AMFLAGS =-I m4 --install
3 |
4 | SUBDIRS = src include demos docs m4
5 |
6 | EXTRA_DIST = bootstrap.sh AUTHORS TODO NEWS README.md timbl.pc.in codemeta.json
7 |
8 | pkgconfigdir = $(libdir)/pkgconfig
9 | pkgconfig_DATA = timbl.pc
10 |
11 | ChangeLog: NEWS
12 | git pull; git2cl > ChangeLog
13 |
14 | docker:
15 | docker build -t timbl:latest .
16 |
17 | docker-dev:
18 | docker build -t timbl:dev --build-arg VERSION=development .
19 |
20 | deps:
21 | ./build-deps.sh
22 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | 6.10 2024-12-16
2 | [Ko van der Sloot]
3 | * C++17 is required
4 | * updated to latest ticcutils
5 | * improved GitHub CI
6 | * code quality
7 |
8 | 6.9 2023-10-21
9 | [Ko van der Sloot]
10 | * better code: const correctness etc.
11 |
12 | 6.8.2 2023-02-22
13 | [Ko van der Sloot]
14 | * plugged a memory leak
15 | * C++ code quality improved
16 | * removed dependency on deprecated sprintf function
17 | * removed dependency on libtar
18 |
19 | 6.8.1 2023-01-04
20 | [Ko van der Sloot]
21 | * fix for some odd distro issues
22 | * for now re-added 2 backward compatibility functions
23 |
24 | 6.8 2023-01-02
25 | [Ko van der Sloot]
26 | * major code refactoring
27 | - BREAKS API and ABI
28 | - library bumped
29 | - getting rid of a lot of pointers and C-style arrays
30 | - removed C-style casts
31 | - Unicode is the default now for most functions. Some 'string' functions
32 | are still available in the API.
33 | - In general modernizing to C++11
34 | - weeded out CppCheck warnings
35 | * improved GitHub action
36 |
37 | 6.7 2022-07-22
38 | [Maarten van Gompel]
39 | * updated metadata (codemeta.json) following new (proposed) CLARIAH requirements (CLARIAH/clariah-plus#38)
40 | * added build-deps.sh for automatically building and installing dependencies
41 | * added Dockerfile and instructions
42 | * no functional changes
43 |
44 | 6.6 2020-12-15
45 | [Ko vd Sloot]
46 | * Internally we use NFC normalized UnicodeString's now.
47 | Timbl should be robust for UTF8 files, even exotic languages.
48 | * added some Unicode functions to the API
49 | * bumped library version
50 | * several code refactorings
51 | * added the possibility to use the options -f and -i, without -t
52 |
53 | 6.5 2020-04-15
54 | [Ko vd Sloot]
55 | * adapted to the newest TiCC::CommandLine implementation
56 | * small code refactorings
57 |
58 | 6.4.14 2019-10-21
59 | [Ko vd Sloot]
60 | * added JSON support. Still EXPERIMENTAL!
61 | the JSON syntax might change in the future. So handle with care.
62 | * confidence score calculation is now a real TimblExperiment member
63 | * removed Boost dependency.
64 |
65 | 6.4.13 2018-11-28
66 | [Ko van der Sloot]
67 | - added a '--limit' option to use only the most significant features
68 |
69 | 6.4.12 2018-05-16
70 | [Ko van der Sloot]
71 | Bugfix release:
72 | - updated usage(). Info on -G 2 option was wrong.
73 | - changed an error message to be more clear.
74 | - fixed building of the TeX documentation
75 |
76 | [Maarten van Gompel]
77 | - Added codemeta.json metadata
78 |
79 | 6.4.11 2018-01-09
80 | [Ko van der Sloot]
81 | Bugfix release:
82 | - Fixed a major bug in similarity metric calculations. (Cosine and Dot product)
83 |
84 | 6.4.10 2017-11-09
85 | [Ko van der Sloot]
86 | Bugfix release:
87 | - allow for spaces in TABBED input (they are significant)
88 | - corrected some typos in messages and man page
89 | - minor code refactorings
90 |
91 | 6.4.9 2017-05-04
92 | [Ko van der Sloot]
93 | Maintenance release:
94 | - removed unused/non-functional functions from the API
95 | - code refactoring. Mostly based on CPPCHECK static analyzer.
96 | - small bugs:
97 | -e options didn't always do what you expected
98 | - added missing files in docs
99 | [Maarten van Gompel]
100 | - updated README.md
101 |
102 | 6.4.8 2016-07-11
103 | [Ko van der Sloot]
104 | Maintenance release:
105 | - code refactoring and improvement
106 | - relying more on ticcutils
107 | - fixed exit codes
108 | - accept long options: --version and --help
109 | - fix out-of-range problem in Sparse Format
110 |
111 | 6.4.7 2016-01-14
112 | [Ko van der Sloot][Maarten van Gompel]
113 | * repository moved to GitHub
114 | * added travis support
115 | * code updates. (clearer code mainly)
116 | * depending a bit more on ticcutils (CommandLine, StringOps)
117 | * some small bug fixes (LOO with a 1 line file)
118 |
119 | 6.4.6 2014-09-23
120 | [Ko van der Sloot]
121 | * release
122 |
123 | 6.4.5 2014-09-16
124 | * small bug fixes
125 |
126 | 6.4.4 2013-04-03
127 | * rely more on ticcutils stuff. A lot of functions are moved there
128 | * added a GetAccuracy option to the API
129 | * bug fix in Choppers.cxx
130 |
131 | 6.4.3 2012-10-11
132 | * added an --occurrences option for training/testing files with an occurrence
133 | value.
134 | * made Tree.cxx and Trie.h 'omp thread-safe' and moved them to ticcutils
135 | * added a 'Tabbed' inputformat (© Maarten van Gompel)
136 | * The Micro Average F-score calculation is now according to the Manual.
137 | There were small differences caused by a mixup of test and train data.
138 |
139 | 6.4.2 2011-12-20
140 | * start to use Requires.private in timbl.pc
141 | * added a 'check' target to Make system
142 | * achieved a considerable speedup for multithreaded testing.
143 | * fixed a small problem in LogBuffer. Also simplified and cleaned up
144 | LogBuffer and LogStream code. All dependencies need recompiling!
145 | * implemented +vcf (confidence) output
146 | * The -T option for TreeOrdening is now named --Treeorder
147 | * fixed tiebreaking for -R (random) option, closes bug 43, again.
148 | * some small fixes for '-pedantic' compiler option
149 | * avoid zero result in Exponential Decay (bug 89).
150 | * removed unused relativeWeight() function. (was duplicated)
151 |
152 | 6.4.1 2011-08-25
153 |
154 | [ Ko van der Sloot ]
155 | - added Version() and VersionName() functions. We want them for every
156 | member of the family
157 | - fixed a problem with including 'config.h' in the API
158 | - fixed a problem with normalization on empty distributions.
159 | - added a Confidence( class ) function to the instances API.
160 | returns the Weight of 'class'. Which is influenced by normalization!
161 | - added logProbability normalization
162 | - the +vS option was not always honoured. Now Timbl shuts its big mouth better
163 | - Expand() is now also enabled for TRIBL and TRIBL2
164 |
165 | 6.4.0
166 | - decapped Timbl and libTimbl to timbl and libtimbl
167 | this will shake the whole timbl family tree!
168 | - small fixes to survive -pedantic compiler option without warnings
169 |
170 | 6.3.4
171 | - we now support incremental learning from a file on the command line.
172 | - implemented a --clones option to use multiple threads for testing
173 | - fixed bug 58. Emit Error when reading an InstanceBase without a test.
174 | - fixed bug 61. Give a Warning when a trainingset contains only 1 class.
175 | - cleaned up build system
176 |
177 | 6.3.3
178 | - several small fixes.
179 |
180 | 6.3.2
181 | - fixed bug 44. Segfault in weird cases
182 | - fixed bug 45. Needless processing of traindata when required option is missing
183 | - fixed bug 46. Tribl2 sometimes fails to correctly output +v+k+n
184 | - fixed bug 47. Unclear error message when InnerProduct fails
185 | - several small uncritical enhancements
186 |
187 | 6.3.1 - 2010-11-17
188 | - Little API change in TimblOptions (more clear i hope)
189 | - Little bug fixes and improvement (logging mostly)
190 | - Moved LogStream stuff back in from TimblServer
191 |
192 | 6.2.3
193 | forgot to edit this file
194 | 6.2.2
195 | forgot to edit this file
196 |
197 | 6.2.1 - 2009-11-30
198 |
199 | - Fixed compilation problem on Cygwin
200 | - Added functions to API
201 | - Improved server functionality (undocumented yet)
202 |
203 | 6.2.0 - 2009-11-03
204 |
205 | - Stable release
206 |
207 | 6.1.99.0.20091021.1 - 2009-10-21
208 |
209 | - Another snapshot from SVN. Now needs libxml2 for building.
210 |
211 | 6.1.99.0.20091014.1 - 2009-10-14
212 |
213 | - Another snapshot from SVN. (By mistake, aka 6.2.0).
214 |
215 | 6.2.0.pre3 - 2009-10-05
216 |
217 | - Another snapshot from SVN.
218 |
219 | 6.2.0-pre1 - 2009-09-03
220 |
221 | - snapshot from SVN.
222 |
223 | 2009-08-31: Ko vd Sloot
224 | It's been a long time since the previous news
225 | lot of overhauling of the code took place
226 | 2008-03-04: Ko vd Sloot
227 | numerous small bug fixes.
228 | getting ready for 6.1.2
229 | 2007-12-03: Ko vd Sloot
230 | Packaging seems to be fine now.
231 | 2007-10-01: Ko vd Sloot
232 | first packaging attempt
233 |
234 | # $Id$
235 | # $URL$
236 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Please see README.md for more information.
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/LanguageMachines/timbl/actions/)
2 | [](http://applejack.science.ru.nl/languagemachines/)
3 | [](https://zenodo.org/badge/latestdoi/20526237)
4 |
5 | ===========================================
6 | TiMBL: Tilburg Memory Based Learner
7 | ===========================================
8 |
9 | TiMBL 6.4 (c) CLS/ILK/CLiPS 1998 - 2024
10 | Centre for Language Studies, Radboud University Nijmegen
11 | Induction of Linguistic Knowledge Research Group, Tilburg University and
12 | Centre for Dutch Language and Speech, University of Antwerp
13 |
14 | **Website:** https://languagemachines.github.io/timbl/
15 |
16 |
17 | TiMBL is an open source software package implementing several memory-based
18 | learning algorithms, among which IB1-IG, an implementation of k-nearest
19 | neighbor classification with feature weighting suitable for symbolic feature
20 | spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented
21 | algorithms have in common that they store some representation of the training
22 | set explicitly in memory. During testing, new cases are classified by
23 | extrapolation from the most similar stored cases.
24 |
25 | For over fifteen years TiMBL has been mostly used in natural language
26 | processing as a machine learning classifier component, but its use extends to
27 | virtually any supervised machine learning domain. Due to its particular
28 | decision-tree-based implementation, TiMBL is in many cases far more efficient
29 | in classification than a standard k-nearest neighbor algorithm would be.
30 |
31 |
32 | -----------------------------------------------------------------------
33 |
34 | This is a major extension to the sixth main release of TiMBL.
35 | Most significant change: **The main program is now called 'timbl' and not
36 | 'Timbl' anymore. Be warned!**
37 | This change is part of our effort to get our MBL software into software
38 | distributions like Debian, Ubuntu, RedHat.
39 |
40 | Comments and bug-reports are welcome at our issue tracker at
41 | https://github.com/LanguageMachines/timbl/issues or by mailing
42 | lamasoftware (at) science.ru.nl.
43 | Documentation and more info may be found on https://languagemachines.github.io/timbl .
44 |
45 | TiMBL is distributed under the GNU Public Licence v3 (see the file COPYING).
46 |
47 | -----------------------------------------------------------------------
48 |
49 | This software has been tested on:
50 | - Intel platforms running several versions of Linux, including Ubuntu, Debian,
51 | Arch Linux, Fedora (both 32 and 64 bits)
52 | - MAC platform running OS X 10.10
53 |
54 | Alternatively, with some effort, you may get it to work on a Windows platform using Cygwin.
55 |
56 | Compilers:
57 | - GCC (use 7.0 or later)
58 | - Clang
59 |
60 | Contents of this distribution:
61 | - Sources
62 | - Licensing information ( COPYING )
63 | - Build system based on GNU Autotools
64 | - Container build file ( Dockerfile )
65 | - Example data files ( in the demos directory )
66 | - Documentation ( in the docs directory )
67 |
68 | Dependencies:
69 | To be able to successfully build TiMBL from the tarball, you need the
70 | following packages:
71 | - ticcutils (https://github.com/LanguageMachines/ticcutils)
72 | - pkg-config
73 | - libxml2-dev
74 |
75 | To install TiMBL, first consult whether your distribution's package manager has an up-to-date package for TiMBL.
76 |
77 | To compile and install manually from source instead, provided you have all the dependencies installed:
78 |
79 | $ bash bootstrap.sh
80 | $ ./configure
81 | $ make
82 | $ make install
83 |
84 | If you want to automatically download and install the latest stable versions of
85 | the required dependencies, then run `./build-deps.sh` prior to the above. You
86 | can pass a target directory prefix as first argument and you may need to
87 | prepend `sudo` to ensure you can install there. The dependencies are:
88 |
89 | * [ticcutils](https://github.com/LanguageMachines/ticcutils)
90 |
91 | A `Dockerfile` for a container build is also available, specify `--build-arg VERSION=development` if you want the latest
92 | development version instead.
93 |
94 | You will still need to take care to install the following 3rd party
95 | dependencies through your distribution's package manager, as they are not
96 | provided by our script:
97 |
98 | * ``icu`` - A C++ library for Unicode and Globalization support. On Debian/Ubuntu systems, install the package libicu-dev.
99 | * A sane build environment with a C++17-capable compiler (e.g. gcc 7 or above or clang), make, autotools, libtool, pkg-config
100 |
101 |
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | * reorganize clone() splitChild() and such in TimblExperiment.
2 | it is very confusing now.
3 |
--------------------------------------------------------------------------------
/bootstrap.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # bootstrap - script to bootstrap the distribution rolling engine
4 |
5 | # usage:
6 | # $ sh ./bootstrap.sh && ./configure && make dist[check]
7 | #
8 | # this yields a tarball which one can install doing
9 | #
10 | # $ tar zxf PACKAGENAME-*.tar.gz
11 | # $ cd PACKAGENAME-*
12 | # $ ./configure
13 | # $ make
14 | # # make install
15 |
16 | # requirements:
17 | # GNU autoconf, from e.g. ftp.gnu.org:/pub/gnu/autoconf/
18 | # GNU automake, from e.g. http://ftp.gnu.org/gnu/automake/
19 |
20 | automake=automake
21 | aclocal=aclocal
22 |
23 | # if you want to autogenerate a ChangeLog from svn:
24 | #
25 | # svn2cl, a python script, as used in the GNU Enterprise project.
26 | # By jcater (Jason Cater), contributions by reinhard (Reinhard Müller).
27 | # Get it from
28 | # http://www.gnuenterprise.org/cgi-bin/viewcvs.cgi/*checkout*/gnue/trunk/gnue-common/utils/svn2cl .
29 | # svn2cl is used in Makefile.am too.
30 | #
31 | # (Another svn2cl implementation, in perl, is at
32 | # http://www.contactor.se/~dast/svn/archive-2002-04/0910.shtml)
33 | #
34 | # see also toplevel Makefile.am
35 |
36 | # test -f ChangeLog || {
37 | # svn log --verbose > ChangeLog
38 | #}
39 |
40 | # inspired by hack as used in mcl (from http://micans.org/)
41 |
42 | # autoconf-archive Debian package, aclocal-archive RPM, obsolete/badly supported OS, installed in home dir
43 | acdirs="/usr/share/autoconf-archive/ /usr/share/aclocal/ /usr/local/share/aclocal/ $HOME/local/share/autoconf-archive/ /opt/homebrew/share/aclocal/"
44 |
45 | found=false
46 | for d in $acdirs
47 | do
48 | if test -f ${d}pkg.m4
49 | then
50 | found=true
51 | break
52 | fi
53 | done
54 |
55 | if ! $found
56 | then
57 | cat <&2
13 | echo " Building latest stable release of main dependencies from source.">&2
14 | echo "------------------------------------------------------------------------">&2
15 | else
16 | echo "------------------------------------------------------------------------">&2
17 | echo " Building development versions of main dependencie from source.">&2
18 | echo " (This is experimental and may contain bugs! DO NOT PUBLISH!)">&2
19 | echo "-----------------------------------------------------------------------">&2
20 | fi
21 |
22 | PWD="$(pwd)"
23 | BUILDDIR="$(mktemp -dt "build-deps.XXXXXX")"
24 | cd "$BUILDDIR"
25 | BUILD_SOURCES="LanguageMachines/ticcutils"
26 | for SUFFIX in $BUILD_SOURCES; do \
27 | NAME="$(basename "$SUFFIX")"
28 | git clone "https://github.com/$SUFFIX"
29 | cd "$NAME"
30 | REF=$(git tag -l | grep -E "^v?[0-9]+(\.[0-9])*" | sort -t. -k 1.2,1n -k 2,2n -k 3,3n -k 4,4n | tail -n 1)
31 | if [ "$VERSION" = "stable" ] && [ -n "$REF" ]; then
32 | git -c advice.detachedHead=false checkout "$REF"
33 | fi
34 | sh ./bootstrap.sh && ./configure --prefix "$PREFIX" && make && make install
35 | cd ..
36 | done
37 | cd "$PWD"
38 | [ -n "$BUILDDIR" ] && rm -Rf "$BUILDDIR"
39 |
--------------------------------------------------------------------------------
/codemeta.json:
--------------------------------------------------------------------------------
1 | {
2 | "@context": [
3 | "https://doi.org/10.5063/schema/codemeta-2.0",
4 | "http://schema.org",
5 | "https://w3id.org/software-types"
6 | ],
7 | "@type": "SoftwareSourceCode",
8 | "identifier": "timbl",
9 | "name": "TiMBL",
10 | "version": "6.10",
11 | "description": "TiMBL is an open source software package implementing several memory-based learning algorithms, among which IB1-IG, an implementation of k-nearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision-tree approximation of IB1-IG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases.",
12 | "license": "https://spdx.org/licenses/GPL-3.0",
13 | "url": "https://languagemachines.github.io/timbl",
14 | "author": [
15 | {
16 | "@type": "Person",
17 | "givenName": "Ko",
18 | "familyName": "van der Sloot",
19 | "email": "ko.vandersloot@let.ru.nl",
20 | "affiliation": {
21 | "@id": "https://www.ru.nl/clst",
22 | "@type": "Organization",
23 | "name": "Centre for Language and Speech Technology",
24 | "url": "https://www.ru.nl/clst",
25 | "parentOrganization": {
26 | "@id": "https://www.ru.nl/cls",
27 | "@type": "Organization",
28 | "name": "Centre for Language Studies",
29 | "url": "https://www.ru.nl/cls",
30 | "parentOrganization": {
31 | "@id": "https://www.ru.nl",
32 | "name": "Radboud University",
33 | "@type": "Organization",
34 | "url": "https://www.ru.nl",
35 | "location": {
36 | "@type": "Place",
37 | "name": "Nijmegen"
38 | }
39 | }
40 |
41 | }
42 | },
43 | "position": 1
44 | },
45 | {
46 | "@id": "https://orcid.org/0000-0003-2493-656X",
47 | "@type": "Person",
48 | "givenName": "Antal",
49 | "familyName": "van den Bosch",
50 | "email": "antal.vandenbosch@let.ru.nl",
51 | "affiliation": { "@id": "https://cls.ru.nl" },
52 | "position": 2
53 | },
54 | {
55 | "@type": "Person",
56 | "givenName": "Walter",
57 | "familyName": "Daelemans",
58 | "position": 3
59 | },
60 | {
61 | "@id": "https://orcid.org/0000-0002-1046-0006",
62 | "@type": "Person",
63 | "givenName": "Maarten",
64 | "familyName": "van Gompel",
65 | "email": "proycon@anaproy.nl",
66 | "affiliation": { "@id": "https://knaw.huc.nl" },
67 | "position": 4
68 | },
69 | {
70 | "@type": "Person",
71 | "givenName": "Ton",
72 | "familyName": "Weijters",
73 | "position": 5
74 | },
75 | {
76 | "@type": "Person",
77 | "givenName": "Jakub",
78 | "familyName": "Zavrel",
79 | "position": 6
80 | }
81 | ],
82 | "sourceOrganization": { "@id": "https://www.ru.nl/clst" },
83 | "programmingLanguage": {
84 | "@type": "ComputerLanguage",
85 | "identifier": "c++",
86 | "name": "C++"
87 | },
88 | "operatingSystem": [ "Linux", "BSD", "macOS" ],
89 | "codeRepository": "https://github.com/LanguageMachines/timbl",
90 | "softwareRequirements": [
91 | {
92 | "@type": "SoftwareApplication",
93 | "identifier": "libxml2",
94 | "name": "libxml2"
95 | },
96 | {
97 | "@type": "SoftwareApplication",
98 | "identifier": "ticcutils",
99 | "name": "ticcutils"
100 | }
101 | ],
102 | "readme": "https://github.com/LanguageMachines/timbl/blob/master/README.md",
103 | "issueTracker": "https://github.com/LanguageMachines/timbl/issues",
104 | "contIntegration": "https://travis-ci.org/LanguageMachines/timbl",
105 | "releaseNotes": "https://github.com/LanguageMachines/timbl/releases",
106 | "developmentStatus": "https://www.repostatus.org/#active",
107 | "keywords": [ "nlp", "natural language processing", "memory based learning", "machine learning", "knn", "k-nearest neighbours", "decision tree" ],
108 | "referencePublication": [
109 | {
110 | "@type": "TechArticle",
111 | "name": "TiMBL: Tilburg Memory Based Learner, Reference Guide",
112 | "author": [ "Walter Daelemans", "Jakub Zavrel", "Ko van der Sloot", "Antal van den Bosch" ],
113 | "url": "https://github.com/LanguageMachines/timbl/raw/master/docs/Timbl_6.4_Manual.pdf"
114 | },
115 | {
116 | "@type": "Book",
117 | "name": "Memory-Based Language Processing",
118 | "author": [ "Walter Daelemans", "Antal van den Bosch" ],
119 | "url": "http://ilk.uvt.nl/mblp",
120 | "publisher": "Cambridge University Press"
121 | }
122 | ],
123 | "dateCreated": "1998",
124 | "targetProduct": [
125 | {
126 | "@type": "SoftwareLibrary",
127 | "executableName": "libtimbl",
128 | "name": "libtimbl",
129 | "runtimePlatform": [ "Linux", "BSD", "macOS" ],
130 | "description": "Memory-based Learning Library with API for C++"
131 | },
132 | {
133 | "@type": "CommandLineApplication",
134 | "executableName": "timbl",
135 | "name": "timbl",
136 | "runtimePlatform": [ "Linux", "BSD", "macOS" ],
137 | "description": "Memory-based learner, command-line tool"
138 | }
139 | ]
140 | }
141 |
--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
1 | # -*- Autoconf -*-
2 | # Process this file with autoconf to produce a configure script.
3 |
4 | AC_PREREQ([2.69])
5 | AC_INIT([timbl],[6.10],[lamasoftware@science.ru.nl]) #also adapt in codemeta.json!
6 | AM_INIT_AUTOMAKE
7 | AC_CONFIG_SRCDIR([.])
8 | AC_CONFIG_MACRO_DIR([m4])
9 | AC_CONFIG_HEADERS([config.h])
10 |
11 | AX_REQUIRE_DEFINED([AX_CXX_COMPILE_STDCXX_17])
12 |
13 | # Checks for programs.
14 | AC_PROG_CXX( [g++ c++] )
15 | AX_CXX_COMPILE_STDCXX_17
16 |
17 | # libtool stuff
18 | LT_INIT
19 |
20 | # when running tests, use CXX
21 | AC_LANG([C++])
22 |
23 | AC_OPENMP
24 | if test "x$ac_cv_prog_cxx_openmp" != "x"; then
25 |    if test "x$ac_cv_prog_cxx_openmp" != "xunsupported"; then
26 |       CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS"
27 |       AC_DEFINE([HAVE_OPENMP], [1] , [Define to 1 if you have OpenMP] )
28 |    else
29 |       # the notice text must stay on one line, a line break in it is printed verbatim
30 |       AC_MSG_NOTICE([We don't have OpenMP for Clang. Multithreaded operation is disabled])
31 |    fi
32 | fi
33 |
34 | #checks for libraries.
35 |
36 | # Checks for header files.
37 | AC_CHECK_HEADERS([sys/time.h])
38 |
39 | # Checks for typedefs, structures, and compiler characteristics.
40 | AC_HEADER_STDBOOL
41 | AC_C_INLINE
42 | AC_TYPE_SIZE_T
43 |
44 | # Checks for library functions.
45 | AC_CHECK_FUNCS([floor gettimeofday pow rint sqrt ])
46 |
47 | PKG_PROG_PKG_CONFIG
48 |
49 | if test "x$prefix" = "xNONE"; then
50 | prefix="/usr/local"
51 | fi
52 |
53 | if test "x$PKG_CONFIG_PATH" = x; then
54 | export PKG_CONFIG_PATH="$prefix/lib/pkgconfig"
55 | else
56 | export PKG_CONFIG_PATH="$prefix/lib/pkgconfig:$PKG_CONFIG_PATH"
57 | fi
58 |
59 | AC_OSX_PKG( [icu4c] )
60 |
61 | PKG_PROG_PKG_CONFIG
62 | PKG_CHECK_MODULES([XML2], [libxml-2.0 >= 2.6.16] )
63 | CXXFLAGS="$CXXFLAGS $XML2_CFLAGS"
64 | LIBS="$LIBS $XML2_LIBS"
65 |
66 | PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.30] )
67 | CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS"
68 | LIBS="$LIBS $ticcutils_LIBS"
69 |
70 | PKG_CHECK_MODULES([ICU], [icu-uc >= 50 icu-io] )
71 | CXXFLAGS="$CXXFLAGS $ICU_CFLAGS"
72 | LIBS="$ICU_LIBS $LIBS"
73 |
74 | AC_CONFIG_FILES([
75 | Makefile
76 | timbl.pc
77 | m4/Makefile
78 | src/Makefile
79 | docs/Makefile
80 | include/Makefile
81 | include/timbl/Makefile
82 | demos/Makefile
83 | ])
84 | AC_OUTPUT
85 |
--------------------------------------------------------------------------------
/demos/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.o
3 | *.lo
4 | Makefile
5 | Makefile.in
6 | .deps/
7 | .libs/
8 | tse
9 | api_test1
10 | api_test2
11 | api_test3
12 | api_test4
13 | api_test5
14 | api_test6
15 | classify
16 |
--------------------------------------------------------------------------------
/demos/Makefile.am:
--------------------------------------------------------------------------------
1 | # $Id$
2 | # $URL: $
3 |
4 | AM_CPPFLAGS = -I@top_srcdir@/include
5 | AM_CXXFLAGS = -std=c++17
6 |
7 | noinst_PROGRAMS = api_test1 api_test2 api_test3 api_test4 api_test5 api_test6\
8 | tse classify
9 |
10 | LDADD = ../src/libtimbl.la
11 |
12 | tse_SOURCES = tse.cxx
13 |
14 | classify_SOURCES = classify.cxx
15 |
16 | api_test1_SOURCES = api_test1.cxx
17 |
18 | api_test2_SOURCES = api_test2.cxx
19 |
20 | api_test3_SOURCES = api_test3.cxx
21 |
22 | api_test4_SOURCES = api_test4.cxx
23 |
24 | api_test5_SOURCES = api_test5.cxx
25 |
26 | api_test6_SOURCES = api_test6.cxx
27 |
28 | exdir = $(datadir)/doc/@PACKAGE@/examples
29 |
30 | ex_DATA = dimin.script dimin.train dimin.test cross_val.test \
31 | small_1.train small_2.train small_3.train small_4.train small_5.train
32 |
33 |
34 | EXTRA_DIST = $(ex_DATA)
35 |
--------------------------------------------------------------------------------
/demos/api_test1.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include "timbl/TimblAPI.h"
28 | int main(){  // IGTREE demo: learn dimin.train, then test twice with different options
29 |   Timbl::TimblAPI igtree_exp( "-a IGTREE +vDI+DB+F", "test1" );
30 |   igtree_exp.SetOptions( "-w3 -vDB" );  // adjust options after construction
31 |   igtree_exp.ShowSettings( std::cout );
32 |   igtree_exp.Learn( "dimin.train" );
33 |   igtree_exp.Test( "dimin.test", "my_first_test.out" );  // first run
34 |   igtree_exp.SetOptions( "-mM" );  // change the metric option between the runs
35 |   igtree_exp.Test( "dimin.test", "my_first_test.out" );  // second run, same output file
36 | }
37 |
--------------------------------------------------------------------------------
/demos/api_test2.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include
28 |
29 | #include "timbl/TimblAPI.h"
30 |
31 | int main(){  // IB2 demo: set the -b option, learn dimin.train and test dimin.test
32 |   Timbl::TimblAPI *My_Experiment = new Timbl::TimblAPI( "-a IB2 +vF+DI+DB" ,
33 |                                                         "test2" );
34 |   My_Experiment->SetOptions( "-b100" );
35 |   My_Experiment->ShowSettings( std::cout );
36 |   My_Experiment->Learn( "dimin.train" );
37 |   My_Experiment->Test( "dimin.test", "my_second_test.out" );
38 |   delete My_Experiment;
39 |   return 0;  // a completed run reports success; exit(1) signalled failure to the shell
40 | }
41 |
--------------------------------------------------------------------------------
/demos/api_test3.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include
28 |
29 | #include "timbl/TimblAPI.h"
30 | using Timbl::TimblAPI;
31 |
32 | int main(){  // runs Test() on cross_val.test with an experiment created by "-t cross_validate"
33 |   TimblAPI *cv_run = new TimblAPI( "-t cross_validate" );
34 |   cv_run->Test( "cross_val.test" );
35 |   delete cv_run;
36 |   return 0;
37 | }
38 |
--------------------------------------------------------------------------------
/demos/api_test4.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include
28 |
29 | #include
30 | #include "timbl/TimblAPI.h"
31 | using namespace Timbl;
32 |
33 | int main(){  // demo: Increment()/Decrement() two instances, testing and dumping state after each step
34 |   TimblAPI *My_Experiment = new TimblAPI( "-a IB1 +vDI+DB +mM" ,
35 |                                           "test4" );
36 |   My_Experiment->ShowSettings( std::cout );
37 |   My_Experiment->Learn( "dimin.train" );
38 |   My_Experiment->Test( "dimin.test", "inc1.out" );  // stage 1: state after Learn()
39 |   My_Experiment->SaveWeights( "wg.1.wgt" );
40 |   My_Experiment->WriteArrays( "arr.1.arr" );
41 |   My_Experiment->Increment( "=,=,=,=,+,k,e,=,-,r,@,l,T" );
42 |   My_Experiment->Test( "dimin.test", "inc2.out" );  // stage 2: after the first Increment()
43 |   My_Experiment->SaveWeights( "wg.2.wgt" );
44 |   My_Experiment->WriteArrays( "arr.2.arr" );
45 |   My_Experiment->Increment( "+,zw,A,rt,-,k,O,p,-,n,O,n,E" );
46 |   My_Experiment->Test( "dimin.test", "inc3.out" );  // stage 3: after the second Increment()
47 |   My_Experiment->SaveWeights( "wg.3.wgt" );
48 |   My_Experiment->WriteArrays( "arr.3.arr" );
49 |   My_Experiment->Decrement( "+,zw,A,rt,-,k,O,p,-,n,O,n,E" );
50 |   My_Experiment->Test( "dimin.test", "inc4.out" );  // stage 4: second instance passed to Decrement()
51 |   My_Experiment->SaveWeights( "wg.4.wgt" );
52 |   My_Experiment->WriteArrays( "arr.4.arr" );
53 |   My_Experiment->Decrement( "=,=,=,=,+,k,e,=,-,r,@,l,T" );
54 |   My_Experiment->Test( "dimin.test", "inc5.out" );  // stage 5: first instance passed to Decrement()
55 |   My_Experiment->SaveWeights( "wg.5.wgt" );
56 |   My_Experiment->WriteArrays( "arr.5.arr" );
57 |   delete My_Experiment;
58 |   return 0;  // a completed run reports success; exit(1) signalled failure to the shell
59 | }
60 |
--------------------------------------------------------------------------------
/demos/api_test5.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 | #include
27 | #include
28 | #include "timbl/TimblAPI.h"
29 |
30 | using std::endl;
31 | using std::cout;
32 | using std::string;
33 | using namespace Timbl;
34 |
35 | int main(){  // demo of neighborSet: classifyNS(), merge(), truncate(), assignment and copy
36 |   TimblAPI *My_Experiment = new TimblAPI( "-a IB1 +vDI+DB+n +mM +k4 " , "test5" );
37 |   My_Experiment->Learn( "dimin.train" );
38 |   {
39 |     neighborSet emptySet;  // fallback target for a failed classification, freed with the scope
40 |     icu::UnicodeString line = "=,=,=,=,+,k,e,=,-,r,@,l,T";
41 |     const neighborSet *neighbours1 = My_Experiment->classifyNS( line );
42 |     if ( neighbours1 ){
43 |       cout << "Classify OK on " << line << endl;
44 |       cout << neighbours1;
45 |     }
46 |     else {
47 |       cout << "Classify failed on " << line << endl;
48 |       neighbours1 = &emptySet;  // was 'new neighborSet()', which nothing ever deleted
49 |     }
50 |     neighborSet neighbours2;
51 |     line = "+,zw,A,rt,-,k,O,p,-,n,O,n,E";
52 |     if ( My_Experiment->classifyNS( line, neighbours2 ) ){
53 |       cout << "Classify OK on " << line << endl;
54 |       cout << neighbours2;
55 |     }
56 |     else {
57 |       cout << "Classify failed on " << line << endl;
58 |     }
59 |     line = "+,z,O,n,-,d,A,xs,-,=,A,rm,P";
60 |     const neighborSet *neighbours3 = My_Experiment->classifyNS( line );
61 |     if ( neighbours3 ){
62 |       cout << "Classify OK on " << line << endl;
63 |       cout << neighbours3;
64 |     }
65 |     else {
66 |       cout << "Classify failed on " << line << endl;
67 |       neighbours3 = &emptySet;  // same fallback, no allocation to leak
68 |     }
69 |     neighborSet uit2;
70 |     {
71 |       neighborSet uit;
72 |       uit.setShowDistance(true);
73 |       uit.setShowDistribution(true);
74 |       cout << " before first merge " << endl;
75 |       cout << uit;
76 |       uit.merge( *neighbours1 );
77 |       cout << " after first merge " << endl;
78 |       cout << uit;
79 |       uit.merge( *neighbours3 );
80 |       cout << " after second merge " << endl;
81 |       cout << uit;
82 |       uit.merge( neighbours2 );
83 |       cout << " after third merge " << endl;
84 |       cout << uit;
85 |       uit.truncate( 3 );
86 |       cout << " after truncate " << endl;
87 |       cout << uit;
88 |       cout << " test assignment" << endl;
89 |       uit2 = *neighbours1;
90 |     }
91 |     cout << "assignment result: " << endl;
92 |     cout << uit2;
93 |     {
94 |       cout << " test copy construction" << endl;
95 |       neighborSet uit(uit2);
96 |       cout << "result: " << endl;
97 |       cout << uit;
98 |     }
99 |     cout << "almost done!" << endl;
100 |   }
101 |   delete My_Experiment;
102 |   cout << "done!" << endl;
103 | }
104 |
--------------------------------------------------------------------------------
/demos/api_test6.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include
28 | #include "timbl/TimblAPI.h"
29 |
30 | using std::cout;
31 | using std::endl;
32 | using namespace Timbl;
33 |
34 | int main(){  // demo: classify one instance, print its distribution, then redo it via neighborSets
35 |   TimblAPI My_Experiment( "-a IB1 +vDI+DB -G 0 -k3", "test6" );
36 |   My_Experiment.Learn( "dimin.train" );
37 |   const ClassDistribution *vd = nullptr;  // initialised so a failed Classify() cannot leave it dangling
38 |   const TargetValue *tv
39 |     = My_Experiment.Classify( std::string("-,=,O,m,+,h,K,=,-,n,I,N,K"), vd );
40 |   if ( !tv || !vd ){ cout << "Classify failed" << endl; return 1; }  // vd is dereferenced below
41 |   cout << "resulting target: " << tv << endl;
42 |   cout << "resulting Distribution: " << vd << endl;
43 |   for ( ClassDistribution::dist_iterator it=vd->begin(); it != vd->end(); ++it ){
44 |     cout << it->second << " OR ";
45 |     cout << it->second->Value() << " " << it->second->Weight() << endl;
46 |   }
47 |
48 |   cout << "the same with neighborSets" << endl;
49 |   const neighborSet *nb = My_Experiment.classifyNS( "-,=,O,m,+,h,K,=,-,n,I,N,K" );
50 |   if ( !nb ){ cout << "Classify failed" << endl; return 1; }  // nb is dereferenced below
51 |   WClassDistribution *vd2 = nb->bestDistribution();
52 |   vd2->Normalize();
53 |   cout << "default answer " << vd2 << endl;
54 |   decayStruct *dc = new expDecay(0.3);
55 |   delete vd2;  // this code deletes each distribution it gets from bestDistribution()
56 |   vd2 = nb->bestDistribution( dc );
57 |   delete dc;
58 |   cout << "with exponenial decay, alpha = 0.3 " << vd2 << endl;
59 |   delete vd2;
60 | }
61 |
--------------------------------------------------------------------------------
/demos/classify.cxx:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2015
3 | ILK - Tilburg University
4 | CLiPS - University of Antwerp
5 |
6 | This file is part of timbl
7 |
8 | timbl is free software; you can redistribute it and/or modify
9 | it under the terms of the GNU General Public License as published by
10 | the Free Software Foundation; either version 3 of the License, or
11 | (at your option) any later version.
12 |
13 | timbl is distributed in the hope that it will be useful,
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | GNU General Public License for more details.
17 |
18 | You should have received a copy of the GNU General Public License
19 | along with this program; if not, see <http://www.gnu.org/licenses/>.
20 |
21 | For questions and suggestions, see:
22 | http://ilk.uvt.nl/software.html
23 | or send mail to:
24 | timbl@uvt.nl
25 | */
26 |
27 | #include
28 | #include
29 |
30 | #include
31 |
32 | #include "timbl/TimblAPI.h"
33 |
34 | using namespace std;
35 | using namespace Timbl;
36 |
37 | char inf[] = "./dimin.train"; // training file handed to Learn() in main()
38 | char test_f[] = "./dimin.test"; // test file that main() reopens and reads line by line for every run
39 |
40 | int main(){  // classifies every line of test_f with TRIBL, IB1, IGTREE and a reloaded IGTREE
41 |   string Bresult;
42 |   double Distance;
43 |
44 |   TimblAPI *Exp = new TimblAPI( "-a TRIBL" );
45 |   Exp->SetOptions( "+vS +x -N30 -q2" );
46 |   Exp->ShowOptions( cout );
47 |   Exp->Learn( inf );
48 |   ifstream testfile;
49 |   string Buffer;
50 |   testfile.open( test_f, ios::in );
51 |   cout << "\nStart testing, using TRIBL" << endl;
52 |   while ( getline( testfile, Buffer ) ){
53 |     const TargetValue *tv = Exp->Classify( Buffer, Distance );  // pointer-returning overload
54 |     if ( tv )
55 |       cout << Buffer << "\t --> " << tv << " " << Distance << endl;
56 |     else
57 |       cout << Buffer << "\t --> (nill)" << endl;
58 |   }
59 |   testfile.close();
60 |   delete Exp;
61 |   Exp = new TimblAPI( "-a IB1" );
62 |   Exp->SetOptions( "+vS" );
63 |   Exp->ShowOptions( cout );
64 |   Exp->Learn( inf );
65 |   testfile.clear();  // reset the stream state left by the previous read-to-EOF before reopening
66 |   testfile.open( test_f, ios::in );
67 |   cout << "\nStart testing, using IB" << endl;
68 |   while ( getline( testfile, Buffer ) ){
69 |     if ( Exp->Classify( Buffer, Bresult, Distance ) ){  // overload that fills Bresult
70 |       cout << Buffer << "\t --> " << Bresult << " " << Distance << endl;
71 |     }
72 |     else
73 |       cout << Buffer << "\t --> (nill)" << endl;
74 |   }
75 |   testfile.close();
76 |   delete Exp;
77 |   Exp = new TimblAPI( "-a IGTREE" );
78 |   Exp->SetOptions( "+vS -N40" );
79 |   Exp->ShowOptions( cout );
80 |   Exp->Learn( inf );
81 |   Exp->WriteInstanceBase( "dimin.tree" );  // written here, read back by the last run
82 |   Exp->SaveWeights( "dimin.wgt" );
83 |   cout << "\nStart testing, using IGTree, first run" << endl;
84 |   testfile.clear();
85 |   testfile.open( test_f, ios::in );
86 |   while ( getline( testfile, Buffer ) ){
87 |     if ( Exp->Classify( Buffer, Bresult, Distance ) ){
88 |       cout << Buffer << "\t --> " << Bresult << " " << Distance << endl;
89 |     }
90 |     else
91 |       cout << Buffer << "\t --> (nill)" << endl;
92 |   }
93 |   testfile.close();
94 |   delete Exp;
95 |   Exp = new TimblAPI( "-a IGTREE" );
96 |   Exp->SetOptions( "+vS" );
97 |   Exp->ShowOptions( cout );
98 |   Exp->GetInstanceBase( "dimin.tree" );  // no Learn(): reuse the files written by the previous run
99 |   Exp->GetWeights( "dimin.wgt" );
100 |   cout << "\nStart testing, using IGTree, second run, (retrieved Tree)" << endl;
101 |   testfile.clear();
102 |   testfile.open( test_f, ios::in );
103 |   while ( getline( testfile, Buffer ) ){
104 |     if ( Exp->Classify( Buffer, Bresult, Distance ) )
105 |       cout << Buffer << "\t --> " << Bresult << " " << Distance << endl;
106 |     else
107 |       cout << Buffer << "\t --> (nill)" << endl;
108 |   }
109 |   testfile.close();
110 |   delete Exp;  // the last experiment was never released
111 |   return 0;  // a completed run reports success; exit(1) signalled failure to the shell
112 | }
113 |
--------------------------------------------------------------------------------
/demos/cross_val.test:
--------------------------------------------------------------------------------
1 | small_1.train
2 | small_2.train
3 | small_3.train
4 | small_4.train
5 | small_5.train
6 |
--------------------------------------------------------------------------------
/demos/dimin.script:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 1998 - 2011
3 | # ILK - Tilburg University
4 | # CLiPS - University of Antwerp
5 | #
6 | # This file is part of timbl
7 | #
8 | # timbl is free software; you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation; either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # timbl is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with this program; if not, see <http://www.gnu.org/licenses/>.
20 | #
21 | # For questions and suggestions, see:
22 | # http://ilk.uvt.nl/software.html
23 | # or send mail to:
24 | # timbl@uvt.nl
25 |
26 | # example script file for tse demo program.
27 | #
28 | # create an experiment with name exp1
29 | #
30 | new exp1
31 | #
32 | # make sure that we see something happen
33 | #
34 | exp1.set +v+f+di+O
35 | #
36 | # set the desired weighting to IG
37 | #
38 | exp1.set +w IG
39 | #
40 | # now train ( prepare is implicit)
41 | #
42 | exp1.train ./dimin.train
43 | #
44 | # save the Instancebase for later use.
45 | #
46 | exp1.save tree.tmp
47 | exp1.show options
48 | #
49 | #
50 | # first we start with OVERLAP metric
51 | #
52 | exp1.set -mO
53 | #
54 | # and test
55 | exp1.test ./dimin.test a1.tmp
56 | #
57 | # now we try the Value Difference Metric
58 | exp1.set -mM
59 | #
60 |
61 | exp1.test ./dimin.test a2.tmp
62 |
63 | #
64 | # start a new experiment:
65 | new exp2
66 | #
67 | # fill it with the tree generated with exp1
68 | #
69 | exp2.get tree.tmp
70 | #
71 | # let's make a lot of noise!
72 | exp2.set +v +o+f+di+n+db
73 | #
74 | # now delete exp1, to demonstrate that it works.
75 | free exp1
76 | #
77 | # and perform a test with exp2
78 | exp2.test ./dimin.test exp2.out.tmp
79 | #
80 | # ready
81 |
--------------------------------------------------------------------------------
/demos/small_1.train:
--------------------------------------------------------------------------------
1 | Rockwell,PUNT,PUNT,PUNT,NNP,PUNT,PUNT,PUNT,I
2 | International,Rockwell,PUNT,PUNT,NNP,NNP,PUNT,PUNT,I
3 | CorpPUNT,International,Rockwell,PUNT,NNP,NNP,NNP,PUNT,I
4 | 's,CorpPUNT,International,Rockwell,POS,NNP,NNP,NNP,I
5 | Tulsa,'s,CorpPUNT,International,NNP,POS,NNP,NNP,B
6 | unit,Tulsa,'s,CorpPUNT,NN,NNP,POS,NNP,I
7 | said,unit,Tulsa,'s,VBD,NN,NNP,POS,I
8 | it,said,unit,Tulsa,PRP,VBD,NN,NNP,O
9 | signed,it,said,unit,VBD,PRP,VBD,NN,I
10 | a,signed,it,said,DT,VBD,PRP,VBD,O
11 |
--------------------------------------------------------------------------------
/demos/small_2.train:
--------------------------------------------------------------------------------
1 | tentative,a,signed,it,JJ,DT,VBD,PRP,I
2 | agreement,tentative,a,signed,NN,JJ,DT,VBD,I
3 | extending,agreement,tentative,a,VBG,NN,JJ,DT,I
4 | its,extending,agreement,tentative,PRP$,VBG,NN,JJ,O
5 | contract,its,extending,agreement,NN,PRP$,VBG,NN,I
6 | with,contract,its,extending,IN,NN,PRP$,VBG,I
7 | Boeing,with,contract,its,NNP,IN,NN,PRP$,O
8 | CoPUNT,Boeing,with,contract,NNP,NNP,IN,NN,I
9 | to,CoPUNT,Boeing,with,TO,NNP,NNP,IN,I
10 | provide,to,CoPUNT,Boeing,VB,TO,NNP,NNP,O
11 |
--------------------------------------------------------------------------------
/demos/small_3.train:
--------------------------------------------------------------------------------
1 | structural,provide,to,CoPUNT,JJ,VB,TO,NNP,O
2 | parts,structural,provide,to,NNS,JJ,VB,TO,I
3 | for,parts,structural,provide,IN,NNS,JJ,VB,I
4 | Boeing,for,parts,structural,NNP,IN,NNS,JJ,O
5 | 's,Boeing,for,parts,POS,NNP,IN,NNS,I
6 | 747,'s,Boeing,for,CD,POS,NNP,IN,B
7 | jetliners,747,'s,Boeing,NNS,CD,POS,NNP,I
8 | PUNT,jetliners,747,'s,PUNT,NNS,CD,POS,I
9 | Rockwell,PUNT,PUNT,jetliners,NNP,PUNT,PUNT,NNS,O
10 | said,Rockwell,PUNT,PUNT,VBD,NNP,PUNT,PUNT,I
11 |
--------------------------------------------------------------------------------
/demos/small_4.train:
--------------------------------------------------------------------------------
1 | the,said,Rockwell,PUNT,DT,VBD,NNP,PUNT,O
2 | agreement,the,said,Rockwell,NN,DT,VBD,NNP,I
3 | calls,agreement,the,said,VBZ,NN,DT,VBD,I
4 | for,calls,agreement,the,IN,VBZ,NN,DT,O
5 | it,for,calls,agreement,PRP,IN,VBZ,NN,O
6 | to,it,for,calls,TO,PRP,IN,VBZ,I
7 | supply,to,it,for,VB,TO,PRP,IN,O
8 | 200,supply,to,it,CD,VB,TO,PRP,O
9 | additional,200,supply,to,JJ,CD,VB,TO,I
10 | shipsets,so-called,additional,200,NNS,JJ,JJ,CD,I
11 |
--------------------------------------------------------------------------------
/demos/small_5.train:
--------------------------------------------------------------------------------
1 | for,shipsets,so-called,additional,IN,NNS,JJ,JJ,I
2 | the,for,shipsets,so-called,DT,IN,NNS,JJ,O
3 | planes,the,for,shipsets,NNS,DT,IN,NNS,I
4 | PUNT,planes,the,for,PUNT,NNS,DT,IN,I
5 | These,PUNT,PUNT,planes,DT,PUNT,PUNT,NNS,O
6 | include,These,PUNT,PUNT,VBP,DT,PUNT,PUNT,I
7 | among,KOMMA,include,These,IN,KOMMA,VBP,DT,O
8 | other,among,KOMMA,include,JJ,IN,KOMMA,VBP,O
9 |
--------------------------------------------------------------------------------
/docs/Makefile.am:
--------------------------------------------------------------------------------
1 | # $Id$
2 | # $URL$
3 |
4 | man1_MANS = timbl.1
5 |
6 | EXTRA_DIST = timbl.1
7 |
--------------------------------------------------------------------------------
/docs/Timbl_6.4_Manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LanguageMachines/timbl/1c58d476a7baf82729502c1cc2804f8d210de70e/docs/Timbl_6.4_Manual.pdf
--------------------------------------------------------------------------------
/docs/texfiles/fullname.sty:
--------------------------------------------------------------------------------
1 | %%% fullname.sty
2 | %%%
3 | %%% Stuart M. Shieber
4 | %%% Mon Mar 30 17:23:36 EST 1992
5 |
6 | %%% Modifications to the citation macros intended to be used with the
7 | %%% fullname.bst style.
8 |
9 | %%% Some material taken from Peter Patel-Schneider's AAAI style for
10 | %%% use in conjunction with AAAI-named bibliography style.
11 | %%%
12 | %%% Citation forms:
13 | %%%
14 | %%% Macro Output format
15 | %%% ----------- -----------------------------------------
16 | %%% \cite: (Dewey, 1988)
17 | %%% (Dewey, 1988, page 15)
18 | %%% (Dewey, 1988; Cheatham, 1987; Howe, 1903)
19 | %%% \shortcite: (1988)
20 | %%% (1988, page 15)
21 | %%% \namecite: Dewey (1988)
22 | %%% Dewey (1988, page 15)
23 |
24 | % don't box citations, add space between multiple citations, separate with ;
25 | \def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi
26 | \def\@citea{}\@cite{\@for\@citeb:=#2\do
27 | {\@citea\def\@citea{; }\@ifundefined
28 | {b@\@citeb}{{\bf ?}\@warning
29 | {Citation `\@citeb' on page \thepage \space undefined}}%
30 | {\csname b@\@citeb\endcsname}}}{#1}}
31 | % Allow short (name-less) citations, when used in
32 | % conjunction with a bibliography style that creates labels like
33 | % \citename{, }
34 | \let\@internalcite\cite
35 | \def\cite{\def\citename##1{##1, }(\@internalcite}
36 | \def\shortcite{\def\citename##1{}(\@internalcite}
37 | \def\namecite{\def\citename##1{##1 (}\@internalcite}
38 | \def\@biblabel#1{\def\citename##1{##1}[#1]\hfill}
39 |
40 | %%% More changes made by SMS (originals in latex.tex)
41 | % Use parentheses instead of square brackets in the text.
42 | \def\@cite#1#2{{#1\if@tempswa , #2\fi})}
43 |
44 | % Don't put a label in the bibliography at all. Just use the unlabeled format
45 | % instead.
46 | \def\thebibliography#1{\section*{References\@mkboth
47 | {References}{References}}\list
48 | {}{\setlength{\labelwidth}{0pt}\setlength{\leftmargin}{\parindent}
49 | \setlength{\itemindent}{-\parindent}}
50 | \def\newblock{\hskip .11em plus .33em minus -.07em}
51 | \sloppy\clubpenalty4000\widowpenalty4000
52 | \sfcode`\.=1000\relax}
53 | \let\endthebibliography=\endlist
54 |
55 | \def\@lbibitem[#1]#2{\item[]\if@filesw
56 | { \def\protect##1{\string ##1\space}\immediate
57 | \write\@auxout{\string\bibcite{#2}{#1}}\fi\ignorespaces}}
58 |
59 | \def\@bibitem#1{\item\if@filesw \immediate\write\@auxout
60 | {\string\bibcite{#1}{\the\c@enumi}}\fi\ignorespaces}
61 |
62 |
63 |
--------------------------------------------------------------------------------
/docs/timbl.1:
--------------------------------------------------------------------------------
1 | .TH timbl 1 "2017 November 9"
2 |
3 | .SH NAME
4 | timbl \- Tilburg Memory Based Learner
5 | .SH SYNOPSIS
6 | timbl [options]
7 |
8 | timbl \-f data\-file \-t test\(hyfile
9 |
10 | .SH DESCRIPTION
11 | TiMBL is an open source software package implementing several memory\(hybased learning algorithms, among which IB1\(hyIG, an implementation of k\(hynearest neighbor classification with feature weighting suitable for symbolic feature spaces, and IGTree, a decision\(hytree approximation of IB1\(hyIG. All implemented algorithms have in common that they store some representation of the training set explicitly in memory. During testing, new cases are classified by extrapolation from the most similar stored cases.
12 |
13 | .SH OPTIONS
14 | .B \-a
15 |
16 | or
17 | .B \-a
18 |
19 | .RS
20 | determines the classification algorithm.
21 |
22 | Possible values are:
23 |
24 | .B 0
25 | or
26 | .B IB
27 | the IB1 (k\(hyNN) algorithm (default)
28 |
29 | .B 1
30 | or
31 | .B IGTREE
32 | a decision\(hytree\(hybased approximation of IB1
33 |
34 | .B 2
35 | or
36 | .B TRIBL
37 | a hybrid of IB1 and IGTREE
38 |
39 | .B 3
40 | or
41 | .B IB2
42 | an incremental editing version of IB1
43 |
44 | .B 4
45 | or
46 | .B TRIBL2
47 | a non\(hyparametric version of TRIBL
48 | .RE
49 |
50 | .B \-b
51 | n
52 | .RS
53 | number of lines used for bootstrapping (IB2 only)
54 | .RE
55 |
56 | .B \-B
57 | n
58 | .RS
59 | number of bins used for discretization of numeric feature values (Default B=20)
60 | .RE
61 |
62 | .BR \-\-Beam =
63 | .RS
64 | limit +v db output to n highest\(hyvote classes
65 | .RE
66 |
67 | .BR \-\-clones =
68 | .RS
69 | number of threads to use for parallel testing
70 | .RE
71 |
72 | .B \-c
73 | n
74 | .RS
75 | clipping frequency for prestoring MVDM matrices
76 | .RE
77 |
78 | .B +D
79 | .RS
80 | store distributions on all nodes (necessary for
81 | using +v db with IGTree, but wastes memory otherwise)
82 | .RE
83 |
84 | .B \-\-Diversify
85 | .RS
86 | rescale weight (see docs)
87 | .RE
88 |
89 | .B \-d
90 | val
91 | .RS
92 | weigh neighbors as function of their distance:
93 | Z : equal weights to all (default)
94 | ID : Inverse Distance
95 | IL : Inverse Linear
96 | ED:a : Exponential Decay with factor a (no whitespace!)
97 | ED:a:b : Exponential Decay with factor a and b (no whitespace!)
98 | .RE
99 |
100 | .B \-e
101 | n
102 | .RS
103 | estimate time until n patterns tested
104 | .RE
105 |
106 | .B \-f
107 | file
108 | .RS
109 | read from data file 'file' OR use filenames from 'file' for cross validation test
110 | .RE
111 |
112 | .B \-F
113 | format
114 | .RS
115 | assume the specified input format
116 | (Compact, C4.5, ARFF, Columns, Binary, Sparse )
117 | .RE
118 |
119 | .B \-G
120 | normalization
121 |
122 | .RS
123 | normalize distributions (+v db option only)
124 |
125 | Supported normalizations are:
126 |
127 | .B Probability
128 | or
129 | .B 0
130 |
131 | normalize between 0 and 1
132 |
133 | .BR addFactor :
134 | or
135 | .BR 1 :
136 |
137 | add f to all possible targets, then normalize between 0 and 1 (default f=1.0).
138 |
139 | .B logProbability
140 | or
141 | .B 2
142 |
143 | Add 1 to the target Weight, take the 10Log and then normalize between 0 and 1
144 |
145 | .RE
146 |
147 | .B +H
148 | or
149 | .B \-H
150 | .RS
151 | write hashed trees (default +H)
152 | .RE
153 |
154 | .B \-i
155 | file
156 | .RS
157 | read the InstanceBase from 'file' (skips phase 1 & 2 )
158 | .RE
159 |
160 | .B \-I
161 | file
162 | .RS
163 | dump the InstanceBase in 'file'
164 | .RE
165 |
166 | .B \-k
167 | n
168 | .RS
169 | search 'n' nearest neighbors (default n = 1)
170 | .RE
171 |
172 | .B \-L
173 | n
174 | .RS
175 | set value frequency threshold to back off from MVDM to Overlap at level n
176 | .RE
177 |
178 | .B \-l
179 | n
180 | .RS
181 | fixed feature value length (Compact format only)
182 | .RE
183 |
184 | .B \-m
185 | string
186 | .RS
187 | use feature metrics as specified in 'string':
188 | The format is : GlobalMetric:MetricRange:MetricRange
189 | e.g.: mO:N3:I2,5\-7
190 |
191 | C: cosine distance. (Global only. numeric features implied)
192 | D: dot product. (Global only. numeric features implied)
193 | DC: Dice coefficient
194 | O: weighted overlap (default)
195 | E: Euclidean distance
196 | L: Levenshtein distance
197 | M: modified value difference
198 | J: Jeffrey divergence
199 | S: Jensen\(hyShannon divergence
200 | N: numeric values
201 | I: Ignore named values
202 | .RE
203 |
204 | .BR \-\-matrixin =file
205 | .RS
206 | read ValueDifference Matrices from file 'file'
207 | .RE
208 |
209 | .BR \-\-matrixout =file
210 | .RS
211 | store ValueDifference Matrices in 'file'
212 | .RE
213 |
214 | .B \-n
215 | file
216 | .RS
217 | create a C4.5\-style names file 'file'
218 | .RE
219 |
220 | .B \-M
221 | n
222 | .RS
223 | size of MaxBests Array
224 | .RE
225 |
226 | .B \-N
227 | n
228 | .RS
229 | number of features (default 2500)
230 | .RE
231 |
232 | .B \-o
233 | s
234 | .RS
235 | use s as output filename
236 | .RE
237 |
238 | .BR \-\-occurrences =
239 | .RS
240 | The input file contains occurrence counts (at the last position)
241 | value can be one of:
242 | .B train
243 | ,
244 | .B test
245 | or
246 | .B both
247 | .RE
248 |
249 | .B \-O
250 | path
251 | .RS
252 | save output using 'path'
253 | .RE
254 |
255 | .B \-p
256 | n
257 | .RS
258 | show progress every n lines (default p = 100,000)
259 | .RE
260 |
261 | .B \-P
262 | path
263 | .RS
264 | read data using 'path'
265 | .RE
266 |
267 | .B \-q
268 | n
269 | .RS
270 | set TRIBL threshold at level n
271 | .RE
272 |
273 | .B \-R
274 | n
275 | .RS
276 | solve ties at random with seed n
277 | .RE
278 |
279 | .B \-s
280 | .RS
281 | use the exemplar weights from the input file
282 | .RE
283 |
284 | .B \-s0
285 | .RS
286 | ignore the exemplar weights from the input file
287 | .RE
288 |
289 | .B \-T
290 | n
291 | .RS
292 | use feature n as the class label. (default: the last feature)
293 | .RE
294 |
295 | .B \-t
296 | file
297 | .RS
298 | test using 'file'
299 | .RE
300 |
301 | .B \-t
302 | leave_one_out
303 | .RS
304 | test with the leave\(hyone\(hyout testing regimen (IB1 only).
305 | you may add \-\-sloppy to speed up leave\(hyone\(hyout testing (but see docs)
306 | .RE
307 |
308 | .B \-t
309 | cross_validate
310 | .RS
311 | perform cross\(hyvalidation test (IB1 only)
312 | .RE
313 |
314 | .B \-t
315 | @file
316 | .RS
317 | test using files and options described in 'file'
318 | Supported options: d e F k m o p q R t u v w x % \-
319 | .RE
320 |
321 | .B \-\-Treeorder =value
322 | n
323 | .RS
324 | ordering of the Tree:
325 | DO: none
326 | GRO: using GainRatio
327 | IGO: using InformationGain
328 | 1/V: using 1/# of Values
329 | G/V: using GainRatio/# of Values
330 | I/V: using InfoGain/# of Values
331 | X2O: using X\(hysquare
332 | X/V: using X\(hysquare/# of Values
333 | SVO: using Shared Variance
334 | S/V: using Shared Variance/# of Values
335 | GxE: using GainRatio * SplitInfo
336 | IxE: using InformationGain * SplitInfo
337 | 1/S: using 1/SplitInfo
338 | .RE
339 |
340 | .B \-u
341 | file
342 | .RS
343 | read value\(hyclass probabilities from 'file'
344 | .RE
345 |
346 | .B \-U
347 | file
348 | .RS
349 | save value\(hyclass probabilities in 'file'
350 | .RE
351 |
352 | .B \-V
353 | .RS
354 | Show VERSION
355 | .RE
356 |
357 | .B +v
358 | level or
359 | .B \-v
360 | level
361 | .RS
362 | set or unset verbosity level, where level is:
363 |
364 | s: work silently
365 | o: show all options set
366 | b: show node/branch count and branching factor
367 | f: show calculated feature weights (default)
368 | p: show value difference matrices
369 | e: show exact matches
370 | as: show advanced statistics (memory consuming)
371 | cm: show confusion matrix (implies +vas)
372 | cs: show per\(hyclass statistics (implies +vas)
373 | cf: add confidence to output file (needs \-G)
374 | di: add distance to output file
375 | db: add distribution of best matched to output file
376 | md: add matching depth to output file.
377 | k: add a summary for all k neighbors to output file (sets \-x)
378 | n: add nearest neighbors to output file (sets \-x)
379 |
380 | You may combine levels using '+' e.g. +v p+db or \-v o+di
381 | .RE
382 |
383 | .B \-w
384 | n
385 | .RS
386 | weighting
387 | 0 or nw: no weighting
388 | 1 or gr: weigh using gain ratio (default)
389 | 2 or ig: weigh using information gain
390 | 3 or x2: weigh using the chi\(hysquare statistic
391 | 4 or sv: weigh using the shared variance statistic
392 | 5 or sd: weigh using standard deviation. (all features must be numeric)
393 | .RE
394 |
395 | .B \-w
396 | file
397 | .RS
398 | read weights from 'file'
399 | .RE
400 |
401 | .B \-w
402 | file:n
403 | .RS
404 | read weight n from 'file'
405 | .RE
406 |
407 | .B \-W
408 | file
409 | .RS
410 | calculate and save all weights in 'file'
411 | .RE
412 |
413 | .B +%
414 | or
415 | .B \-%
416 | .RS
417 | do or don't save test result (%) to file
418 | .RE
419 |
420 | .B +x
421 | or
422 | .B \-x
423 | .RS
424 | do or don't use the exact match shortcut
425 | (IB1 and IB2 only, default is \-x)
426 | .RE
427 |
428 | .BR \-X " file"
429 | .RS
430 | dump the InstanceBase as XML in 'file'
431 | .RE
432 |
433 | .SH BUGS
434 | possibly
435 |
436 | .SH AUTHORS
437 | Ko van der Sloot Timbl@uvt.nl
438 |
439 | Antal van den Bosch Timbl@uvt.nl
440 |
441 | .SH SEE ALSO
442 | .BR timblserver (1)
443 |
--------------------------------------------------------------------------------
/include/Makefile.am:
--------------------------------------------------------------------------------
1 | # $Id$
2 | # $URL$
3 |
4 | AUTOMAKE_OPTIONS = foreign
5 |
6 | SUBDIRS = timbl
7 |
--------------------------------------------------------------------------------
/include/timbl/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 |
--------------------------------------------------------------------------------
/include/timbl/BestArray.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2024
3 | ILK - Tilburg University
4 | CLST - Radboud University
5 | CLiPS - University of Antwerp
6 |
7 | This file is part of timbl
8 |
9 | timbl is free software; you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation; either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | timbl is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with this program; if not, see <http://www.gnu.org/licenses/>.
21 |
22 | For questions and suggestions, see:
23 | https://github.com/LanguageMachines/timbl/issues
24 | or send mail to:
25 | lamasoftware (at ) science.ru.nl
26 |
27 | */
28 | #ifndef TIMBL_BESTARRAY_H
29 | #define TIMBL_BESTARRAY_H
30 |
31 | #include
32 | #include
33 | #include
34 |
35 | #include "unicode/unistr.h"
36 | #include "libxml/parser.h"
37 | #include "ticcutils/json.hpp"
38 | #include "timbl/Targets.h"
39 |
40 | namespace Timbl {
41 |
42 | class neighborSet;
43 |
44 | class BestRec { // Non-copyable record: one distance plus the distributions and instances kept with it.
45 | friend std::ostream& operator<< ( std::ostream&, const BestRec * );
46 | public:
47 | BestRec();
48 | BestRec( const BestRec& ) = delete; // forbid copies
49 | BestRec& operator=( const BestRec& ) = delete; // forbid copies
50 | ~BestRec();
51 | size_t totalBests() const { return aggregateDist.totalSize(); }; // total size of the aggregated distribution
52 | double bestDistance; // NOTE(review): presumably the distance shared by everything stored in this record -- confirm in BestArray.cxx
53 | ClassDistribution aggregateDist; // aggregated distribution; its totalSize() is what totalBests() reports
54 | std::vector bestDistributions; // NOTE(review): element type is not visible in this listing -- confirm against the real header
55 | std::vector bestInstances; // NOTE(review): element type is not visible in this listing -- confirm against the real header
56 | private:
57 | };
58 |
59 | class BestArray { // Stores the best results of a search; can be printed and exported as XML or JSON.
60 | friend std::ostream& operator<< ( std::ostream&, const BestArray& );
61 | public:
62 | BestArray(): _storeInstances(false), // default state: all three flags off, both sizes zero
63 | _showDi(false),
64 | _showDb(false),
65 | size(0),
66 | maxBests(0)
67 | {};
68 | ~BestArray();
69 | void init( unsigned int, unsigned int, bool, bool, bool ); // NOTE(review): presumably sets size, maxBests and the three flags -- argument order is not visible here, confirm in BestArray.cxx
70 | double addResult( double, // NOTE(review): presumably records one result (distance, distribution, instance text); meaning of the returned double is defined in BestArray.cxx
71 | const ClassDistribution *,
72 | const icu::UnicodeString& );
73 | void initNeighborSet( neighborSet& ) const; // const: does not modify this BestArray
74 | void addToNeighborSet( neighborSet& , size_t ) const; // const: does not modify this BestArray
75 | xmlNode *toXML() const; // XML representation as a libxml2 node
76 | nlohmann::json to_JSON() const; // JSON representation of the whole array
77 | nlohmann::json record_to_json( const BestRec *, size_t ) const; // JSON representation of a single BestRec
78 | private:
79 | bool _storeInstances;
80 | bool _showDi;
81 | bool _showDb;
82 | unsigned int size;
83 | unsigned int maxBests;
84 | std::vector bestArray; // NOTE(review): element type is not visible in this listing; presumably BestRec pointers -- confirm
85 | };
86 |
87 | }
88 | #endif // TIMBL_BESTARRAY_H
89 |
--------------------------------------------------------------------------------
/include/timbl/Choppers.h:
--------------------------------------------------------------------------------
1 | #ifndef TIMBL_CHOPPERS_H
2 | #define TIMBL_CHOPPERS_H
3 | /*
4 | Copyright (c) 1998 - 2024
5 | ILK - Tilburg University
6 | CLST - Radboud University
7 | CLiPS - University of Antwerp
8 |
9 | This file is part of timbl
10 |
11 | timbl is free software; you can redistribute it and/or modify
12 | it under the terms of the GNU General Public License as published by
13 | the Free Software Foundation; either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 | timbl is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU General Public License for more details.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program; if not, see <http://www.gnu.org/licenses/>.
23 |
24 | For questions and suggestions, see:
25 | https://github.com/LanguageMachines/timbl/issues
26 | or send mail to:
27 | lamasoftware (at ) science.ru.nl
28 |
29 | */
30 |
31 | #include
32 | #include
33 |
34 | #include // for ostream
35 | #include // for vector
36 | #include "unicode/unistr.h"
37 | #include "unicode/ustream.h"
38 | #include "timbl/Types.h"
39 |
40 | namespace Timbl{
41 |
42 | static const icu::UnicodeString DefaultSparseString = "0.0000E-17"; // 'static' in a header: each including translation unit gets its own copy. NOTE(review): presumably the filler value for fields absent from Sparse input -- confirm in Choppers.cxx
43 |
44 | class Chopper { // Abstract base for the input choppers; chopped fields are read back from choppedInput.
45 | public:
46 | Chopper():
47 | vSize(0)
48 | {};
49 | virtual ~Chopper() {};
50 | virtual bool chop( const icu::UnicodeString&, size_t ) = 0; // implemented by each format-specific chopper
51 | const icu::UnicodeString& operator[]( int i ) const { // unchecked access to field i
52 | return choppedInput[i]; }
53 | const icu::UnicodeString& getField( size_t i ) const { // same as operator[], but with a size_t index; also unchecked
54 | return choppedInput[i];
55 | };
56 | virtual double getExW() const { return -1; }; // base default is -1; ExChopper overrides this
57 | virtual int getOcc() const { return 1; }; // base default is 1; OccChopper overrides this
58 | virtual icu::UnicodeString getString() const = 0; // implemented by each format-specific chopper
59 | void print( std::ostream& os ){ // writes getString() to os
60 | os << getString();
61 | };
62 | void swapTarget( size_t target_pos ){ // moves field target_pos to slot vSize-1, shifting the later fields one position left
63 | icu::UnicodeString tmp = choppedInput[target_pos]; // no bounds check: caller must ensure target_pos < vSize
64 | for ( size_t i = target_pos+1; i < vSize; ++i ){
65 | choppedInput[i-1] = choppedInput[i];
66 | }
67 | choppedInput[vSize-1] = tmp; // vSize must be > 0 here, otherwise vSize-1 wraps around
68 | }
69 | static Chopper *create( InputFormatType , bool, int, bool ); // factory returning a raw pointer. NOTE(review): ownership presumably passes to the caller -- confirm
70 | static InputFormatType getInputFormat( const icu::UnicodeString&,
71 | bool=false );
72 | static size_t countFeatures( const icu::UnicodeString&,
73 | InputFormatType,
74 | int,
75 | bool=false );
76 | protected:
77 | virtual void init( const icu::UnicodeString&, size_t, bool ); // overridden by ExChopper and OccChopper
78 | size_t vSize; // upper bound used by swapTarget(). NOTE(review): presumably the number of entries in choppedInput -- confirm
79 | icu::UnicodeString strippedInput;
80 | std::vector choppedInput; // the fields returned by operator[] / getField() as icu::UnicodeString references
81 | };
82 |
83 | class ExChopper: public virtual Chopper { // Mix-in that adds a stored exW value to a chopper (virtual base, so it combines with a format chopper).
84 | public:
85 | ExChopper():
86 | Chopper(),
87 | exW(-1.0) // starts at -1.0, the same value the base-class getExW() returns
88 | {};
89 | double getExW() const override { return exW; }; // returns the stored value instead of the base default
90 | protected:
91 | void init( const icu::UnicodeString&, size_t, bool ) override; // NOTE(review): presumably extracts exW from the input -- confirm in Choppers.cxx
92 | double exW; // NOTE(review): presumably the exemplar weight read from the input line -- confirm
93 | };
94 |
95 | class OccChopper: public virtual Chopper { // Mix-in that adds a stored occ value to a chopper (virtual base, so it combines with a format chopper).
96 | public:
97 | OccChopper():
98 | Chopper(),
99 | occ(-1) // starts at -1, whereas the base-class getOcc() returns 1
100 | {};
101 | int getOcc() const override { return occ; }; // returns the stored value instead of the base default
102 | protected:
103 | void init( const icu::UnicodeString&, size_t, bool ) override; // NOTE(review): presumably extracts occ from the input -- confirm in Choppers.cxx
104 | int occ; // NOTE(review): presumably the occurrence count read from the input line -- confirm
105 | };
106 |
107 | class C45_Chopper : public virtual Chopper { // Chopper for the C4.5 format: supplies chop() and getString().
108 | public:
109 | bool chop( const icu::UnicodeString&, size_t ) override;
110 | icu::UnicodeString getString() const override;
111 | };
112 |
113 | class C45_ExChopper : public C45_Chopper, public ExChopper { // C45_Chopper combined with the ExChopper mix-in; adds no members of its own
114 | };
115 |
116 | class C45_OccChopper : public C45_Chopper, public OccChopper { // C45_Chopper combined with the OccChopper mix-in; adds no members of its own
117 | };
118 |
119 | class ARFF_Chopper : public C45_Chopper { // Chopper for the ARFF format: overrides chop() only and inherits getString() from C45_Chopper.
120 | public:
121 | bool chop( const icu::UnicodeString&, size_t ) override;
122 | };
123 |
124 | class ARFF_ExChopper : public C45_ExChopper { // derives from C45_ExChopper, not from ARFF_Chopper; adds no members of its own
125 | };
126 |
127 | class ARFF_OccChopper : public C45_OccChopper { // derives from C45_OccChopper, not from ARFF_Chopper; adds no members of its own
128 | };
129 |
130 | class Bin_Chopper : public virtual Chopper { // Chopper for the Binary format: supplies chop() and getString().
131 | public:
132 | bool chop( const icu::UnicodeString&, size_t ) override;
133 | icu::UnicodeString getString() const override;
134 | };
135 |
136 | class Bin_ExChopper : public Bin_Chopper, public ExChopper { // Bin_Chopper combined with the ExChopper mix-in; adds no members of its own
137 | };
138 |
139 | class Bin_OccChopper : public Bin_Chopper, public OccChopper { // Bin_Chopper combined with the OccChopper mix-in; adds no members of its own
140 | };
141 |
142 | class Compact_Chopper : public virtual Chopper { // Chopper for the Compact format; must be constructed with a length L.
143 | public:
144 | explicit Compact_Chopper( int L ): fLen(L){}; // stores L in fLen
145 | bool chop( const icu::UnicodeString&, size_t ) override;
146 | icu::UnicodeString getString() const override;
147 | private:
148 | int fLen; // NOTE(review): presumably the fixed feature value length of the Compact format -- confirm in Choppers.cxx
149 | Compact_Chopper(); // declared private: the class cannot be default-constructed from outside
150 | };
151 |
152 | class Compact_ExChopper : public Compact_Chopper, public ExChopper { // Compact_Chopper combined with the ExChopper mix-in
153 | public:
154 | explicit Compact_ExChopper( int L ): Compact_Chopper( L ){}; // forwards L to Compact_Chopper
155 | private:
156 | Compact_ExChopper(); // declared private: no default construction from outside
157 | };
158 |
159 | class Compact_OccChopper : public Compact_Chopper, public OccChopper { // Compact_Chopper combined with the OccChopper mix-in
160 | public:
161 | explicit Compact_OccChopper( int L ): Compact_Chopper( L ){}; // forwards L to Compact_Chopper
162 | private:
163 | Compact_OccChopper(); // declared private: no default construction from outside
164 | };
165 |
166 | class Columns_Chopper : public virtual Chopper { // Chopper for the Columns format: supplies chop() and getString().
167 | public:
168 | bool chop( const icu::UnicodeString&, size_t ) override;
169 | icu::UnicodeString getString() const override;
170 | };
171 |
172 | class Columns_ExChopper : public Columns_Chopper, public ExChopper { // Columns_Chopper combined with the ExChopper mix-in; adds no members of its own
173 | };
174 |
175 | class Columns_OccChopper : public Columns_Chopper, public OccChopper { // Columns_Chopper combined with the OccChopper mix-in; adds no members of its own
176 | };
177 |
178 | class Tabbed_Chopper : public virtual Chopper { // Chopper for the Tabbed format: supplies chop() and getString().
179 | public:
180 | bool chop( const icu::UnicodeString&, size_t ) override;
181 | icu::UnicodeString getString() const override;
182 | };
183 |
184 | class Tabbed_ExChopper : public Tabbed_Chopper, public ExChopper { // Tabbed_Chopper combined with the ExChopper mix-in; adds no members of its own
185 | };
186 |
187 | class Tabbed_OccChopper : public Tabbed_Chopper, public OccChopper { // Tabbed_Chopper combined with the OccChopper mix-in; adds no members of its own
188 | };
189 |
190 |
191 | class Sparse_Chopper : public virtual Chopper { // Chopper for the Sparse format: supplies chop() and getString().
192 | public:
193 | bool chop( const icu::UnicodeString&, size_t ) override;
194 | icu::UnicodeString getString() const override;
195 | };
196 |
197 | class Sparse_ExChopper : public Sparse_Chopper, public ExChopper { // Sparse_Chopper combined with the ExChopper mix-in; adds no members of its own
198 | };
199 |
200 | class Sparse_OccChopper : public Sparse_Chopper, public OccChopper { // Sparse_Chopper combined with the OccChopper mix-in; adds no members of its own
201 | };
202 |
203 | }
204 | #endif // TIMBL_CHOPPERS_H
205 |
--------------------------------------------------------------------------------
/include/timbl/Common.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2024
3 | ILK - Tilburg University
4 | CLST - Radboud University
5 | CLiPS - University of Antwerp
6 |
7 | This file is part of timbl
8 |
9 | timbl is free software; you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation; either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | timbl is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with this program; if not, see <http://www.gnu.org/licenses/>.
21 |
22 | For questions and suggestions, see:
23 | https://github.com/LanguageMachines/timbl/issues
24 | or send mail to:
25 | lamasoftware (at ) science.ru.nl
26 |
27 | */
28 | #ifndef TIMBL_COMMON_H
29 | #define TIMBL_COMMON_H
30 |
31 | #include
32 | #include
33 | #include
34 | #include
35 | #include // for isspace
36 | #include // for string
37 |
38 | namespace Common { // Shared constants, version queries and small stream/math helpers.
39 | const double Epsilon = std::numeric_limits::epsilon();
40 | // smallest x so that 1+x != 1
41 | const int DEFAULT_MAX_FEATS = 2500; // default maximum number of Features
42 | // Version/build description strings; only declared here, defined outside this header.
43 | std::string Version();
44 | std::string VersionName();
45 | std::string BuildInfo();
46 | std::string VersionInfo( bool ); // obsolete
47 | // look_ahead: discards leading whitespace from 'is' and returns the next character without consuming it.
48 | inline int look_ahead( std::istream &is ){
49 | while( is ){
50 | int nc=is.peek(); // peek() does not extract; at end of input it yields the EOF value
51 | if ( !isspace(nc) )
52 | return nc; // non-space character (or peek()'s EOF value) is returned as-is
53 | is.get(); // consume the whitespace character just peeked
54 | }
55 | return -1; // the stream was, or became, not good before a non-space character was seen
56 | }
57 | // skip_spaces: same loop as look_ahead, but only consumes the whitespace and returns nothing.
58 | inline void skip_spaces( std::istream &is ){
59 | while( is ){
60 | int nc=is.peek();
61 | if ( !isspace(nc) )
62 | return; // next character is left in the stream
63 | is.get();
64 | }
65 | }
66 | // Log2: base-2 logarithm, except that inputs with |number| < Epsilon give 0.0 instead of calling log2().
67 | inline double Log2(double number){
68 | // LOG base 2.
69 | if ( fabs(number) < Epsilon) // treat values this close to zero as zero
70 | return(0.0);
71 | return log2(number); // negative inputs are passed to log2() unchanged
72 | }
73 |
74 | }
75 | #endif
76 |
--------------------------------------------------------------------------------
/include/timbl/Features.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 1998 - 2024
3 | ILK - Tilburg University
4 | CLST - Radboud University
5 | CLiPS - University of Antwerp
6 |
7 | This file is part of timbl
8 |
9 | timbl is free software; you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation; either version 3 of the License, or
12 | (at your option) any later version.
13 |
14 | timbl is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details.
18 |
19 | You should have received a copy of the GNU General Public License
20 | along with this program; if not, see <http://www.gnu.org/licenses/>.
21 |
22 | For questions and suggestions, see:
23 | https://github.com/LanguageMachines/timbl/issues
24 | or send mail to:
25 | lamasoftware (at ) science.ru.nl
26 |
27 | */
28 | #ifndef TIMBL_FEATURES_H
29 | #define TIMBL_FEATURES_H
30 |
31 | #include
32 | #include