├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── README.rst ├── README_CN.md ├── runtests.py ├── sample ├── classify.py ├── custom_tokenize.py ├── load.py └── test.py ├── setup.py └── tgrocery ├── __init__.py ├── base.py ├── classifier.py ├── converter.py └── learner ├── Makefile ├── __init__.py ├── learner.py ├── liblinear ├── COPYRIGHT ├── Makefile ├── README ├── blas │ ├── Makefile │ ├── blas.h │ ├── blasp.h │ ├── daxpy.c │ ├── ddot.c │ ├── dnrm2.c │ └── dscal.c ├── heart_scale ├── linear.cpp ├── linear.def ├── linear.h ├── predict.c ├── python │ ├── Makefile │ ├── README │ ├── liblinear.py │ └── liblinearutil.py ├── train.c ├── tron.cpp └── tron.h ├── test.cpp └── util.c /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .pypirc 3 | 4 | *.txt 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | 10 | # C extensions 11 | *.so.1 12 | *.o 13 | *.a 14 | *.svm 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # PyBuilder 62 | target/ 63 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.6" 5 | - "2.7" 6 | 7 | branches: 8 | only: 9 | - master 10 | 11 | before_script: 12 | - pip install jieba 13 | - make 14 | 15 | script: 16 | - python runtests.py 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Makefile 2 | recursive-include tgrocery/learner *.c *.cpp *.h Makefile 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | make -C tgrocery/learner 3 | 4 | clean: 5 | rm -rf *.svm *.converter *.model *.config *.out *.pyc 6 | make -C doc clean -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TextGrocery 2 | =========== 3 | 4 | [![Build Status](https://travis-ci.org/2shou/TextGrocery.svg?branch=master)](https://travis-ci.org/2shou/TextGrocery) 5 | 6 | A simple, efficient short-text classification tool based on LibLinear 7 | 8 | Embed with [jieba](https://github.com/fxsjy/jieba) as default tokenizer to support Chinese tokenize 9 | 10 | Other languages: [更详细的中文文档](http://textgrocery.readthedocs.org/zh/latest/index.html) 11 | 12 | 
Performance 13 | ----------- 14 | 15 | - Train set: 48k news titles with 32 labels 16 | - Test set: 16k news titles with 32 labels 17 | - Compare with svm and naive-bayes of [scikit-learn](https://github.com/scikit-learn/scikit-learn) 18 | 19 | | Classifier | Accuracy | Time cost(s) | 20 | |:------------------------:|:---------:|:--------------:| 21 | | scikit-learn(nb) | 76.8% | 134 | 22 | | scikit-learn(svm) | 76.9% | 121 | 23 | | **TextGrocery** | **79.6%** | **49** | 24 | 25 | Sample Code 26 | ----------- 27 | 28 | ```python 29 | >>> from tgrocery import Grocery 30 | # Create a grocery(don't forget to set a name) 31 | >>> grocery = Grocery('sample') 32 | # Train from list 33 | >>> train_src = [ 34 | ('education', 'Student debt to cost Britain billions within decades'), 35 | ('education', 'Chinese education for TV experiment'), 36 | ('sports', 'Middle East and Asia boost investment in top level sports'), 37 | ('sports', 'Summit Series look launches HBO Canada sports doc series: Mudhar') 38 | ] 39 | >>> grocery.train(train_src) 40 | # Or train from file 41 | # Format: Label\tText 42 | >>> grocery.train('train_ch.txt') 43 | # Save model 44 | >>> grocery.save() 45 | # Load model(the same name as previous) 46 | >>> new_grocery = Grocery('sample') 47 | >>> new_grocery.load() 48 | # Predict 49 | >>> new_grocery.predict('Abbott government spends $8 million on higher education media blitz') 50 | education 51 | # Test from list 52 | >>> test_src = [ 53 | ('education', 'Abbott government spends $8 million on higher education media blitz'), 54 | ('sports', 'Middle East and Asia boost investment in top level sports'), 55 | ] 56 | >>> new_grocery.test(test_src) 57 | # Return Accuracy 58 | 1.0 59 | # Or test from file 60 | >>> new_grocery.test('test_ch.txt') 61 | # Custom tokenize 62 | >>> custom_grocery = Grocery('custom', custom_tokenize=list) 63 | ``` 64 | 65 | More examples: [sample/](sample/) 66 | 67 | Install 68 | ------- 69 | 70 | $ pip install tgrocery 71 | 72 | > 
Only tested on Unix-based systems 73 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | TextGrocery 2 | ----------- 3 | 4 | A simple, efficient short-text classification tool based on LibLinear. 5 | 6 | Sample Usage 7 | ```````````` 8 | .. code:: python 9 | 10 | >>> from tgrocery import Grocery 11 | # Create a grocery(don't forget to set a name) 12 | >>> grocery = Grocery('sample') 13 | # Train from list 14 | >>> train_src = [ 15 | ('education', 'Student debt to cost Britain billions within decades'), 16 | ('education', 'Chinese education for TV experiment'), 17 | ('sports', 'Middle East and Asia boost investment in top level sports'), 18 | ('sports', 'Summit Series look launches HBO Canada sports doc series: Mudhar') 19 | ] 20 | >>> grocery.train(train_src) 21 | # Or train from file 22 | >>> grocery.train('train_ch.txt') 23 | # Save model 24 | >>> grocery.save() 25 | # Load model(the same name as previous) 26 | >>> new_grocery = Grocery('sample') 27 | >>> new_grocery.load() 28 | # Predict 29 | >>> new_grocery.predict('Abbott government spends $8 million on higher education media blitz') 30 | education 31 | # Test from list 32 | >>> test_src = [ 33 | ('education', 'Abbott government spends $8 million on higher education media blitz'), 34 | ('sports', 'Middle East and Asia boost investment in top level sports'), 35 | ] 36 | >>> new_grocery.test(test_src) 37 | # Return Accuracy 38 | 1.0 39 | # Or test from file 40 | >>> new_grocery.test('test_ch.txt') 41 | # Custom tokenize 42 | >>> custom_grocery = Grocery('custom', custom_tokenize=list) 43 | 44 | Installation 45 | ```````````` 46 | .. 
code:: bash 47 | 48 | $ pip install tgrocery 49 | 50 | Links 51 | ````` 52 | 53 | * `Code on Github <https://github.com/2shou/TextGrocery>`_ -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 | TextGrocery 2 | =========== 3 | 4 | [![Build Status](https://travis-ci.org/2shou/TextGrocery.svg?branch=master)](https://travis-ci.org/2shou/TextGrocery) 5 | 6 | 一个高效易用的短文本分类工具,基于[LibLinear](http://www.csie.ntu.edu.tw/~cjlin/liblinear) 7 | 8 | TextGrocery整合了[结巴分词](https://github.com/fxsjy/jieba)作为默认的分词单元,以支持中文的短文本分类 9 | 10 | 性能 11 | ---- 12 | 13 | - 训练集:来自32个类别的4.8万条新闻标题 14 | - 测试集:来自32个类别的1.6万条新闻标题 15 | - 与[scikit-learn](https://github.com/scikit-learn/scikit-learn)的svm和朴素贝叶斯算法做横向对比 16 | 17 | | 分类器 | 准确率 | 计算时间(秒) | 18 | |:------------------------:|:---------:|:--------------:| 19 | | scikit-learn(nb) | 76.8% | 134 | 20 | | scikit-learn(svm) | 76.9% | 121 | 21 | | **TextGrocery** | **79.6%** | **49** | 22 | 23 | 示例代码 24 | ------- 25 | 26 | ```python 27 | >>> from tgrocery import Grocery 28 | # 新开张一个杂货铺,别忘了取名! 
class GroceryTestCase(unittest.TestCase):
    """End-to-end smoke test: train, save, reload and predict with a Grocery."""

    def setUp(self):
        # Tiny two-label corpus of news titles used as training data.
        self.train_src = [
            ('education', '名师指导托福语法技巧:名词的复数形式'),
            ('education', '中国高考成绩海外认可 是“狼来了”吗?'),
            ('sports', '图文:法网孟菲尔斯苦战进16强 孟菲尔斯怒吼'),
            ('sports', '四川丹棱举行全国长距登山挑战赛 近万人参与')
        ]
        self.grocery_name = 'test'

    def tearDown(self):
        # Remove the model directory written by save(); doing it here makes
        # the cleanup run even when an assertion in the test fails.
        if self.grocery_name and os.path.exists(self.grocery_name):
            shutil.rmtree(self.grocery_name)

    def test_main(self):
        grocery = Grocery(self.grocery_name)
        grocery.train(self.train_src)
        grocery.save()
        # Reload under the same name and verify the *reloaded* instance.
        new_grocery = Grocery(self.grocery_name)
        new_grocery.load()
        self.assertTrue(new_grocery.get_load_status())
        # predict() returns a result object, so compare its string form.
        predicted = new_grocery.predict('考生必读:新托福写作考试评分标准')
        self.assertEqual(str(predicted), 'education')


# The module name of a script run directly is '__main__' (with underscores);
# comparing against 'main' meant the suite was never executed.
if __name__ == '__main__':
    unittest.main()
train_src = [ 9 | ('education', '名师指导托福语法技巧:名词的复数形式'), 10 | ('education', '中国高考成绩海外认可 是“狼来了”吗?'), 11 | ('sports', '图文:法网孟菲尔斯苦战进16强 孟菲尔斯怒吼'), 12 | ('sports', '四川丹棱举行全国长距登山挑战赛 近万人参与') 13 | ] 14 | grocery.train(train_src) 15 | grocery.save() 16 | 17 | # load 18 | # grocery name must be the same as the previous one 19 | new_grocery = Grocery('test') 20 | new_grocery.load() 21 | print new_grocery.predict('考生必读:新托福写作考试评分标准') 22 | -------------------------------------------------------------------------------- /sample/test.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from tgrocery import Grocery 4 | 5 | 6 | grocery = Grocery('test') 7 | train_src = [ 8 | ('education', '名师指导托福语法技巧:名词的复数形式'), 9 | ('education', '中国高考成绩海外认可 是“狼来了”吗?'), 10 | ('sports', '图文:法网孟菲尔斯苦战进16强 孟菲尔斯怒吼'), 11 | ('sports', '四川丹棱举行全国长距登山挑战赛 近万人参与') 12 | ] 13 | grocery.train(train_src) 14 | print grocery.get_load_status() 15 | 16 | test_src = [ 17 | ('education', '福建春季公务员考试报名18日截止 2月6日考试'), 18 | ('sports', '意甲首轮补赛交战记录:米兰客场8战不败国米10年连胜'), 19 | ] 20 | test_result = grocery.test(test_src) 21 | print test_result.accuracy_labels 22 | print test_result.recall_labels 23 | 24 | grocery = Grocery('text_src') 25 | train_src = '../text_src/train_ch.txt' 26 | grocery.train(train_src) 27 | print grocery.get_load_status() 28 | 29 | test_src = '../text_src/test_ch.txt' 30 | test_result = grocery.test(test_src) 31 | print test_result.accuracy_labels 32 | print test_result.recall_labels 33 | test_result.show_result() -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | 5 | from setuptools.command.install import install 6 | 7 | from setuptools import setup 8 | 9 | 10 | with open('README.rst') as f: 11 | LONG_DESCRIPTION = f.read() 12 | 13 | 14 | class MakeCommand(install): 15 | def run(self): 16 | 
class GroceryException(Exception):
    """Base class for the errors raised by this package."""


class GroceryNotTrainException(GroceryException):
    """Error signalling that a text model has not been trained yet."""

    def __init__(self):
        message = 'Text model has not been trained.'
        # Hand the message to Exception.__init__ so that str(exc), exc.args
        # and tracebacks carry it; previously they were empty.
        super(GroceryNotTrainException, self).__init__(message)
        # Kept for callers that read the attribute directly.
        self.message = message
class Grocery(object):
    """Facade that trains, evaluates, saves and loads a short-text classifier.

    :param name: model name; the training file is ``<name>_train.svm`` and
        the same name is passed to the model's ``save`` and ``load``.
    :param custom_tokenize: optional callable handed to the text converter
        in place of its default tokenizer.
    :raises GroceryException: if *custom_tokenize* is given but not callable.
    """

    def __init__(self, name, custom_tokenize=None):
        # Assign every attribute before validating, so that __del__ never
        # meets a half-initialised instance when the check below raises.
        self.name = name
        self.custom_tokenize = custom_tokenize
        self.model = None
        self.classifier = None
        self.train_svm_file = None
        if custom_tokenize is not None and not callable(custom_tokenize):
            raise GroceryException('Tokenize func must be callable.')

    def get_load_status(self):
        """Return True once a model has been trained or loaded."""
        return self.model is not None and isinstance(self.model, GroceryTextModel)

    def train(self, train_src, delimiter='\t'):
        """Train a model from *train_src* and return ``self``.

        *train_src* and *delimiter* are passed unchanged to the converter's
        ``convert_text``, which writes the intermediate ``.svm`` file.
        """
        text_converter = GroceryTextConverter(custom_tokenize=self.custom_tokenize)
        self.train_svm_file = '%s_train.svm' % self.name
        text_converter.convert_text(train_src, output=self.train_svm_file, delimiter=delimiter)
        # default parameter
        model = train(self.train_svm_file, '', '-s 4')
        self.model = GroceryTextModel(text_converter, model)
        return self

    def predict(self, single_text):
        """Return the model's prediction for *single_text*.

        :raises GroceryNotTrainException: if no model is trained or loaded.
        """
        if not self.get_load_status():
            raise GroceryNotTrainException()
        return self.model.predict_text(single_text)

    def test(self, text_src, delimiter='\t'):
        """Evaluate the model on *text_src* and return the test result.

        :raises GroceryNotTrainException: if no model is trained or loaded.
        """
        if not self.get_load_status():
            raise GroceryNotTrainException()
        return GroceryTest(self.model).test(text_src, delimiter)

    def save(self):
        """Save the model under ``self.name``, overwriting an existing one.

        :raises GroceryNotTrainException: if no model is trained or loaded.
        """
        if not self.get_load_status():
            raise GroceryNotTrainException()
        self.model.save(self.name, force=True)

    def load(self):
        """Load the model previously saved under ``self.name``."""
        text_converter = GroceryTextConverter(custom_tokenize=self.custom_tokenize)
        self.model = GroceryTextModel(text_converter)
        self.model.load(self.name)

    def __del__(self):
        # Best-effort removal of the intermediate training file.  getattr
        # keeps this safe for instances whose __init__ never completed.
        svm_file = getattr(self, 'train_svm_file', None)
        if not svm_file:
            return
        try:
            os.remove(svm_file)
        except (OSError, AttributeError):
            # OSError: the file is already gone or cannot be removed.
            # AttributeError: module globals may already be torn down at
            # interpreter exit.  A destructor must not raise either way.
            pass
class GroceryTestResult(object):
    """Evaluation summary comparing true labels with predicted labels.

    The constructor computes three attributes:

    * ``accuracy_overall`` -- fraction of positions where both labels match
      (``0.0`` when there are no samples).
    * ``accuracy_labels`` -- per label, correct predictions divided by the
      number of times that label was predicted.
    * ``recall_labels`` -- per label, correct predictions divided by the
      number of times that label is the true label.

    :param true_y: sequence of expected labels.
    :param predicted_y: sequence of predicted labels, aligned with *true_y*.
    """

    def __init__(self, true_y, predicted_y):
        self.true_y = true_y
        self.predicted_y = predicted_y
        self._compute_accuracy_overall()
        self._compute_accuracy_recall_labels()

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 when empty."""
        if not denominator:
            return float(0)
        return float(numerator) / denominator

    def _compute_accuracy_overall(self):
        """Set ``accuracy_overall``; an empty test set yields 0.0."""
        hits = sum(1 for expected, predicted in zip(self.true_y, self.predicted_y)
                   if expected == predicted)
        self.accuracy_overall = self._ratio(hits, len(self.true_y))

    def _compute_accuracy_recall_labels(self):
        """Set ``accuracy_labels`` and ``recall_labels`` for every label."""
        # label -> [correct predictions, times predicted, times true]
        counts = {}
        for true_label, predicted_label in zip(self.true_y, self.predicted_y):
            predicted_counts = counts.setdefault(predicted_label, [0, 0, 0])
            true_counts = counts.setdefault(true_label, [0, 0, 0])
            if predicted_label == true_label:
                predicted_counts[0] += 1
            predicted_counts[1] += 1
            true_counts[2] += 1
        self.accuracy_labels = {}
        self.recall_labels = {}
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for label, (correct, predicted, actual) in counts.items():
            self.accuracy_labels[label] = self._ratio(correct, predicted)
            self.recall_labels[label] = self._ratio(correct, actual)

    @staticmethod
    def draw_table(data, row_labels, column_labels):
        """Return a text table with left-aligned, 15-character wide cells.

        :param data: iterable of rows, each an iterable of cell values.
        :param row_labels: label printed in the first column of each row.
        :param column_labels: headings printed above the data columns.
        """
        def render(cells):
            # ljust instead of '{:<15}': auto-numbered format fields are
            # not available on Python 2.6.
            return ''.join(str(cell).ljust(15) for cell in cells) + '\n'

        lines = [render([''] + list(column_labels))]
        for row_label, row_data in zip(row_labels, data):
            lines.append(render([row_label] + list(row_data)))
        return ''.join(lines)

    def show_result(self):
        """Print the per-label accuracy/recall table."""
        labels = list(self.accuracy_labels)
        # Look both values up by label so the rows never depend on two
        # dicts happening to iterate in the same order.
        rows = [
            ('%.2f%%' % (self.accuracy_labels[label] * 100),
             '%.2f%%' % (self.recall_labels[label] * 100))
            for label in labels
        ]
        print(self.draw_table(rows, labels, ('accuracy', 'recall')))

    def __str__(self):
        return str(self.accuracy_overall)
class GroceryTest(object):
    """Runs a model over labelled text and collects the outcome."""

    def __init__(self, model):
        self.model = model

    def test(self, text_src, delimiter):
        """Predict every record of *text_src* and return a GroceryTestResult.

        Records that do not unpack into exactly ``(label, text)`` are skipped.
        """
        expected = []
        predicted = []
        for record in read_text_src(text_src, delimiter):
            try:
                label, text = record
            except ValueError:
                # Wrong number of fields: ignore this record.
                continue
            prediction = self.model.predict_text(text)
            predicted.append(prediction.predicted_y)
            expected.append(label)
        return GroceryTestResult(expected, predicted)
class GroceryTextPreProcessor(object):
    """Tokenizes text and maps every token to a stable integer index."""

    def __init__(self):
        # index must start from 1
        self.tok2idx = {'>>dummy<<': 0}
        self.idx2tok = None

    @staticmethod
    def _default_tokenize(text):
        """Tokenize stripped *text* with jieba in full (cut_all) mode."""
        return jieba.cut(text.strip(), cut_all=True)

    def preprocess(self, text, custom_tokenize):
        """Return the list of token indices for *text*.

        Tokens not seen before are given the next free index.

        :param text: the text to tokenize.
        :param custom_tokenize: callable producing the tokens, or ``None``
            to use the default tokenizer.
        """
        if custom_tokenize is not None:
            tokens = custom_tokenize(text)
        else:
            tokens = self._default_tokenize(text)
        indices = []
        for tok in tokens:
            if tok not in self.tok2idx:
                self.tok2idx[tok] = len(self.tok2idx)
            indices.append(self.tok2idx[tok])
        return indices

    def save(self, dest_file):
        """Pickle the index-to-token list into *dest_file*."""
        self.idx2tok = _dict2list(self.tok2idx)
        config = {'idx2tok': self.idx2tok}
        # The with-block closes the handle even if pickling fails.
        with open(dest_file, 'wb') as fout:
            cPickle.dump(config, fout, -1)

    def load(self, src_file):
        """Restore the token mapping from *src_file* and return ``self``."""
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(src_file, 'rb') as fin:
            config = cPickle.load(fin)
        self.idx2tok = config['idx2tok']
        self.tok2idx = _list2dict(self.idx2tok)
        return self
class GroceryClassMapping(object):
    """Two-way mapping between class names and integer class indices."""

    def __init__(self):
        self.class2idx = {}
        # Lazily built reverse list; None means "rebuild from class2idx".
        self.idx2class = None

    def to_idx(self, class_name):
        """Return the index of *class_name*, registering it if it is new."""
        if class_name in self.class2idx:
            return self.class2idx[class_name]

        m = len(self.class2idx)
        self.class2idx[class_name] = m
        # A new class makes any cached reverse list stale.
        self.idx2class = None
        return m

    def to_class_name(self, idx):
        """Return the class name stored at *idx*.

        ``-1`` yields the placeholder ``"**not in training**"``.

        :raises KeyError: if *idx* is not below the number of classes.
        """
        if self.idx2class is None:
            self.idx2class = _dict2list(self.class2idx)
        if idx == -1:
            return "**not in training**"
        if idx >= len(self.idx2class):
            # The second placeholder must be {1}; it used to repeat the
            # index instead of showing the class count.
            raise KeyError(
                'class idx ({0}) should be less than the number of classes ({1}).'.format(idx, len(self.idx2class)))
        return self.idx2class[idx]

    def save(self, dest_file):
        """Pickle the index-to-class list into *dest_file*."""
        self.idx2class = _dict2list(self.class2idx)
        config = {'idx2class': self.idx2class}
        # The with-block closes the handle even if pickling fails.
        with open(dest_file, 'wb') as fout:
            cPickle.dump(config, fout, -1)

    def load(self, src_file):
        """Restore the mapping from *src_file* and return ``self``."""
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(src_file, 'rb') as fin:
            config = cPickle.load(fin)
        self.idx2class = config['idx2class']
        self.class2idx = _list2dict(self.idx2class)
        return self
def save(self, dest_dir):
    """Write the three converter components into *dest_dir*.

    The directory is created when it does not exist yet; each component
    is then asked to save itself to its own pickle file inside it.
    """
    if not os.path.exists(dest_dir):
        os.mkdir(dest_dir)
    targets = (
        ('text_prep', 'text_prep.config.pickle'),
        ('feat_gen', 'feat_gen.config.pickle'),
        ('class_map', 'class_map.config.pickle'),
    )
    for attribute, file_name in targets:
        getattr(self, attribute).save(os.path.join(dest_dir, file_name))
def print_debug(src):
    """Print *src* with a debug prefix when SHORTTEXTDEBUG is set."""
    if not os.environ.get('SHORTTEXTDEBUG'):
        return
    print('[DEBUG]: ' + src)


def fillprototype(f, restype, argtypes):
    """Set the result type and argument types on the function object *f*."""
    f.restype, f.argtypes = restype, argtypes


def genFields(names, types):
    """Pair every name with its type and return the pairs as a list."""
    fields = zip(names, types)
    return list(fields)
def read_SVMProblem(src):
    """Read the problem file *src* through the util library.

    The library reports its outcome through a status value: ``0`` means
    success, ``-1`` and ``-2`` are turned into IOError and MemoryError,
    and any other value is reported as the line with a wrong format.

    :returns: the problem structure returned by ``util.read_problem``.
    :raises IOError: the file can not be opened.
    :raises MemoryError: the library ran out of memory.
    :raises ValueError: the file has a wrong format.
    """
    status_out = c_int64()
    # bias = 0 is required
    problem = util.read_problem(src.encode(), 0, pointer(status_out))
    code = status_out.value

    if code == -1:
        raise IOError("Can not open file " + src + ".")
    if code == -2:
        raise MemoryError("Memory Exhausted. Try to restart python.")
    if code != 0:
        raise ValueError("Wrong file format in line " + str(code) + ".")

    print_debug('SVMProblem construct:%s'% id(problem))
    return problem
liblinear.feature_node(-1, bias) 106 | 107 | for i in range(1,self.l): 108 | self.x[i][-2] = node 109 | self.x_space[self.n_x_space-2] = node 110 | self.bias = bias 111 | 112 | def normalize(self, learner_param, idf): 113 | print_debug ("normal parameters: bin_feat {0}, inst_norm {1}, tf {2}, idf {3}\n".format(learner_param.binary_feature, 114 | learner_param.inst_normalization, 115 | learner_param.term_frequency, 116 | learner_param.inverse_document_frequency, 117 | )) 118 | util.normalize(pointer(self), 119 | learner_param.binary_feature, 120 | learner_param.inst_normalization, 121 | learner_param.term_frequency, 122 | learner_param.inverse_document_frequency, 123 | idf) 124 | 125 | @staticmethod 126 | def normalize_one(xi, learner_param, idf): 127 | """ 128 | The maximum index of xi should be less 129 | or equal to the weight vector size. 130 | """ 131 | norm = 0 132 | word_count = 0 133 | i = 0 134 | while xi[i].index != -1: 135 | idx = xi[i].index-1 136 | if learner_param.binary_feature: 137 | xi[i].value = xi[i].value != 0 138 | 139 | word_count += abs(xi[i].value) 140 | 141 | if learner_param.inverse_document_frequency and idx < len(idf): 142 | xi[i].value *= idf[idx] 143 | 144 | norm += xi[i].value * xi[i].value 145 | i += 1 146 | 147 | norm **= .5 148 | 149 | 150 | if learner_param.term_frequency: 151 | i = 0 152 | while xi[i].index != -1: 153 | xi[i].value /= word_count 154 | i += 1 155 | 156 | if learner_param.inst_normalization: 157 | i = 0 158 | while xi[i].index != -1: 159 | xi[i].value /= norm 160 | i += 1 161 | 162 | def compute_idf(self): 163 | idf = (c_double * self.n)() 164 | util.compute_idf(self, idf) 165 | return idf 166 | 167 | class LearnerParameter(liblinear.parameter): 168 | """ 169 | :class:`LearnerParameter` is the parameter structure used by 170 | :class:`LearnerModel`. It consists of normalization parameters and 171 | LIBLINEAR parameters. 
172 | 173 | Both *liblinear_opts* and *learner_opts* are :class:`str` or a 174 | :class:`list` of :class:`str`. For example, you can write either 175 | 176 | >>> param = LearnerParameter('-N 1 -T 1', '-c 2 -e 1e-2') 177 | 178 | or 179 | 180 | >>> param = LearnerParameter(['-N', '1', '-T', '1'], ['-c', '2', '-e', '1e-2']) 181 | 182 | *liblinear_opts* is LIBLINEAR's parameters. Refer to LIBLINEAR's 183 | document for more details. *learner_opts* includes options for feature 184 | representation and instance-wise normalization. The preprocessor of 185 | LibShortText converts text files to LIBSVM-format data, where the 186 | features are word counts. All *value* in the options should be either 187 | ``1`` or ``0``, where ``1`` enables the option. 188 | 189 | ========== ==================================================== 190 | options explanation when *value* is ``1`` 191 | ========== ==================================================== 192 | -D *value* Binary representation. All non-zero values are 193 | treated as 1. Default is enabled. 194 | -T *value* Term frequency. The data are divided by the feature 195 | sum. That is, 196 | :math:`x_i \leftarrow (x_i)/\sum_j |x_j|`, 197 | where :math:`x` is the training instance and 198 | :math:`x_i` is the :math:`i`-th feature of :math:`x`. 199 | Default is disabled. 200 | -I *value* Inverse document frequency (idf). Default is 201 | disabled. 202 | -N *value* Instance normalization. The training instances are 203 | normalized to unit vectors before training. Default 204 | is enabled. 205 | ========== ==================================================== 206 | 207 | Note that if more than one option is enabled, then they are done in the 208 | order: binary representation, term frequency, IDF, and instance 209 | normalization. The following example is tf-idf representation without 210 | instance normalization. 
def parse_options(self, learner_opts, liblinear_opts):
    """
    Set the options to the specific values.

    *learner_opts* is a :class:`str` (split on whitespace) or a
    :class:`list` of tokens made of ``-D``, ``-N``, ``-I`` and ``-T``
    flags, each followed by an integer value. *liblinear_opts* is passed
    unchanged to ``liblinear.parameter.parse_options``.

    All options are first reset to their defaults, then the LIBLINEAR
    options are parsed, and finally the learner options are applied. The
    raw arguments are remembered in ``self.raw_options``.

    Raises :class:`TypeError` if *learner_opts* is neither a list nor a
    string, and :class:`ValueError` for an unknown flag, a flag without a
    value, or a value that is not an integer.
    """

    self.raw_options = (learner_opts, liblinear_opts)
    if isinstance(learner_opts, list):
        argv = learner_opts
    elif isinstance(learner_opts, str):
        argv = learner_opts.split()
    else:
        raise TypeError("Wrong types")
    self.set_to_default_values()
    liblinear.parameter.parse_options(self, liblinear_opts)

    # Each learner flag stores its integer value in one attribute.
    flag_to_attr = {
        "-D": "binary_feature",
        "-N": "inst_normalization",
        "-I": "inverse_document_frequency",
        "-T": "term_frequency",
    }

    i = 0
    while i < len(argv):
        flag = argv[i]
        if flag not in flag_to_attr:
            raise ValueError('No option ' + flag)
        # A trailing flag used to fail with a bare IndexError; report it
        # as a malformed option instead.
        if i + 1 >= len(argv):
            raise ValueError('Option ' + flag + ' requires a value')
        setattr(self, flag_to_attr[flag], int(argv[i + 1]))
        i += 2
def __init__(self, c_model, param = None, idf = None):
    """
    Constructor of :class:`LearnerModel`.

    *c_model* is either a :class:`str` naming a model directory, in which
    case the model is loaded via :meth:`load` and the other arguments are
    ignored, or a :class:`liblinear.model` instance, in which case *param*
    is mandatory.

    *param* is a :class:`LearnerParameter` (its ``raw_options`` are kept)
    or a tuple stored as-is in ``self.param_options``.

    *idf* is an optional sequence of inverse document frequencies; only
    the leading entries are kept (see the slice below).

    Raises :class:`ValueError` if a model is given without *param*, and
    :class:`TypeError` if *c_model* or *param* has an unsupported type.
    """

    print_debug('c_model(%s), self(%s)' % (id(c_model), id(self)))

    if isinstance(c_model, str):
        # load() reads the files and then calls __init__ again with the
        # loaded liblinear model, so nothing more is done on this pass.
        self.load(c_model)
        return
    elif isinstance(c_model, liblinear.model):
        if param is None:
            raise ValueError("param can not be None if model is given.")
    else:
        raise TypeError("c_model should be model file name or a model.")

    self.c_model = c_model # keep a reference to prevent GC

    if isinstance(param, LearnerParameter):
        self.param_options = param.raw_options
    elif isinstance(param, tuple):
        self.param_options = param
    else:
        raise TypeError("param should be a LearnerParameter or a tuple.")

    if idf is not None:
        # Keep nr_feature entries, plus one more when a bias term is used
        # (the boolean ``bias >= 0`` adds 0 or 1 to the slice bound).
        self.idf = idf[:self.c_model.nr_feature + (self.c_model.bias >= 0)]
    else:
        self.idf = None

    # Copy every attribute listed in c_model._names onto this instance.
    # NOTE(review): _names is presumably the list of ctypes field names of
    # liblinear.model -- confirm against liblinear.py.
    for attr in c_model._names:
        setattr(self, attr, getattr(c_model, attr))

    self._reconstruct_label_idx()
def load(self, model_dir):
    """
    Load the contents from a :class:`TextModel` directory.

    Reads ``liblinear_model``, ``options.pickle`` and ``idf.pickle`` from
    *model_dir* and re-initializes this instance from them.
    """

    self.c_model = liblinear_load_model(path.join(model_dir,'liblinear_model'))

    # NOTE: unpickling executes arbitrary code; only load model
    # directories that come from a trusted source.
    options_file = path.join(model_dir,'options.pickle')
    with open(options_file,'rb') as f:
        self.param_options = cPickle.load(f)

    idf_file = path.join(model_dir,'idf.pickle')
    with open(idf_file,'rb') as f:
        self.idf = cPickle.load(f)

    self.__init__(self.c_model, LearnerParameter(self.param_options[0], self.param_options[1]), self.idf)

def save(self, model_dir, force=False):
    """
    Save the model to a directory. If *force* is set to ``True``,
    the existing directory will be overwritten; otherwise,
    :class:`OSError` will be raised.

    The directory receives ``liblinear_model``, ``options.pickle`` and
    ``idf.pickle``.
    """

    if path.exists(model_dir):
        if force:
            shutil.rmtree(model_dir)
        else :
            raise OSError('Please use force option to overwrite the existing files.')
    os.mkdir(model_dir)

    liblinear_save_model(path.join(model_dir,'liblinear_model'), self.c_model)

    # Context managers make sure the pickles are flushed and closed even
    # if dumping fails part-way.
    options_file = path.join(model_dir,'options.pickle')
    with open(options_file,'wb') as f:
        cPickle.dump(self.param_options, f, -1)

    idf_file = path.join(model_dir,'idf.pickle')
    with open(idf_file,'wb') as f:
        cPickle.dump(self.idf, f, -1)

def __str__(self):
    """
    Return ``'LearnerModel: <learner options>'`` (``'default'`` when the
    learner options are empty), or ``'empty LearnerModel'`` when no
    options tuple is stored.
    """
    if isinstance(self.param_options, tuple) and len(self.param_options) > 0:
        learner_opts = self.param_options[0]
        # Learner options may have been supplied as a list of tokens;
        # join them so the string concatenation below cannot fail.
        if isinstance(learner_opts, list):
            learner_opts = ' '.join(str(opt) for opt in learner_opts)
        return 'LearnerModel: ' + (learner_opts or 'default')
    else:
        return 'empty LearnerModel'
383 | """ 384 | 385 | learner_prob = LearnerProblem(data_file_name) 386 | learner_param = LearnerParameter(learner_opts, liblinear_opts) 387 | 388 | idf = None 389 | if learner_param.inverse_document_frequency: 390 | idf = learner_prob.compute_idf() 391 | 392 | learner_prob.normalize(learner_param, idf) 393 | 394 | m = liblinear_train(learner_prob, learner_param) 395 | if not learner_param.cross_validation: 396 | m.x_space = None # This is required to reduce the memory usage... 397 | m = LearnerModel(m, learner_param, idf) 398 | return m 399 | 400 | 401 | def predict_one(xi, m): 402 | """ 403 | Return the label and a :class:`c_double` array of decision values of 404 | the test instance *xi* using :class:`LearnerModel` *m*. 405 | 406 | *xi* can be a :class:`list` or a :class:`dict` as in LIBLINEAR python 407 | interface. It can also be a LIBLINEAR feature_node array. 408 | 409 | .. note:: 410 | 411 | This function is designed to analyze the result of one instance. 412 | It has a severe efficiency issue and should be used only by 413 | :func:`libshorttext.classifier.predict_single_text`. If many 414 | instances need to be predicted, they should be stored in a file 415 | and predicted by :func:`predict`. 416 | 417 | .. warning:: 418 | 419 | The content of *xi* may be **changed** after the function call. 420 | """ 421 | 422 | if isinstance(xi, (list, dict)): 423 | xi = liblinear.gen_feature_nodearray(xi)[0] 424 | elif not isinstance(xi, POINTER(liblinear.feature_node)): 425 | raise TypeError("xi should be a test instance") 426 | 427 | learner_param = LearnerParameter(m.param_options[0], m.param_options[1]) 428 | 429 | if m.bias >= 0: 430 | i = 0 431 | while xi[i].index != -1: i += 1 432 | 433 | # Already has bias, or bias reserved. 434 | # Actually this statement should be true if 435 | # the data is read by read_SVMProblem. 
def predict(data_file_name, m, liblinear_opts=""):
    """
    Return a quadruple: the predicted labels, the accuracy, the decision values, and the
    true labels in the test data file (obtained through the :class:`LearnerModel` *m*).

    The predicted labels and true labels in the file are :class:`list`. The accuracy is
    evaluated by assuming that the labels in the file are the true label. If the file
    contains no instances the accuracy is reported as ``0.0``.

    The decision values are in a :class:`list`, where the length is the same as the number
    of test instances. Each element in the list is a :class:`list` of the instance's
    decision values in different classes.
    For example, the decision value of instance i and class k can be obtained by

    >>> predicted_label, accuracy, all_dec_values, label = predict('svm_file', model)
    >>> print(all_dec_values[i][k])

    *liblinear_opts* is accepted for interface compatibility and is not used;
    the options stored in *m* are applied instead.
    """

    learner_prob = LearnerProblem(data_file_name)
    learner_param = LearnerParameter(m.param_options[0], m.param_options[1])

    # The native normalizer needs the idf values as a C double array.
    idf = None
    if m.idf:
        idf = (c_double * len(m.idf))(*m.idf)
    learner_prob.normalize(learner_param, idf)

    all_dec_values = []
    correct = 0
    py = []  # predicted y
    ty = []  # true y

    # One buffer is reused for every instance; its content is copied into
    # a fresh list before the next prediction overwrites it.
    dec_values = (c_double * m.nr_class)()

    for i in range(learner_prob.l):
        label = liblinear.liblinear.predict_values(m, learner_prob.x[i], dec_values)
        all_dec_values.append(dec_values[:m.nr_class])
        py.append(label)
        ty.append(learner_prob.y[i])

        if label == learner_prob.y[i]:
            correct += 1

    # Guard against an empty data file, which used to divide by zero.
    if learner_prob.l:
        acc = correct / float(learner_prob.l)
    else:
        acc = 0.0

    return py, acc, all_dec_values, ty
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CC ?= gcc 3 | CFLAGS = -Wall -Wconversion -O3 -fPIC 4 | LIBS = blas/blas.a 5 | SHVER = 1 6 | OS = $(shell uname) 7 | #LIBS = -lblas 8 | 9 | all: train predict 10 | 11 | lib: linear.o tron.o blas/blas.a 12 | if [ "$(OS)" = "Darwin" ]; then \ 13 | SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \ 14 | else \ 15 | SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \ 16 | fi; \ 17 | $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) 18 | 19 | train: tron.o linear.o train.c blas/blas.a 20 | $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) 21 | 22 | predict: tron.o linear.o predict.c blas/blas.a 23 | $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) 24 | 25 | tron.o: tron.cpp tron.h 26 | $(CXX) $(CFLAGS) -c -o tron.o tron.cpp 27 | 28 | linear.o: linear.cpp linear.h 29 | $(CXX) $(CFLAGS) -c -o linear.o linear.cpp 30 | 31 | blas/blas.a: blas/*.c blas/*.h 32 | make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 33 | 34 | clean: 35 | make -C blas clean 36 | make -C matlab clean 37 | rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) 38 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/README: -------------------------------------------------------------------------------- 1 | LIBLINEAR is a simple package for solving large-scale regularized linear 2 | classification and regression. It currently supports 3 | - L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification 4 | - L1-regularized L2-loss support vector classification/L1-regularized logistic regression 5 | - L2-regularized L2-loss support vector regression/L1-loss support vector regression. 
6 | This document explains the usage of LIBLINEAR. 7 | 8 | To get started, please read the ``Quick Start'' section first. 9 | For developers, please check the ``Library Usage'' section to learn 10 | how to integrate LIBLINEAR in your software. 11 | 12 | Table of Contents 13 | ================= 14 | 15 | - When to use LIBLINEAR but not LIBSVM 16 | - Quick Start 17 | - Installation 18 | - `train' Usage 19 | - `predict' Usage 20 | - Examples 21 | - Library Usage 22 | - Additional Information 23 | - MATLAB/OCTAVE interface 24 | - PYTHON interface 25 | 26 | When to use LIBLINEAR but not LIBSVM 27 | ==================================== 28 | 29 | There are some large data for which with/without nonlinear mappings 30 | gives similar performances. Without using kernels, one can 31 | efficiently train a much larger set via linear classification/regression. 32 | These data usually have a large number of features. Document classification 33 | is an example. 34 | 35 | Warning: While generally liblinear is very fast, its default solver 36 | may be slow under certain situations (e.g., data not scaled or C is 37 | large). See Appendix B of our SVM guide about how to handle such 38 | cases. 39 | http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf 40 | 41 | Warning: If you are a beginner and your data sets are not large, you 42 | should consider LIBSVM first. 43 | 44 | LIBSVM page: 45 | http://www.csie.ntu.edu.tw/~cjlin/libsvm 46 | 47 | 48 | Quick Start 49 | =========== 50 | 51 | See the section ``Installation'' for installing LIBLINEAR. 52 | 53 | After installation, there are programs `train' and `predict' for 54 | training and testing, respectively. 55 | 56 | About the data format, please check the README file of LIBSVM. Note 57 | that feature index must start from 1 (but not 0). 58 | 59 | A sample classification data included in this package is `heart_scale'. 
60 | 61 | Type `train heart_scale', and the program will read the training 62 | data and output the model file `heart_scale.model'. If you have a test 63 | set called heart_scale.t, then type `predict heart_scale.t 64 | heart_scale.model output' to see the prediction accuracy. The `output' 65 | file contains the predicted class labels. 66 | 67 | For more information about `train' and `predict', see the sections 68 | `train' Usage and `predict' Usage. 69 | 70 | To obtain good performances, sometimes one needs to scale the 71 | data. Please check the program `svm-scale' of LIBSVM. For large and 72 | sparse data, use `-l 0' to keep the sparsity. 73 | 74 | Installation 75 | ============ 76 | 77 | On Unix systems, type `make' to build the `train' and `predict' 78 | programs. Run them without arguments to show the usages. 79 | 80 | This software uses some level-1 BLAS subroutines. The needed functions are 81 | included in this package. If a BLAS library is available on your 82 | machine, you may use it by modifying the Makefile: Unmark the following line 83 | 84 | #LIBS ?= -lblas 85 | 86 | and mark 87 | 88 | LIBS ?= blas/blas.a 89 | 90 | `train' Usage 91 | ============= 92 | 93 | Usage: train [options] training_set_file [model_file] 94 | options: 95 | -s type : set type of solver (default 1) 96 | for multi-class classification 97 | 0 -- L2-regularized logistic regression (primal) 98 | 1 -- L2-regularized L2-loss support vector classification (dual) 99 | 2 -- L2-regularized L2-loss support vector classification (primal) 100 | 3 -- L2-regularized L1-loss support vector classification (dual) 101 | 4 -- support vector classification by Crammer and Singer 102 | 5 -- L1-regularized L2-loss support vector classification 103 | 6 -- L1-regularized logistic regression 104 | 7 -- L2-regularized logistic regression (dual) 105 | for regression 106 | 11 -- L2-regularized L2-loss support vector regression (primal) 107 | 12 -- L2-regularized L2-loss support vector regression (dual) 108 
| 13 -- L2-regularized L1-loss support vector regression (dual) 109 | -c cost : set the parameter C (default 1) 110 | -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) 111 | -e epsilon : set tolerance of termination criterion 112 | -s 0 and 2 113 | |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 114 | where f is the primal function and pos/neg are # of 115 | positive/negative data (default 0.01) 116 | -s 11 117 | |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 118 | -s 1, 3, 4 and 7 119 | Dual maximal violation <= eps; similar to libsvm (default 0.1) 120 | -s 5 and 6 121 | |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, 122 | where f is the primal function (default 0.01) 123 | -s 12 and 13 124 | |f'(alpha)|_1 <= eps |f'(alpha0)|, 125 | where f is the dual function (default 0.1) 126 | -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) 127 | -wi weight: weights adjust the parameter C of different classes (see README for details) 128 | -v n: n-fold cross validation mode 129 | -q : quiet mode (no outputs) 130 | 131 | Option -v randomly splits the data into n parts and calculates cross 132 | validation accuracy on them. 133 | 134 | Formulations: 135 | 136 | For L2-regularized logistic regression (-s 0), we solve 137 | 138 | min_w w^Tw/2 + C \sum log(1 + exp(-y_i w^Tx_i)) 139 | 140 | For L2-regularized L2-loss SVC dual (-s 1), we solve 141 | 142 | min_alpha 0.5(alpha^T (Q + I/2/C) alpha) - e^T alpha 143 | s.t. 0 <= alpha_i, 144 | 145 | For L2-regularized L2-loss SVC (-s 2), we solve 146 | 147 | min_w w^Tw/2 + C \sum max(0, 1- y_i w^Tx_i)^2 148 | 149 | For L2-regularized L1-loss SVC dual (-s 3), we solve 150 | 151 | min_alpha 0.5(alpha^T Q alpha) - e^T alpha 152 | s.t.
0 <= alpha_i <= C, 153 | 154 | For L1-regularized L2-loss SVC (-s 5), we solve 155 | 156 | min_w \sum |w_j| + C \sum max(0, 1- y_i w^Tx_i)^2 157 | 158 | For L1-regularized logistic regression (-s 6), we solve 159 | 160 | min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i)) 161 | 162 | For L2-regularized logistic regression (-s 7), we solve 163 | 164 | min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant 165 | s.t. 0 <= alpha_i <= C, 166 | 167 | where 168 | 169 | Q is a matrix with Q_ij = y_i y_j x_i^T x_j. 170 | 171 | For L2-regularized L2-loss SVR (-s 11), we solve 172 | 173 | min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2 174 | 175 | For L2-regularized L2-loss SVR dual (-s 12), we solve 176 | 177 | min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + \sum |beta_i| 178 | 179 | For L2-regularized L1-loss SVR dual (-s 13), we solve 180 | 181 | min_beta 0.5(beta^T Q beta) - y^T beta + \sum |beta_i| 182 | s.t. -C <= beta_i <= C, 183 | 184 | where 185 | 186 | Q is a matrix with Q_ij = x_i^T x_j. 187 | 188 | If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias]. 189 | 190 | The primal-dual relationship implies that -s 1 and -s 2 give the same 191 | model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same. 192 | 193 | We implement 1-vs-the rest multi-class strategy for classification. 194 | In training i vs. non_i, their C parameters are (weight from -wi)*C 195 | and C, respectively. If there are only two classes, we train only one 196 | model. Thus weight1*C vs. weight2*C is used. See examples below. 197 | 198 | We also implement multi-class SVM by Crammer and Singer (-s 4): 199 | 200 | min_{w_m, \xi_i} 0.5 \sum_m ||w_m||^2 + C \sum_i \xi_i 201 | s.t. 
w^T_{y_i} x_i - w^T_m x_i >= e^m_i - \xi_i \forall m,i 202 | 203 | where e^m_i = 0 if y_i = m, 204 | e^m_i = 1 if y_i != m, 205 | 206 | Here we solve the dual problem: 207 | 208 | min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i \alpha^m_i 209 | s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i 210 | 211 | where w_m(\alpha) = \sum_i \alpha^m_i x_i, 212 | and C^m_i = C if m = y_i, 213 | C^m_i = 0 if m != y_i. 214 | 215 | `predict' Usage 216 | =============== 217 | 218 | Usage: predict [options] test_file model_file output_file 219 | options: 220 | -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only 221 | -q : quiet mode (no outputs) 222 | 223 | Note that -b is only needed in the prediction phase. This is different 224 | from the setting of LIBSVM. 225 | 226 | Examples 227 | ======== 228 | 229 | > train data_file 230 | 231 | Train linear SVM with L2-loss function. 232 | 233 | > train -s 0 data_file 234 | 235 | Train a logistic regression model. 236 | 237 | > train -v 5 -e 0.001 data_file 238 | 239 | Do five-fold cross-validation using L2-loss svm. 240 | Use a smaller stopping tolerance 0.001 than the default 241 | 0.1 if you want more accurate solutions. 242 | 243 | > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file 244 | 245 | Train four classifiers: 246 | positive negative Cp Cn 247 | class 1 class 2,3,4. 20 10 248 | class 2 class 1,3,4. 50 10 249 | class 3 class 1,2,4. 20 10 250 | class 4 class 1,2,3. 10 10 251 | 252 | > train -c 10 -w3 1 -w2 5 two_class_data_file 253 | 254 | If there are only two classes, we train ONE model. 255 | The C values for the two classes are 10 and 50. 256 | 257 | > predict -b 1 test_file data_file.model output_file 258 | 259 | Output probability estimates (for logistic regression only).
260 | 261 | Library Usage 262 | ============= 263 | 264 | - Function: model* train(const struct problem *prob, 265 | const struct parameter *param); 266 | 267 | This function constructs and returns a linear classification 268 | or regression model according to the given training data and 269 | parameters. 270 | 271 | struct problem describes the problem: 272 | 273 | struct problem 274 | { 275 | INT64 l, n; 276 | INT64 *y; 277 | struct feature_node **x; 278 | double bias; 279 | }; 280 | 281 | where `l' is the number of training data. If bias >= 0, we assume 282 | that one additional feature is added to the end of each data 283 | instance. `n' is the number of feature (including the bias feature 284 | if bias >= 0). `y' is an array containing the target values. (integers 285 | in classification, real numbers in regression) And `x' is an array 286 | of pointers, each of which points to a sparse representation (array 287 | of feature_node) of one training vector. 288 | 289 | For example, if we have the following training data: 290 | 291 | LABEL ATTR1 ATTR2 ATTR3 ATTR4 ATTR5 292 | ----- ----- ----- ----- ----- ----- 293 | 1 0 0.1 0.2 0 0 294 | 2 0 0.1 0.3 -1.2 0 295 | 1 0.4 0 0 0 0 296 | 2 0 0.1 0 1.4 0.5 297 | 3 -0.1 -0.2 0.1 1.1 0.1 298 | 299 | and bias = 1, then the components of problem are: 300 | 301 | l = 5 302 | n = 6 303 | 304 | y -> 1 2 1 2 3 305 | 306 | x -> [ ] -> (2,0.1) (3,0.2) (6,1) (-1,?) 307 | [ ] -> (2,0.1) (3,0.3) (4,-1.2) (6,1) (-1,?) 308 | [ ] -> (1,0.4) (6,1) (-1,?) 309 | [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?) 310 | [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?) 
311 | 312 | struct parameter describes the parameters of a linear classification 313 | or regression model: 314 | 315 | struct parameter 316 | { 317 | INT64 solver_type; 318 | 319 | /* these are for training only */ 320 | double eps; /* stopping criteria */ 321 | double C; 322 | INT64 nr_weight; 323 | INT64 *weight_label; 324 | double* weight; 325 | double p; 326 | }; 327 | 328 | solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL. 329 | for classification 330 | L2R_LR L2-regularized logistic regression (primal) 331 | L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual) 332 | L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal) 333 | L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual) 334 | MCSVM_CS support vector classification by Crammer and Singer 335 | L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification 336 | L1R_LR L1-regularized logistic regression 337 | L2R_LR_DUAL L2-regularized logistic regression (dual) 338 | for regression 339 | L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal) 340 | L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual) 341 | L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual) 342 | 343 | C is the cost of constraints violation. 344 | p is the sensitiveness of loss of support vector regression. 345 | eps is the stopping criterion. 346 | 347 | nr_weight, weight_label, and weight are used to change the penalty 348 | for some classes (If the weight for a class is not changed, it is 349 | set to 1). This is useful for training classifier using unbalanced 350 | input data or with asymmetric misclassification cost. 351 | 352 | nr_weight is the number of elements in the array weight_label and 353 | weight. 
Each weight[i] corresponds to weight_label[i], meaning that 354 | the penalty of class weight_label[i] is scaled by a factor of weight[i]. 355 | 356 | If you do not want to change penalty for any of the classes, 357 | just set nr_weight to 0. 358 | 359 | *NOTE* To avoid wrong parameters, check_parameter() should be 360 | called before train(). 361 | 362 | struct model stores the model obtained from the training procedure: 363 | 364 | struct model 365 | { 366 | struct parameter param; 367 | INT64 nr_class; /* number of classes */ 368 | INT64 nr_feature; 369 | double *w; 370 | INT64 *label; /* label of each class */ 371 | double bias; 372 | }; 373 | 374 | param describes the parameters used to obtain the model. 375 | 376 | nr_class and nr_feature are the number of classes and features, 377 | respectively. nr_class = 2 for regression. 378 | 379 | The nr_feature*nr_class array w gives feature weights. We use one 380 | against the rest for multi-class classification, so each feature 381 | index corresponds to nr_class weight values. Weights are 382 | organized in the following way 383 | 384 | +------------------+------------------+------------+ 385 | | nr_class weights | nr_class weights | ... 386 | | for 1st feature | for 2nd feature | 387 | +------------------+------------------+------------+ 388 | 389 | If bias >= 0, x becomes [x; bias]. The number of features is 390 | increased by one, so w is a (nr_feature+1)*nr_class array. The 391 | value of bias is stored in the variable bias. 392 | 393 | The array label stores class labels. 394 | 395 | - Function: void cross_validation(const problem *prob, const parameter *param, INT64 nr_fold, double *target); 396 | 397 | This function conducts cross validation. Data are separated to 398 | nr_fold folds. Under given parameters, sequentially each fold is 399 | validated using the model from training the remaining. Predicted 400 | labels in the validation process are stored in the array called 401 | target. 
402 | 403 | The format of prob is the same as that for train(). 404 | 405 | - Function: double predict(const model *model_, const feature_node *x); 406 | 407 | For a classification model, the predicted class for x is returned. 408 | For a regression model, the function value of x calculated using 409 | the model is returned. 410 | 411 | - Function: double predict_values(const struct model *model_, 412 | const struct feature_node *x, double* dec_values); 413 | 414 | This function gives nr_w decision values in the array dec_values. 415 | nr_w=1 if regression is applied or the number of classes is two. An exception is 416 | multi-class svm by Crammer and Singer (-s 4), where nr_w = 2 if there are two classes. For all other situations, nr_w is the 417 | number of classes. 418 | 419 | We implement the one-vs-the-rest multi-class strategy (-s 0,1,2,3,5,6,7) 420 | and multi-class svm by Crammer and Singer (-s 4) for multi-class SVM. 421 | The class with the highest decision value is returned. 422 | 423 | - Function: double predict_probability(const struct model *model_, 424 | const struct feature_node *x, double* prob_estimates); 425 | 426 | This function gives nr_class probability estimates in the array 427 | prob_estimates. nr_class can be obtained from the function 428 | get_nr_class. The class with the highest probability is 429 | returned. Currently, we support only the probability outputs of 430 | logistic regression. 431 | 432 | - Function: INT64 get_nr_feature(const model *model_); 433 | 434 | The function gives the number of attributes of the model. 435 | 436 | - Function: INT64 get_nr_class(const model *model_); 437 | 438 | The function gives the number of classes of the model. 439 | For a regression model, 2 is returned. 440 | 441 | - Function: void get_labels(const model *model_, INT64* label); 442 | 443 | This function outputs the class labels into an array called label. 444 | For a regression model, label is unchanged.
445 | 446 | - Function: const char *check_parameter(const struct problem *prob, 447 | const struct parameter *param); 448 | 449 | This function checks whether the parameters are within the feasible 450 | range of the problem. This function should be called before calling 451 | train() and cross_validation(). It returns NULL if the 452 | parameters are feasible, otherwise an error message is returned. 453 | 454 | - Function: INT64 save_model(const char *model_file_name, 455 | const struct model *model_); 456 | 457 | This function saves a model to a file; returns 0 on success, or -1 458 | if an error occurs. 459 | 460 | - Function: struct model *load_model(const char *model_file_name); 461 | 462 | This function returns a pointer to the model read from the file, 463 | or a null pointer if the model could not be loaded. 464 | 465 | - Function: void free_model_content(struct model *model_ptr); 466 | 467 | This function frees the memory used by the entries in a model structure. 468 | 469 | - Function: void free_and_destroy_model(struct model **model_ptr_ptr); 470 | 471 | This function frees the memory used by a model and destroys the model 472 | structure. 473 | 474 | - Function: void destroy_param(struct parameter *param); 475 | 476 | This function frees the memory used by a parameter set. 477 | 478 | - Function: void set_print_string_function(void (*print_func)(const char *)); 479 | 480 | Users can specify their output format by a function. Use 481 | set_print_string_function(NULL); 482 | for default printing to stdout. 483 | 484 | 485 | MATLAB/OCTAVE Interface 486 | ======================= 487 | 488 | Please check the file README in the directory `matlab'. 489 | 490 | PYTHON Interface 491 | ================ 492 | 493 | Please check the file README in the directory `python'. 494 | 495 | Additional Information 496 | ====================== 497 | 498 | If you find LIBLINEAR helpful, please cite it as 499 | 500 | R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. 
Lin. 501 | LIBLINEAR: A Library for Large Linear Classification, Journal of 502 | Machine Learning Research 9(2008), 1871-1874. Software available at 503 | http://www.csie.ntu.edu.tw/~cjlin/liblinear 504 | 505 | For any questions and comments, please send your email to 506 | cjlin@csie.ntu.edu.tw 507 | 508 | 509 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/Makefile: -------------------------------------------------------------------------------- 1 | AR = ar rcv 2 | RANLIB = ranlib 3 | 4 | HEADERS = blas.h blasp.h 5 | FILES = dnrm2.o daxpy.o ddot.o dscal.o 6 | 7 | CFLAGS = $(OPTFLAGS) 8 | FFLAGS = $(OPTFLAGS) 9 | 10 | blas: $(FILES) $(HEADERS) 11 | $(AR) blas.a $(FILES) 12 | $(RANLIB) blas.a 13 | 14 | clean: 15 | - rm -f *.o 16 | - rm -f *.a 17 | - rm -f *~ 18 | 19 | .c.o: 20 | $(CC) $(CFLAGS) -c $*.c 21 | 22 | 23 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* blas.h -- C header file for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 5 | 6 | - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ 7 | 8 | #ifndef BLAS_INCLUDE 9 | #define BLAS_INCLUDE 10 | 11 | #include "stdint.h" 12 | #ifndef INT64_DEFINED 13 | typedef int64_t INT64; 14 | #define INT64_DEFINED 15 | #endif 16 | /* Data types specific to BLAS implementation */ 17 | typedef struct { float r, i; } fcomplex; 18 | typedef struct { double r, i; } dcomplex; 19 | typedef INT64 blasbool; 20 | 21 | #include "blasp.h" /* Prototypes for all BLAS functions */ 22 | 23 | #define FALSE 0 24 | #define TRUE 1 25 | 26 | /* Macro functions */ 27 | #define MIN(a,b) ((a) <= (b) ? (a) : (b)) 28 | #define MAX(a,b) ((a) >= (b) ? 
(a) : (b)) 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/blasp.h: -------------------------------------------------------------------------------- 1 | /* blasp.h -- C prototypes for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /* Functions listed in alphabetical order */ 5 | #include <stdint.h> 6 | #ifndef INT64_DEFINED 7 | typedef int64_t INT64; 8 | #define INT64_DEFINED 9 | #endif 10 | 11 | #ifdef F2C_COMPAT 12 | 13 | void cdotc_(fcomplex *dotval, INT64 *n, fcomplex *cx, INT64 *incx, 14 | fcomplex *cy, INT64 *incy); 15 | 16 | void cdotu_(fcomplex *dotval, INT64 *n, fcomplex *cx, INT64 *incx, 17 | fcomplex *cy, INT64 *incy); 18 | 19 | double sasum_(INT64 *n, float *sx, INT64 *incx); 20 | 21 | double scasum_(INT64 *n, fcomplex *cx, INT64 *incx); 22 | 23 | double scnrm2_(INT64 *n, fcomplex *x, INT64 *incx); 24 | 25 | double sdot_(INT64 *n, float *sx, INT64 *incx, float *sy, INT64 *incy); 26 | 27 | double snrm2_(INT64 *n, float *x, INT64 *incx); 28 | 29 | void zdotc_(dcomplex *dotval, INT64 *n, dcomplex *cx, INT64 *incx, 30 | dcomplex *cy, INT64 *incy); 31 | 32 | void zdotu_(dcomplex *dotval, INT64 *n, dcomplex *cx, INT64 *incx, 33 | dcomplex *cy, INT64 *incy); 34 | 35 | #else 36 | 37 | fcomplex cdotc_(INT64 *n, fcomplex *cx, INT64 *incx, fcomplex *cy, INT64 *incy); 38 | 39 | fcomplex cdotu_(INT64 *n, fcomplex *cx, INT64 *incx, fcomplex *cy, INT64 *incy); 40 | 41 | float sasum_(INT64 *n, float *sx, INT64 *incx); 42 | 43 | float scasum_(INT64 *n, fcomplex *cx, INT64 *incx); 44 | 45 | float scnrm2_(INT64 *n, fcomplex *x, INT64 *incx); 46 | 47 | float sdot_(INT64 *n, float *sx, INT64 *incx, float *sy, INT64 *incy); 48 | 49 | float snrm2_(INT64 *n, float *x, INT64 *incx); 50 | 51 | dcomplex zdotc_(INT64 *n, dcomplex *cx, INT64 *incx, dcomplex *cy, INT64 *incy); 52 | 53 | dcomplex zdotu_(INT64 *n, dcomplex *cx, INT64 *incx, dcomplex *cy, INT64 *incy); 54 | 55 | #endif 56 | 57
| /* Remaining functions listed in alphabetical order */ 58 | 59 | INT64 caxpy_(INT64 *n, fcomplex *ca, fcomplex *cx, INT64 *incx, fcomplex *cy, 60 | INT64 *incy); 61 | 62 | INT64 ccopy_(INT64 *n, fcomplex *cx, INT64 *incx, fcomplex *cy, INT64 *incy); 63 | 64 | INT64 cgbmv_(char *trans, INT64 *m, INT64 *n, INT64 *kl, INT64 *ku, 65 | fcomplex *alpha, fcomplex *a, INT64 *lda, fcomplex *x, INT64 *incx, 66 | fcomplex *beta, fcomplex *y, INT64 *incy); 67 | 68 | INT64 cgemm_(char *transa, char *transb, INT64 *m, INT64 *n, INT64 *k, 69 | fcomplex *alpha, fcomplex *a, INT64 *lda, fcomplex *b, INT64 *ldb, 70 | fcomplex *beta, fcomplex *c, INT64 *ldc); 71 | 72 | INT64 cgemv_(char *trans, INT64 *m, INT64 *n, fcomplex *alpha, fcomplex *a, 73 | INT64 *lda, fcomplex *x, INT64 *incx, fcomplex *beta, fcomplex *y, 74 | INT64 *incy); 75 | 76 | INT64 cgerc_(INT64 *m, INT64 *n, fcomplex *alpha, fcomplex *x, INT64 *incx, 77 | fcomplex *y, INT64 *incy, fcomplex *a, INT64 *lda); 78 | 79 | INT64 cgeru_(INT64 *m, INT64 *n, fcomplex *alpha, fcomplex *x, INT64 *incx, 80 | fcomplex *y, INT64 *incy, fcomplex *a, INT64 *lda); 81 | 82 | INT64 chbmv_(char *uplo, INT64 *n, INT64 *k, fcomplex *alpha, fcomplex *a, 83 | INT64 *lda, fcomplex *x, INT64 *incx, fcomplex *beta, fcomplex *y, 84 | INT64 *incy); 85 | 86 | INT64 chemm_(char *side, char *uplo, INT64 *m, INT64 *n, fcomplex *alpha, 87 | fcomplex *a, INT64 *lda, fcomplex *b, INT64 *ldb, fcomplex *beta, 88 | fcomplex *c, INT64 *ldc); 89 | 90 | INT64 chemv_(char *uplo, INT64 *n, fcomplex *alpha, fcomplex *a, INT64 *lda, 91 | fcomplex *x, INT64 *incx, fcomplex *beta, fcomplex *y, INT64 *incy); 92 | 93 | INT64 cher_(char *uplo, INT64 *n, float *alpha, fcomplex *x, INT64 *incx, 94 | fcomplex *a, INT64 *lda); 95 | 96 | INT64 cher2_(char *uplo, INT64 *n, fcomplex *alpha, fcomplex *x, INT64 *incx, 97 | fcomplex *y, INT64 *incy, fcomplex *a, INT64 *lda); 98 | 99 | INT64 cher2k_(char *uplo, char *trans, INT64 *n, INT64 *k, fcomplex *alpha, 100 | fcomplex 
*a, INT64 *lda, fcomplex *b, INT64 *ldb, float *beta, 101 | fcomplex *c, INT64 *ldc); 102 | 103 | INT64 cherk_(char *uplo, char *trans, INT64 *n, INT64 *k, float *alpha, 104 | fcomplex *a, INT64 *lda, float *beta, fcomplex *c, INT64 *ldc); 105 | 106 | INT64 chpmv_(char *uplo, INT64 *n, fcomplex *alpha, fcomplex *ap, fcomplex *x, 107 | INT64 *incx, fcomplex *beta, fcomplex *y, INT64 *incy); 108 | 109 | INT64 chpr_(char *uplo, INT64 *n, float *alpha, fcomplex *x, INT64 *incx, 110 | fcomplex *ap); 111 | 112 | INT64 chpr2_(char *uplo, INT64 *n, fcomplex *alpha, fcomplex *x, INT64 *incx, 113 | fcomplex *y, INT64 *incy, fcomplex *ap); 114 | 115 | INT64 crotg_(fcomplex *ca, fcomplex *cb, float *c, fcomplex *s); 116 | 117 | INT64 cscal_(INT64 *n, fcomplex *ca, fcomplex *cx, INT64 *incx); 118 | 119 | INT64 csscal_(INT64 *n, float *sa, fcomplex *cx, INT64 *incx); 120 | 121 | INT64 cswap_(INT64 *n, fcomplex *cx, INT64 *incx, fcomplex *cy, INT64 *incy); 122 | 123 | INT64 csymm_(char *side, char *uplo, INT64 *m, INT64 *n, fcomplex *alpha, 124 | fcomplex *a, INT64 *lda, fcomplex *b, INT64 *ldb, fcomplex *beta, 125 | fcomplex *c, INT64 *ldc); 126 | 127 | INT64 csyr2k_(char *uplo, char *trans, INT64 *n, INT64 *k, fcomplex *alpha, 128 | fcomplex *a, INT64 *lda, fcomplex *b, INT64 *ldb, fcomplex *beta, 129 | fcomplex *c, INT64 *ldc); 130 | 131 | INT64 csyrk_(char *uplo, char *trans, INT64 *n, INT64 *k, fcomplex *alpha, 132 | fcomplex *a, INT64 *lda, fcomplex *beta, fcomplex *c, INT64 *ldc); 133 | 134 | INT64 ctbmv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 135 | fcomplex *a, INT64 *lda, fcomplex *x, INT64 *incx); 136 | 137 | INT64 ctbsv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 138 | fcomplex *a, INT64 *lda, fcomplex *x, INT64 *incx); 139 | 140 | INT64 ctpmv_(char *uplo, char *trans, char *diag, INT64 *n, fcomplex *ap, 141 | fcomplex *x, INT64 *incx); 142 | 143 | INT64 ctpsv_(char *uplo, char *trans, char *diag, INT64 *n, fcomplex *ap, 144 | fcomplex 
*x, INT64 *incx); 145 | 146 | INT64 ctrmm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 147 | INT64 *n, fcomplex *alpha, fcomplex *a, INT64 *lda, fcomplex *b, 148 | INT64 *ldb); 149 | 150 | INT64 ctrmv_(char *uplo, char *trans, char *diag, INT64 *n, fcomplex *a, 151 | INT64 *lda, fcomplex *x, INT64 *incx); 152 | 153 | INT64 ctrsm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 154 | INT64 *n, fcomplex *alpha, fcomplex *a, INT64 *lda, fcomplex *b, 155 | INT64 *ldb); 156 | 157 | INT64 ctrsv_(char *uplo, char *trans, char *diag, INT64 *n, fcomplex *a, 158 | INT64 *lda, fcomplex *x, INT64 *incx); 159 | 160 | INT64 daxpy_(INT64 *n, double *sa, double *sx, INT64 *incx, double *sy, 161 | INT64 *incy); 162 | 163 | INT64 dcopy_(INT64 *n, double *sx, INT64 *incx, double *sy, INT64 *incy); 164 | 165 | INT64 dgbmv_(char *trans, INT64 *m, INT64 *n, INT64 *kl, INT64 *ku, 166 | double *alpha, double *a, INT64 *lda, double *x, INT64 *incx, 167 | double *beta, double *y, INT64 *incy); 168 | 169 | INT64 dgemm_(char *transa, char *transb, INT64 *m, INT64 *n, INT64 *k, 170 | double *alpha, double *a, INT64 *lda, double *b, INT64 *ldb, 171 | double *beta, double *c, INT64 *ldc); 172 | 173 | INT64 dgemv_(char *trans, INT64 *m, INT64 *n, double *alpha, double *a, 174 | INT64 *lda, double *x, INT64 *incx, double *beta, double *y, 175 | INT64 *incy); 176 | 177 | INT64 dger_(INT64 *m, INT64 *n, double *alpha, double *x, INT64 *incx, 178 | double *y, INT64 *incy, double *a, INT64 *lda); 179 | 180 | INT64 drot_(INT64 *n, double *sx, INT64 *incx, double *sy, INT64 *incy, 181 | double *c, double *s); 182 | 183 | INT64 drotg_(double *sa, double *sb, double *c, double *s); 184 | 185 | INT64 dsbmv_(char *uplo, INT64 *n, INT64 *k, double *alpha, double *a, 186 | INT64 *lda, double *x, INT64 *incx, double *beta, double *y, 187 | INT64 *incy); 188 | 189 | INT64 dscal_(INT64 *n, double *sa, double *sx, INT64 *incx); 190 | 191 | INT64 dspmv_(char *uplo, INT64 *n, double 
*alpha, double *ap, double *x, 192 | INT64 *incx, double *beta, double *y, INT64 *incy); 193 | 194 | INT64 dspr_(char *uplo, INT64 *n, double *alpha, double *x, INT64 *incx, 195 | double *ap); 196 | 197 | INT64 dspr2_(char *uplo, INT64 *n, double *alpha, double *x, INT64 *incx, 198 | double *y, INT64 *incy, double *ap); 199 | 200 | INT64 dswap_(INT64 *n, double *sx, INT64 *incx, double *sy, INT64 *incy); 201 | 202 | INT64 dsymm_(char *side, char *uplo, INT64 *m, INT64 *n, double *alpha, 203 | double *a, INT64 *lda, double *b, INT64 *ldb, double *beta, 204 | double *c, INT64 *ldc); 205 | 206 | INT64 dsymv_(char *uplo, INT64 *n, double *alpha, double *a, INT64 *lda, 207 | double *x, INT64 *incx, double *beta, double *y, INT64 *incy); 208 | 209 | INT64 dsyr_(char *uplo, INT64 *n, double *alpha, double *x, INT64 *incx, 210 | double *a, INT64 *lda); 211 | 212 | INT64 dsyr2_(char *uplo, INT64 *n, double *alpha, double *x, INT64 *incx, 213 | double *y, INT64 *incy, double *a, INT64 *lda); 214 | 215 | INT64 dsyr2k_(char *uplo, char *trans, INT64 *n, INT64 *k, double *alpha, 216 | double *a, INT64 *lda, double *b, INT64 *ldb, double *beta, 217 | double *c, INT64 *ldc); 218 | 219 | INT64 dsyrk_(char *uplo, char *trans, INT64 *n, INT64 *k, double *alpha, 220 | double *a, INT64 *lda, double *beta, double *c, INT64 *ldc); 221 | 222 | INT64 dtbmv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 223 | double *a, INT64 *lda, double *x, INT64 *incx); 224 | 225 | INT64 dtbsv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 226 | double *a, INT64 *lda, double *x, INT64 *incx); 227 | 228 | INT64 dtpmv_(char *uplo, char *trans, char *diag, INT64 *n, double *ap, 229 | double *x, INT64 *incx); 230 | 231 | INT64 dtpsv_(char *uplo, char *trans, char *diag, INT64 *n, double *ap, 232 | double *x, INT64 *incx); 233 | 234 | INT64 dtrmm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 235 | INT64 *n, double *alpha, double *a, INT64 *lda, double *b, 236 | INT64 
*ldb); 237 | 238 | INT64 dtrmv_(char *uplo, char *trans, char *diag, INT64 *n, double *a, 239 | INT64 *lda, double *x, INT64 *incx); 240 | 241 | INT64 dtrsm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 242 | INT64 *n, double *alpha, double *a, INT64 *lda, double *b, 243 | INT64 *ldb); 244 | 245 | INT64 dtrsv_(char *uplo, char *trans, char *diag, INT64 *n, double *a, 246 | INT64 *lda, double *x, INT64 *incx); 247 | 248 | 249 | INT64 saxpy_(INT64 *n, float *sa, float *sx, INT64 *incx, float *sy, INT64 *incy); 250 | 251 | INT64 scopy_(INT64 *n, float *sx, INT64 *incx, float *sy, INT64 *incy); 252 | 253 | INT64 sgbmv_(char *trans, INT64 *m, INT64 *n, INT64 *kl, INT64 *ku, 254 | float *alpha, float *a, INT64 *lda, float *x, INT64 *incx, 255 | float *beta, float *y, INT64 *incy); 256 | 257 | INT64 sgemm_(char *transa, char *transb, INT64 *m, INT64 *n, INT64 *k, 258 | float *alpha, float *a, INT64 *lda, float *b, INT64 *ldb, 259 | float *beta, float *c, INT64 *ldc); 260 | 261 | INT64 sgemv_(char *trans, INT64 *m, INT64 *n, float *alpha, float *a, 262 | INT64 *lda, float *x, INT64 *incx, float *beta, float *y, 263 | INT64 *incy); 264 | 265 | INT64 sger_(INT64 *m, INT64 *n, float *alpha, float *x, INT64 *incx, 266 | float *y, INT64 *incy, float *a, INT64 *lda); 267 | 268 | INT64 srot_(INT64 *n, float *sx, INT64 *incx, float *sy, INT64 *incy, 269 | float *c, float *s); 270 | 271 | INT64 srotg_(float *sa, float *sb, float *c, float *s); 272 | 273 | INT64 ssbmv_(char *uplo, INT64 *n, INT64 *k, float *alpha, float *a, 274 | INT64 *lda, float *x, INT64 *incx, float *beta, float *y, 275 | INT64 *incy); 276 | 277 | INT64 sscal_(INT64 *n, float *sa, float *sx, INT64 *incx); 278 | 279 | INT64 sspmv_(char *uplo, INT64 *n, float *alpha, float *ap, float *x, 280 | INT64 *incx, float *beta, float *y, INT64 *incy); 281 | 282 | INT64 sspr_(char *uplo, INT64 *n, float *alpha, float *x, INT64 *incx, 283 | float *ap); 284 | 285 | INT64 sspr2_(char *uplo, INT64 *n, float 
*alpha, float *x, INT64 *incx, 286 | float *y, INT64 *incy, float *ap); 287 | 288 | INT64 sswap_(INT64 *n, float *sx, INT64 *incx, float *sy, INT64 *incy); 289 | 290 | INT64 ssymm_(char *side, char *uplo, INT64 *m, INT64 *n, float *alpha, 291 | float *a, INT64 *lda, float *b, INT64 *ldb, float *beta, 292 | float *c, INT64 *ldc); 293 | 294 | INT64 ssymv_(char *uplo, INT64 *n, float *alpha, float *a, INT64 *lda, 295 | float *x, INT64 *incx, float *beta, float *y, INT64 *incy); 296 | 297 | INT64 ssyr_(char *uplo, INT64 *n, float *alpha, float *x, INT64 *incx, 298 | float *a, INT64 *lda); 299 | 300 | INT64 ssyr2_(char *uplo, INT64 *n, float *alpha, float *x, INT64 *incx, 301 | float *y, INT64 *incy, float *a, INT64 *lda); 302 | 303 | INT64 ssyr2k_(char *uplo, char *trans, INT64 *n, INT64 *k, float *alpha, 304 | float *a, INT64 *lda, float *b, INT64 *ldb, float *beta, 305 | float *c, INT64 *ldc); 306 | 307 | INT64 ssyrk_(char *uplo, char *trans, INT64 *n, INT64 *k, float *alpha, 308 | float *a, INT64 *lda, float *beta, float *c, INT64 *ldc); 309 | 310 | INT64 stbmv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 311 | float *a, INT64 *lda, float *x, INT64 *incx); 312 | 313 | INT64 stbsv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 314 | float *a, INT64 *lda, float *x, INT64 *incx); 315 | 316 | INT64 stpmv_(char *uplo, char *trans, char *diag, INT64 *n, float *ap, 317 | float *x, INT64 *incx); 318 | 319 | INT64 stpsv_(char *uplo, char *trans, char *diag, INT64 *n, float *ap, 320 | float *x, INT64 *incx); 321 | 322 | INT64 strmm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 323 | INT64 *n, float *alpha, float *a, INT64 *lda, float *b, 324 | INT64 *ldb); 325 | 326 | INT64 strmv_(char *uplo, char *trans, char *diag, INT64 *n, float *a, 327 | INT64 *lda, float *x, INT64 *incx); 328 | 329 | INT64 strsm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 330 | INT64 *n, float *alpha, float *a, INT64 *lda, float *b, 331 | INT64 
*ldb); 332 | 333 | INT64 strsv_(char *uplo, char *trans, char *diag, INT64 *n, float *a, 334 | INT64 *lda, float *x, INT64 *incx); 335 | 336 | INT64 zaxpy_(INT64 *n, dcomplex *ca, dcomplex *cx, INT64 *incx, dcomplex *cy, 337 | INT64 *incy); 338 | 339 | INT64 zcopy_(INT64 *n, dcomplex *cx, INT64 *incx, dcomplex *cy, INT64 *incy); 340 | 341 | INT64 zdscal_(INT64 *n, double *sa, dcomplex *cx, INT64 *incx); 342 | 343 | INT64 zgbmv_(char *trans, INT64 *m, INT64 *n, INT64 *kl, INT64 *ku, 344 | dcomplex *alpha, dcomplex *a, INT64 *lda, dcomplex *x, INT64 *incx, 345 | dcomplex *beta, dcomplex *y, INT64 *incy); 346 | 347 | INT64 zgemm_(char *transa, char *transb, INT64 *m, INT64 *n, INT64 *k, 348 | dcomplex *alpha, dcomplex *a, INT64 *lda, dcomplex *b, INT64 *ldb, 349 | dcomplex *beta, dcomplex *c, INT64 *ldc); 350 | 351 | INT64 zgemv_(char *trans, INT64 *m, INT64 *n, dcomplex *alpha, dcomplex *a, 352 | INT64 *lda, dcomplex *x, INT64 *incx, dcomplex *beta, dcomplex *y, 353 | INT64 *incy); 354 | 355 | INT64 zgerc_(INT64 *m, INT64 *n, dcomplex *alpha, dcomplex *x, INT64 *incx, 356 | dcomplex *y, INT64 *incy, dcomplex *a, INT64 *lda); 357 | 358 | INT64 zgeru_(INT64 *m, INT64 *n, dcomplex *alpha, dcomplex *x, INT64 *incx, 359 | dcomplex *y, INT64 *incy, dcomplex *a, INT64 *lda); 360 | 361 | INT64 zhbmv_(char *uplo, INT64 *n, INT64 *k, dcomplex *alpha, dcomplex *a, 362 | INT64 *lda, dcomplex *x, INT64 *incx, dcomplex *beta, dcomplex *y, 363 | INT64 *incy); 364 | 365 | INT64 zhemm_(char *side, char *uplo, INT64 *m, INT64 *n, dcomplex *alpha, 366 | dcomplex *a, INT64 *lda, dcomplex *b, INT64 *ldb, dcomplex *beta, 367 | dcomplex *c, INT64 *ldc); 368 | 369 | INT64 zhemv_(char *uplo, INT64 *n, dcomplex *alpha, dcomplex *a, INT64 *lda, 370 | dcomplex *x, INT64 *incx, dcomplex *beta, dcomplex *y, INT64 *incy); 371 | 372 | INT64 zher_(char *uplo, INT64 *n, double *alpha, dcomplex *x, INT64 *incx, 373 | dcomplex *a, INT64 *lda); 374 | 375 | INT64 zher2_(char *uplo, INT64 *n, dcomplex 
*alpha, dcomplex *x, INT64 *incx, 376 | dcomplex *y, INT64 *incy, dcomplex *a, INT64 *lda); 377 | 378 | INT64 zher2k_(char *uplo, char *trans, INT64 *n, INT64 *k, dcomplex *alpha, 379 | dcomplex *a, INT64 *lda, dcomplex *b, INT64 *ldb, double *beta, 380 | dcomplex *c, INT64 *ldc); 381 | 382 | INT64 zherk_(char *uplo, char *trans, INT64 *n, INT64 *k, double *alpha, 383 | dcomplex *a, INT64 *lda, double *beta, dcomplex *c, INT64 *ldc); 384 | 385 | INT64 zhpmv_(char *uplo, INT64 *n, dcomplex *alpha, dcomplex *ap, dcomplex *x, 386 | INT64 *incx, dcomplex *beta, dcomplex *y, INT64 *incy); 387 | 388 | INT64 zhpr_(char *uplo, INT64 *n, double *alpha, dcomplex *x, INT64 *incx, 389 | dcomplex *ap); 390 | 391 | INT64 zhpr2_(char *uplo, INT64 *n, dcomplex *alpha, dcomplex *x, INT64 *incx, 392 | dcomplex *y, INT64 *incy, dcomplex *ap); 393 | 394 | INT64 zrotg_(dcomplex *ca, dcomplex *cb, double *c, dcomplex *s); 395 | 396 | INT64 zscal_(INT64 *n, dcomplex *ca, dcomplex *cx, INT64 *incx); 397 | 398 | INT64 zswap_(INT64 *n, dcomplex *cx, INT64 *incx, dcomplex *cy, INT64 *incy); 399 | 400 | INT64 zsymm_(char *side, char *uplo, INT64 *m, INT64 *n, dcomplex *alpha, 401 | dcomplex *a, INT64 *lda, dcomplex *b, INT64 *ldb, dcomplex *beta, 402 | dcomplex *c, INT64 *ldc); 403 | 404 | INT64 zsyr2k_(char *uplo, char *trans, INT64 *n, INT64 *k, dcomplex *alpha, 405 | dcomplex *a, INT64 *lda, dcomplex *b, INT64 *ldb, dcomplex *beta, 406 | dcomplex *c, INT64 *ldc); 407 | 408 | INT64 zsyrk_(char *uplo, char *trans, INT64 *n, INT64 *k, dcomplex *alpha, 409 | dcomplex *a, INT64 *lda, dcomplex *beta, dcomplex *c, INT64 *ldc); 410 | 411 | INT64 ztbmv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 412 | dcomplex *a, INT64 *lda, dcomplex *x, INT64 *incx); 413 | 414 | INT64 ztbsv_(char *uplo, char *trans, char *diag, INT64 *n, INT64 *k, 415 | dcomplex *a, INT64 *lda, dcomplex *x, INT64 *incx); 416 | 417 | INT64 ztpmv_(char *uplo, char *trans, char *diag, INT64 *n, dcomplex *ap, 418 | 
dcomplex *x, INT64 *incx); 419 | 420 | INT64 ztpsv_(char *uplo, char *trans, char *diag, INT64 *n, dcomplex *ap, 421 | dcomplex *x, INT64 *incx); 422 | 423 | INT64 ztrmm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 424 | INT64 *n, dcomplex *alpha, dcomplex *a, INT64 *lda, dcomplex *b, 425 | INT64 *ldb); 426 | 427 | INT64 ztrmv_(char *uplo, char *trans, char *diag, INT64 *n, dcomplex *a, 428 | INT64 *lda, dcomplex *x, INT64 *incx); 429 | 430 | INT64 ztrsm_(char *side, char *uplo, char *transa, char *diag, INT64 *m, 431 | INT64 *n, dcomplex *alpha, dcomplex *a, INT64 *lda, dcomplex *b, 432 | INT64 *ldb); 433 | 434 | INT64 ztrsv_(char *uplo, char *trans, char *diag, INT64 *n, dcomplex *a, 435 | INT64 *lda, dcomplex *x, INT64 *incx); 436 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | /* daxpy_: adds (*sa) times each of *n elements of sx to the corresponding element of sy, in place (sy += sa * sx). incx and incy point to the index strides used for sx and sy; a negative stride starts from offset (1 - n) * stride. Nothing is written when *n <= 0 or *sa == 0.0. Always returns 0. */ 3 | INT64 daxpy_(INT64 *n, double *sa, double *sx, INT64 *incx, double *sy, 4 | INT64 *incy) 5 | { 6 | INT64 i, m, ix, iy, nn, iincx, iincy; 7 | register double ssa; 8 | 9 | /* constant times a vector plus a vector. 10 | uses unrolled loop for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | ssa = *sa; 17 | iincx = *incx; 18 | iincy = *incy; 19 | 20 | if( nn > 0 && ssa != 0.0 ) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-3; /* the 4-way unrolled loop below stops while at least 4 elements remain */ 25 | for (i = 0; i < m; i += 4) 26 | { 27 | sy[i] += ssa * sx[i]; 28 | sy[i+1] += ssa * sx[i+1]; 29 | sy[i+2] += ssa * sx[i+2]; 30 | sy[i+3] += ssa * sx[i+3]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sy[i] += ssa * sx[i]; 34 | } 35 | else /* code for unequal increments or equal increments not equal to 1 */ 36 | { 37 | ix = iincx >= 0 ?
0 : (1 - nn) * iincx; 38 | iy = iincy >= 0 ? 0 : (1 - nn) * iincy; 39 | for (i = 0; i < nn; i++) 40 | { 41 | sy[iy] += ssa * sx[ix]; 42 | ix += iincx; 43 | iy += iincy; 44 | } 45 | } 46 | } 47 | 48 | return 0; 49 | } /* daxpy_ */ 50 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/ddot.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | /* ddot_: returns the sum of the products of *n element pairs taken from sx and sy. incx and incy point to the index strides used for sx and sy; a negative stride starts from offset (1 - n) * stride. Returns 0.0 when *n <= 0. */ 3 | double ddot_(INT64 *n, double *sx, INT64 *incx, double *sy, INT64 *incy) 4 | { 5 | INT64 i, m, nn, iincx, iincy; 6 | double stemp; 7 | INT64 ix, iy; 8 | 9 | /* forms the dot product of two vectors. 10 | uses unrolled loops for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | iincy = *incy; 18 | 19 | stemp = 0.0; 20 | if (nn > 0) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-4; /* the 5-way unrolled loop below stops while at least 5 elements remain */ 25 | for (i = 0; i < m; i += 5) 26 | stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] + 27 | sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4]; 28 | 29 | for ( ; i < nn; i++) /* clean-up loop */ 30 | stemp += sx[i] * sy[i]; 31 | } 32 | else /* code for unequal increments or equal increments not equal to 1 */ 33 | { 34 | ix = 0; 35 | iy = 0; 36 | if (iincx < 0) 37 | ix = (1 - nn) * iincx; 38 | if (iincy < 0) 39 | iy = (1 - nn) * iincy; 40 | for (i = 0; i < nn; i++) 41 | { 42 | stemp += sx[ix] * sy[iy]; 43 | ix += iincx; 44 | iy += iincy; 45 | } 46 | } 47 | } 48 | 49 | return stemp; 50 | } /* ddot_ */ 51 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/dnrm2.c: -------------------------------------------------------------------------------- 1 | #include <math.h> /* Needed for fabs() and sqrt() */ 2 | #include "blas.h" 3 | /* dnrm2_: returns the Euclidean norm of *n elements of x taken *incx apart; returns 0.0 unless both *n > 0 and *incx > 0. */ 4 | double dnrm2_(INT64 *n, double *x, INT64
*incx) 5 | { 6 | INT64 ix, nn, iincx; 7 | double norm, scale, absxi, ssq, temp; 8 | /* Working state for the loop below: once a non-zero element has been seen, scale is the largest absolute value seen so far and ssq is the sum of squares of the elements seen so far, each divided by scale, so scale * sqrt(ssq) is the norm. Presumably this rescaling is there to avoid overflow or underflow from squaring raw elements - confirm against the reference BLAS notes. */ 9 | /* DNRM2 returns the euclidean norm of a vector via the function 10 | name, so that 11 | 12 | DNRM2 := sqrt( x'*x ) 13 | 14 | -- This version written on 25-October-1982. 15 | Modified on 14-October-1993 to inline the call to SLASSQ. 16 | Sven Hammarling, Nag Ltd. */ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | iincx = *incx; 21 | 22 | if( nn > 0 && iincx > 0 ) 23 | { 24 | if (nn == 1) 25 | { 26 | norm = fabs(x[0]); 27 | } 28 | else 29 | { 30 | scale = 0.0; 31 | ssq = 1.0; 32 | 33 | /* The following loop is equivalent to this call to the LAPACK 34 | auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ 35 | 36 | for (ix=(nn-1)*iincx; ix>=0; ix-=iincx) 37 | { 38 | if (x[ix] != 0.0) 39 | { 40 | absxi = fabs(x[ix]); 41 | if (scale < absxi) 42 | { 43 | temp = scale / absxi; 44 | ssq = ssq * (temp * temp) + 1.0; 45 | scale = absxi; 46 | } 47 | else 48 | { 49 | temp = absxi / scale; 50 | ssq += temp * temp; 51 | } 52 | } 53 | } 54 | norm = scale * sqrt(ssq); 55 | } 56 | } 57 | else 58 | norm = 0.0; 59 | 60 | return norm; 61 | 62 | } /* dnrm2_ */ 63 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/blas/dscal.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | /* dscal_: multiplies *n elements of sx, taken *incx apart, by *sa in place. Nothing is written unless both *n > 0 and *incx > 0. Always returns 0. */ 3 | INT64 dscal_(INT64 *n, double *sa, double *sx, INT64 *incx) 4 | { 5 | INT64 i, m, nincx, nn, iincx; 6 | double ssa; 7 | 8 | /* scales a vector by a constant. 9 | uses unrolled loops for increment equal to 1. 10 | jack dongarra, linpack, 3/11/78. 11 | modified 3/93 to return if incx .le. 0.
12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | ssa = *sa; 18 | 19 | if (nn > 0 && iincx > 0) 20 | { 21 | if (iincx == 1) /* code for increment equal to 1 */ 22 | { 23 | m = nn-4; 24 | for (i = 0; i < m; i += 5) 25 | { 26 | sx[i] = ssa * sx[i]; 27 | sx[i+1] = ssa * sx[i+1]; 28 | sx[i+2] = ssa * sx[i+2]; 29 | sx[i+3] = ssa * sx[i+3]; 30 | sx[i+4] = ssa * sx[i+4]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sx[i] = ssa * sx[i]; 34 | } 35 | else /* code for increment not equal to 1 */ 36 | { 37 | nincx = nn * iincx; 38 | for (i = 0; i < nincx; i += iincx) 39 | sx[i] = ssa * sx[i]; 40 | } 41 | } 42 | 43 | return 0; 44 | } /* dscal_ */ 45 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/heart_scale: -------------------------------------------------------------------------------- 1 | +1 1:0.708333 2:1 3:1 4:-0.320755 5:-0.105023 6:-1 7:1 8:-0.419847 9:-1 10:-0.225806 12:1 13:-1 2 | -1 1:0.583333 2:-1 3:0.333333 4:-0.603774 5:1 6:-1 7:1 8:0.358779 9:-1 10:-0.483871 12:-1 13:1 3 | +1 1:0.166667 2:1 3:-0.333333 4:-0.433962 5:-0.383562 6:-1 7:-1 8:0.0687023 9:-1 10:-0.903226 11:-1 12:-1 13:1 4 | -1 1:0.458333 2:1 3:1 4:-0.358491 5:-0.374429 6:-1 7:-1 8:-0.480916 9:1 10:-0.935484 12:-0.333333 13:1 5 | -1 1:0.875 2:-1 3:-0.333333 4:-0.509434 5:-0.347032 6:-1 7:1 8:-0.236641 9:1 10:-0.935484 11:-1 12:-0.333333 13:-1 6 | -1 1:0.5 2:1 3:1 4:-0.509434 5:-0.767123 6:-1 7:-1 8:0.0534351 9:-1 10:-0.870968 11:-1 12:-1 13:1 7 | +1 1:0.125 2:1 3:0.333333 4:-0.320755 5:-0.406393 6:1 7:1 8:0.0839695 9:1 10:-0.806452 12:-0.333333 13:0.5 8 | +1 1:0.25 2:1 3:1 4:-0.698113 5:-0.484018 6:-1 7:1 8:0.0839695 9:1 10:-0.612903 12:-0.333333 13:1 9 | +1 1:0.291667 2:1 3:1 4:-0.132075 5:-0.237443 6:-1 7:1 8:0.51145 9:-1 10:-0.612903 12:0.333333 13:1 10 | +1 1:0.416667 2:-1 3:1 4:0.0566038 5:0.283105 6:-1 7:1 8:0.267176 9:-1 
10:0.290323 12:1 13:1 11 | -1 1:0.25 2:1 3:1 4:-0.226415 5:-0.506849 6:-1 7:-1 8:0.374046 9:-1 10:-0.83871 12:-1 13:1 12 | -1 2:1 3:1 4:-0.0943396 5:-0.543379 6:-1 7:1 8:-0.389313 9:1 10:-1 11:-1 12:-1 13:1 13 | -1 1:-0.375 2:1 3:0.333333 4:-0.132075 5:-0.502283 6:-1 7:1 8:0.664122 9:-1 10:-1 11:-1 12:-1 13:-1 14 | +1 1:0.333333 2:1 3:-1 4:-0.245283 5:-0.506849 6:-1 7:-1 8:0.129771 9:-1 10:-0.16129 12:0.333333 13:-1 15 | -1 1:0.166667 2:-1 3:1 4:-0.358491 5:-0.191781 6:-1 7:1 8:0.343511 9:-1 10:-1 11:-1 12:-0.333333 13:-1 16 | -1 1:0.75 2:-1 3:1 4:-0.660377 5:-0.894977 6:-1 7:-1 8:-0.175573 9:-1 10:-0.483871 12:-1 13:-1 17 | +1 1:-0.291667 2:1 3:1 4:-0.132075 5:-0.155251 6:-1 7:-1 8:-0.251908 9:1 10:-0.419355 12:0.333333 13:1 18 | +1 2:1 3:1 4:-0.132075 5:-0.648402 6:1 7:1 8:0.282443 9:1 11:1 12:-1 13:1 19 | -1 1:0.458333 2:1 3:-1 4:-0.698113 5:-0.611872 6:-1 7:1 8:0.114504 9:1 10:-0.419355 12:-1 13:-1 20 | -1 1:-0.541667 2:1 3:-1 4:-0.132075 5:-0.666667 6:-1 7:-1 8:0.633588 9:1 10:-0.548387 11:-1 12:-1 13:1 21 | +1 1:0.583333 2:1 3:1 4:-0.509434 5:-0.52968 6:-1 7:1 8:-0.114504 9:1 10:-0.16129 12:0.333333 13:1 22 | -1 1:-0.208333 2:1 3:-0.333333 4:-0.320755 5:-0.456621 6:-1 7:1 8:0.664122 9:-1 10:-0.935484 12:-1 13:-1 23 | -1 1:-0.416667 2:1 3:1 4:-0.603774 5:-0.191781 6:-1 7:-1 8:0.679389 9:-1 10:-0.612903 12:-1 13:-1 24 | -1 1:-0.25 2:1 3:1 4:-0.660377 5:-0.643836 6:-1 7:-1 8:0.0992366 9:-1 10:-0.967742 11:-1 12:-1 13:-1 25 | -1 1:0.0416667 2:-1 3:-0.333333 4:-0.283019 5:-0.260274 6:1 7:1 8:0.343511 9:1 10:-1 11:-1 12:-0.333333 13:-1 26 | -1 1:-0.208333 2:-1 3:0.333333 4:-0.320755 5:-0.319635 6:-1 7:-1 8:0.0381679 9:-1 10:-0.935484 11:-1 12:-1 13:-1 27 | -1 1:-0.291667 2:-1 3:1 4:-0.169811 5:-0.465753 6:-1 7:1 8:0.236641 9:1 10:-1 12:-1 13:-1 28 | -1 1:-0.0833333 2:-1 3:0.333333 4:-0.509434 5:-0.228311 6:-1 7:1 8:0.312977 9:-1 10:-0.806452 11:-1 12:-1 13:-1 29 | +1 1:0.208333 2:1 3:0.333333 4:-0.660377 5:-0.525114 6:-1 7:1 8:0.435115 9:-1 10:-0.193548 
12:-0.333333 13:1 30 | -1 1:0.75 2:-1 3:0.333333 4:-0.698113 5:-0.365297 6:1 7:1 8:-0.0992366 9:-1 10:-1 11:-1 12:-0.333333 13:-1 31 | +1 1:0.166667 2:1 3:0.333333 4:-0.358491 5:-0.52968 6:-1 7:1 8:0.206107 9:-1 10:-0.870968 12:-0.333333 13:1 32 | -1 1:0.541667 2:1 3:1 4:0.245283 5:-0.534247 6:-1 7:1 8:0.0229008 9:-1 10:-0.258065 11:-1 12:-1 13:0.5 33 | -1 1:-0.666667 2:-1 3:0.333333 4:-0.509434 5:-0.593607 6:-1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1 34 | +1 1:0.25 2:1 3:1 4:0.433962 5:-0.086758 6:-1 7:1 8:0.0534351 9:1 10:0.0967742 11:1 12:-1 13:1 35 | +1 1:-0.125 2:1 3:1 4:-0.0566038 5:-0.6621 6:-1 7:1 8:-0.160305 9:1 10:-0.709677 12:-1 13:1 36 | +1 1:-0.208333 2:1 3:1 4:-0.320755 5:-0.406393 6:1 7:1 8:0.206107 9:1 10:-1 11:-1 12:0.333333 13:1 37 | +1 1:0.333333 2:1 3:1 4:-0.132075 5:-0.630137 6:-1 7:1 8:0.0229008 9:1 10:-0.387097 11:-1 12:-0.333333 13:1 38 | +1 1:0.25 2:1 3:-1 4:0.245283 5:-0.328767 6:-1 7:1 8:-0.175573 9:-1 10:-1 11:-1 12:-1 13:-1 39 | -1 1:-0.458333 2:1 3:0.333333 4:-0.320755 5:-0.753425 6:-1 7:-1 8:0.206107 9:-1 10:-1 11:-1 12:-1 13:-1 40 | -1 1:-0.208333 2:1 3:1 4:-0.471698 5:-0.561644 6:-1 7:1 8:0.755725 9:-1 10:-1 11:-1 12:-1 13:-1 41 | +1 1:-0.541667 2:1 3:1 4:0.0943396 5:-0.557078 6:-1 7:-1 8:0.679389 9:-1 10:-1 11:-1 12:-1 13:1 42 | -1 1:0.375 2:-1 3:1 4:-0.433962 5:-0.621005 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1 43 | -1 1:-0.375 2:1 3:0.333333 4:-0.320755 5:-0.511416 6:-1 7:-1 8:0.648855 9:1 10:-0.870968 11:-1 12:-1 13:-1 44 | -1 1:-0.291667 2:1 3:-0.333333 4:-0.867925 5:-0.675799 6:1 7:-1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:1 45 | +1 1:0.25 2:1 3:0.333333 4:-0.396226 5:-0.579909 6:1 7:-1 8:-0.0381679 9:-1 10:-0.290323 12:-0.333333 13:0.5 46 | -1 1:0.208333 2:1 3:0.333333 4:-0.132075 5:-0.611872 6:1 7:1 8:0.435115 9:-1 10:-1 11:-1 12:-1 13:-1 47 | +1 1:-0.166667 2:1 3:0.333333 4:-0.54717 5:-0.894977 6:-1 7:1 8:-0.160305 9:-1 10:-0.741935 11:-1 12:1 13:-1 48 | +1 1:-0.375 2:1 3:1 4:-0.698113 5:-0.675799 6:-1 7:1 
8:0.618321 9:-1 10:-1 11:-1 12:-0.333333 13:-1 49 | +1 1:0.541667 2:1 3:-0.333333 4:0.245283 5:-0.452055 6:-1 7:-1 8:-0.251908 9:1 10:-1 12:1 13:0.5 50 | +1 1:0.5 2:-1 3:1 4:0.0566038 5:-0.547945 6:-1 7:1 8:-0.343511 9:-1 10:-0.677419 12:1 13:1 51 | +1 1:-0.458333 2:1 3:1 4:-0.207547 5:-0.136986 6:-1 7:-1 8:-0.175573 9:1 10:-0.419355 12:-1 13:0.5 52 | -1 1:-0.0416667 2:1 3:-0.333333 4:-0.358491 5:-0.639269 6:1 7:-1 8:0.725191 9:-1 10:-1 11:-1 12:-1 13:-1 53 | -1 1:0.5 2:-1 3:0.333333 4:-0.132075 5:0.328767 6:1 7:1 8:0.312977 9:-1 10:-0.741935 11:-1 12:-0.333333 13:-1 54 | -1 1:0.416667 2:-1 3:-0.333333 4:-0.132075 5:-0.684932 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:0.333333 13:-1 55 | -1 1:-0.333333 2:-1 3:-0.333333 4:-0.320755 5:-0.506849 6:-1 7:1 8:0.587786 9:-1 10:-0.806452 12:-1 13:-1 56 | -1 1:-0.5 2:-1 3:-0.333333 4:-0.792453 5:-0.671233 6:-1 7:-1 8:0.480916 9:-1 10:-1 11:-1 12:-0.333333 13:-1 57 | +1 1:0.333333 2:1 3:1 4:-0.169811 5:-0.817352 6:-1 7:1 8:-0.175573 9:1 10:0.16129 12:-0.333333 13:-1 58 | -1 1:0.291667 2:-1 3:0.333333 4:-0.509434 5:-0.762557 6:1 7:-1 8:-0.618321 9:-1 10:-1 11:-1 12:-1 13:-1 59 | +1 1:0.25 2:-1 3:1 4:0.509434 5:-0.438356 6:-1 7:-1 8:0.0992366 9:1 10:-1 12:-1 13:-1 60 | +1 1:0.375 2:1 3:-0.333333 4:-0.509434 5:-0.292237 6:-1 7:1 8:-0.51145 9:-1 10:-0.548387 12:-0.333333 13:1 61 | -1 1:0.166667 2:1 3:0.333333 4:0.0566038 5:-1 6:1 7:-1 8:0.557252 9:-1 10:-0.935484 11:-1 12:-0.333333 13:1 62 | +1 1:-0.0833333 2:-1 3:1 4:-0.320755 5:-0.182648 6:-1 7:-1 8:0.0839695 9:1 10:-0.612903 12:-1 13:1 63 | -1 1:-0.375 2:1 3:0.333333 4:-0.509434 5:-0.543379 6:-1 7:-1 8:0.496183 9:-1 10:-1 11:-1 12:-1 13:-1 64 | -1 1:0.291667 2:-1 3:-1 4:0.0566038 5:-0.479452 6:-1 7:-1 8:0.526718 9:-1 10:-0.709677 11:-1 12:-1 13:-1 65 | -1 1:0.416667 2:1 3:-1 4:-0.0377358 5:-0.511416 6:1 7:1 8:0.206107 9:-1 10:-0.258065 11:1 12:-1 13:0.5 66 | +1 1:0.166667 2:1 3:1 4:0.0566038 5:-0.315068 6:-1 7:1 8:-0.374046 9:1 10:-0.806452 12:-0.333333 13:0.5 67 | -1 
1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.383562 6:-1 7:1 8:0.755725 9:1 10:-1 11:-1 12:-1 13:-1 68 | +1 1:0.208333 2:-1 3:-0.333333 4:-0.207547 5:-0.118721 6:1 7:1 8:0.236641 9:-1 10:-1 11:-1 12:0.333333 13:-1 69 | -1 1:-0.375 2:-1 3:0.333333 4:-0.54717 5:-0.47032 6:-1 7:-1 8:0.19084 9:-1 10:-0.903226 12:-0.333333 13:-1 70 | +1 1:-0.25 2:1 3:0.333333 4:-0.735849 5:-0.465753 6:-1 7:-1 8:0.236641 9:-1 10:-1 11:-1 12:-1 13:-1 71 | +1 1:0.333333 2:1 3:1 4:-0.509434 5:-0.388128 6:-1 7:-1 8:0.0534351 9:1 10:0.16129 12:-0.333333 13:1 72 | -1 1:0.166667 2:-1 3:1 4:-0.509434 5:0.0410959 6:-1 7:-1 8:0.40458 9:1 10:-0.806452 11:-1 12:-1 13:-1 73 | -1 1:0.708333 2:1 3:-0.333333 4:0.169811 5:-0.456621 6:-1 7:1 8:0.0992366 9:-1 10:-1 11:-1 12:-1 13:-1 74 | -1 1:0.958333 2:-1 3:0.333333 4:-0.132075 5:-0.675799 6:-1 8:-0.312977 9:-1 10:-0.645161 12:-1 13:-1 75 | -1 1:0.583333 2:-1 3:1 4:-0.773585 5:-0.557078 6:-1 7:-1 8:0.0839695 9:-1 10:-0.903226 11:-1 12:0.333333 13:-1 76 | +1 1:-0.333333 2:1 3:1 4:-0.0943396 5:-0.164384 6:-1 7:1 8:0.160305 9:1 10:-1 12:1 13:1 77 | -1 1:-0.333333 2:1 3:1 4:-0.811321 5:-0.625571 6:-1 7:1 8:0.175573 9:1 10:-0.0322581 12:-1 13:-1 78 | -1 1:-0.583333 2:-1 3:0.333333 4:-1 5:-0.666667 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:-1 13:-1 79 | -1 1:-0.458333 2:-1 3:0.333333 4:-0.509434 5:-0.621005 6:-1 7:-1 8:0.557252 9:-1 10:-1 12:-1 13:-1 80 | -1 1:0.125 2:1 3:-0.333333 4:-0.509434 5:-0.497717 6:-1 7:-1 8:0.633588 9:-1 10:-0.741935 11:-1 12:-1 13:-1 81 | +1 1:0.208333 2:1 3:1 4:-0.0188679 5:-0.579909 6:-1 7:-1 8:-0.480916 9:-1 10:-0.354839 12:-0.333333 13:1 82 | +1 1:-0.75 2:1 3:1 4:-0.509434 5:-0.671233 6:-1 7:-1 8:-0.0992366 9:1 10:-0.483871 12:-1 13:1 83 | +1 1:0.208333 2:1 3:1 4:0.0566038 5:-0.342466 6:-1 7:1 8:-0.389313 9:1 10:-0.741935 11:-1 12:-1 13:1 84 | -1 1:-0.5 2:1 3:0.333333 4:-0.320755 5:-0.598174 6:-1 7:1 8:0.480916 9:-1 10:-0.354839 12:-1 13:-1 85 | -1 1:0.166667 2:1 3:1 4:-0.698113 5:-0.657534 6:-1 7:-1 8:-0.160305 9:1 10:-0.516129 12:-1 
13:0.5 86 | -1 1:-0.458333 2:1 3:-1 4:0.0188679 5:-0.461187 6:-1 7:1 8:0.633588 9:-1 10:-0.741935 11:-1 12:0.333333 13:-1 87 | -1 1:0.375 2:1 3:-0.333333 4:-0.358491 5:-0.625571 6:1 7:1 8:0.0534351 9:-1 10:-1 11:-1 12:-1 13:-1 88 | -1 1:0.25 2:1 3:-1 4:0.584906 5:-0.342466 6:-1 7:1 8:0.129771 9:-1 10:0.354839 11:1 12:-1 13:1 89 | -1 1:-0.5 2:-1 3:-0.333333 4:-0.396226 5:-0.178082 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1 90 | +1 1:-0.125 2:1 3:1 4:0.0566038 5:-0.465753 6:-1 7:1 8:-0.129771 9:-1 10:-0.16129 12:-1 13:1 91 | -1 1:0.25 2:1 3:-0.333333 4:-0.132075 5:-0.56621 6:-1 7:-1 8:0.419847 9:1 10:-1 11:-1 12:-1 13:-1 92 | +1 1:0.333333 2:-1 3:1 4:-0.320755 5:-0.0684932 6:-1 7:1 8:0.496183 9:-1 10:-1 11:-1 12:-1 13:-1 93 | +1 1:0.0416667 2:1 3:1 4:-0.433962 5:-0.360731 6:-1 7:1 8:-0.419847 9:1 10:-0.290323 12:-0.333333 13:1 94 | +1 1:0.0416667 2:1 3:1 4:-0.698113 5:-0.634703 6:-1 7:1 8:-0.435115 9:1 10:-1 12:-0.333333 13:-1 95 | +1 1:-0.0416667 2:1 3:1 4:-0.415094 5:-0.607306 6:-1 7:-1 8:0.480916 9:-1 10:-0.677419 11:-1 12:0.333333 13:1 96 | +1 1:-0.25 2:1 3:1 4:-0.698113 5:-0.319635 6:-1 7:1 8:-0.282443 9:1 10:-0.677419 12:-0.333333 13:-1 97 | -1 1:0.541667 2:1 3:1 4:-0.509434 5:-0.196347 6:-1 7:1 8:0.221374 9:-1 10:-0.870968 12:-1 13:-1 98 | +1 1:0.208333 2:1 3:1 4:-0.886792 5:-0.506849 6:-1 7:-1 8:0.29771 9:-1 10:-0.967742 11:-1 12:-0.333333 13:1 99 | -1 1:0.458333 2:-1 3:0.333333 4:-0.132075 5:-0.146119 6:-1 7:-1 8:-0.0534351 9:-1 10:-0.935484 11:-1 12:-1 13:1 100 | -1 1:-0.125 2:-1 3:-0.333333 4:-0.509434 5:-0.461187 6:-1 7:-1 8:0.389313 9:-1 10:-0.645161 11:-1 12:-1 13:-1 101 | -1 1:-0.375 2:-1 3:0.333333 4:-0.735849 5:-0.931507 6:-1 7:-1 8:0.587786 9:-1 10:-0.806452 12:-1 13:-1 102 | +1 1:0.583333 2:1 3:1 4:-0.509434 5:-0.493151 6:-1 7:-1 8:-1 9:-1 10:-0.677419 12:-1 13:-1 103 | -1 1:-0.166667 2:-1 3:1 4:-0.320755 5:-0.347032 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1 104 | +1 1:0.166667 2:1 3:1 4:0.339623 5:-0.255708 6:1 7:1 8:-0.19084 9:-1 
10:-0.677419 12:1 13:1 105 | +1 1:0.416667 2:1 3:1 4:-0.320755 5:-0.415525 6:-1 7:1 8:0.160305 9:-1 10:-0.548387 12:-0.333333 13:1 106 | +1 1:-0.208333 2:1 3:1 4:-0.433962 5:-0.324201 6:-1 7:1 8:0.450382 9:-1 10:-0.83871 12:-1 13:1 107 | -1 1:-0.0833333 2:1 3:0.333333 4:-0.886792 5:-0.561644 6:-1 7:-1 8:0.0992366 9:1 10:-0.612903 12:-1 13:-1 108 | +1 1:0.291667 2:-1 3:1 4:0.0566038 5:-0.39726 6:-1 7:1 8:0.312977 9:-1 10:-0.16129 12:0.333333 13:1 109 | +1 1:0.25 2:1 3:1 4:-0.132075 5:-0.767123 6:-1 7:-1 8:0.389313 9:1 10:-1 11:-1 12:-0.333333 13:1 110 | -1 1:-0.333333 2:-1 3:-0.333333 4:-0.660377 5:-0.844749 6:-1 7:-1 8:0.0229008 9:-1 10:-1 12:-1 13:-1 111 | +1 1:0.0833333 2:-1 3:1 4:0.622642 5:-0.0821918 6:-1 8:-0.29771 9:1 10:0.0967742 12:-1 13:-1 112 | -1 1:-0.5 2:1 3:-0.333333 4:-0.698113 5:-0.502283 6:-1 7:-1 8:0.251908 9:-1 10:-1 11:-1 12:-1 13:-1 113 | +1 1:0.291667 2:-1 3:1 4:0.207547 5:-0.182648 6:-1 7:1 8:0.374046 9:-1 10:-1 11:-1 12:-1 13:-1 114 | -1 1:0.0416667 2:-1 3:0.333333 4:-0.226415 5:-0.187215 6:1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1 115 | -1 1:-0.458333 2:1 3:-0.333333 4:-0.509434 5:-0.228311 6:-1 7:-1 8:0.389313 9:-1 10:-1 11:-1 12:-1 13:-1 116 | -1 1:-0.166667 2:-1 3:-0.333333 4:-0.245283 5:-0.3379 6:-1 7:-1 8:0.389313 9:-1 10:-1 12:-1 13:-1 117 | +1 1:-0.291667 2:1 3:1 4:-0.509434 5:-0.438356 6:-1 7:1 8:0.114504 9:-1 10:-0.741935 11:-1 12:-1 13:1 118 | +1 1:0.125 2:-1 3:1 4:1 5:-0.260274 6:1 7:1 8:-0.0534351 9:1 10:0.290323 11:1 12:0.333333 13:1 119 | -1 1:0.541667 2:-1 3:-1 4:0.0566038 5:-0.543379 6:-1 7:-1 8:-0.343511 9:-1 10:-0.16129 11:1 12:-1 13:-1 120 | +1 1:0.125 2:1 3:1 4:-0.320755 5:-0.283105 6:1 7:1 8:-0.51145 9:1 10:-0.483871 11:1 12:-1 13:1 121 | +1 1:-0.166667 2:1 3:0.333333 4:-0.509434 5:-0.716895 6:-1 7:-1 8:0.0381679 9:-1 10:-0.354839 12:1 13:1 122 | +1 1:0.0416667 2:1 3:1 4:-0.471698 5:-0.269406 6:-1 7:1 8:-0.312977 9:1 10:0.0322581 12:0.333333 13:-1 123 | +1 1:0.166667 2:1 3:1 4:0.0943396 5:-0.324201 6:-1 7:-1 
8:-0.740458 9:1 10:-0.612903 12:-0.333333 13:1 124 | -1 1:0.5 2:-1 3:0.333333 4:0.245283 5:0.0684932 6:-1 7:1 8:0.221374 9:-1 10:-0.741935 11:-1 12:-1 13:-1 125 | -1 1:0.0416667 2:1 3:0.333333 4:-0.415094 5:-0.328767 6:-1 7:1 8:0.236641 9:-1 10:-0.83871 11:1 12:-0.333333 13:-1 126 | -1 1:0.0416667 2:-1 3:0.333333 4:0.245283 5:-0.657534 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-0.333333 13:-1 127 | +1 1:0.375 2:1 3:1 4:-0.509434 5:-0.356164 6:-1 7:-1 8:-0.572519 9:1 10:-0.419355 12:0.333333 13:1 128 | -1 1:-0.0416667 2:-1 3:0.333333 4:-0.207547 5:-0.680365 6:-1 7:1 8:0.496183 9:-1 10:-0.967742 12:-1 13:-1 129 | -1 1:-0.0416667 2:1 3:-0.333333 4:-0.245283 5:-0.657534 6:-1 7:-1 8:0.328244 9:-1 10:-0.741935 11:-1 12:-0.333333 13:-1 130 | +1 1:0.291667 2:1 3:1 4:-0.566038 5:-0.525114 6:1 7:-1 8:0.358779 9:1 10:-0.548387 11:-1 12:0.333333 13:1 131 | +1 1:0.416667 2:-1 3:1 4:-0.735849 5:-0.347032 6:-1 7:-1 8:0.496183 9:1 10:-0.419355 12:0.333333 13:-1 132 | +1 1:0.541667 2:1 3:1 4:-0.660377 5:-0.607306 6:-1 7:1 8:-0.0687023 9:1 10:-0.967742 11:-1 12:-0.333333 13:-1 133 | -1 1:-0.458333 2:1 3:1 4:-0.132075 5:-0.543379 6:-1 7:-1 8:0.633588 9:-1 10:-1 11:-1 12:-1 13:-1 134 | +1 1:0.458333 2:1 3:1 4:-0.509434 5:-0.452055 6:-1 7:1 8:-0.618321 9:1 10:-0.290323 11:1 12:-0.333333 13:-1 135 | -1 1:0.0416667 2:1 3:0.333333 4:0.0566038 5:-0.515982 6:-1 7:1 8:0.435115 9:-1 10:-0.483871 11:-1 12:-1 13:1 136 | -1 1:-0.291667 2:-1 3:0.333333 4:-0.0943396 5:-0.767123 6:-1 7:1 8:0.358779 9:1 10:-0.548387 11:1 12:-1 13:-1 137 | -1 1:0.583333 2:-1 3:0.333333 4:0.0943396 5:-0.310502 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-0.333333 13:-1 138 | +1 1:0.125 2:1 3:1 4:-0.415094 5:-0.438356 6:1 7:1 8:0.114504 9:1 10:-0.612903 12:-0.333333 13:-1 139 | -1 1:-0.791667 2:-1 3:-0.333333 4:-0.54717 5:-0.616438 6:-1 7:-1 8:0.847328 9:-1 10:-0.774194 11:-1 12:-1 13:-1 140 | -1 1:0.166667 2:1 3:1 4:-0.283019 5:-0.630137 6:-1 7:-1 8:0.480916 9:1 10:-1 11:-1 12:-1 13:1 141 | +1 1:0.458333 2:1 3:1 
4:-0.0377358 5:-0.607306 6:-1 7:1 8:-0.0687023 9:-1 10:-0.354839 12:0.333333 13:0.5 142 | -1 1:0.25 2:1 3:1 4:-0.169811 5:-0.3379 6:-1 7:1 8:0.694656 9:-1 10:-1 11:-1 12:-1 13:-1 143 | +1 1:-0.125 2:1 3:0.333333 4:-0.132075 5:-0.511416 6:-1 7:-1 8:0.40458 9:-1 10:-0.806452 12:-0.333333 13:1 144 | -1 1:-0.0833333 2:1 3:-1 4:-0.415094 5:-0.60274 6:-1 7:1 8:-0.175573 9:1 10:-0.548387 11:-1 12:-0.333333 13:-1 145 | +1 1:0.0416667 2:1 3:-0.333333 4:0.849057 5:-0.283105 6:-1 7:1 8:0.89313 9:-1 10:-1 11:-1 12:-0.333333 13:1 146 | +1 2:1 3:1 4:-0.45283 5:-0.287671 6:-1 7:-1 8:-0.633588 9:1 10:-0.354839 12:0.333333 13:1 147 | +1 1:-0.0416667 2:1 3:1 4:-0.660377 5:-0.525114 6:-1 7:-1 8:0.358779 9:-1 10:-1 11:-1 12:-0.333333 13:-1 148 | +1 1:-0.541667 2:1 3:1 4:-0.698113 5:-0.812785 6:-1 7:1 8:-0.343511 9:1 10:-0.354839 12:-1 13:1 149 | +1 1:0.208333 2:1 3:0.333333 4:-0.283019 5:-0.552511 6:-1 7:1 8:0.557252 9:-1 10:0.0322581 11:-1 12:0.333333 13:1 150 | -1 1:-0.5 2:-1 3:0.333333 4:-0.660377 5:-0.351598 6:-1 7:1 8:0.541985 9:1 10:-1 11:-1 12:-1 13:-1 151 | -1 1:-0.5 2:1 3:0.333333 4:-0.660377 5:-0.43379 6:-1 7:-1 8:0.648855 9:-1 10:-1 11:-1 12:-1 13:-1 152 | -1 1:-0.125 2:-1 3:0.333333 4:-0.509434 5:-0.575342 6:-1 7:-1 8:0.328244 9:-1 10:-0.483871 12:-1 13:-1 153 | -1 1:0.0416667 2:-1 3:0.333333 4:-0.735849 5:-0.356164 6:-1 7:1 8:0.465649 9:-1 10:-1 11:-1 12:-1 13:-1 154 | -1 1:0.458333 2:-1 3:1 4:-0.320755 5:-0.191781 6:-1 7:-1 8:-0.221374 9:-1 10:-0.354839 12:0.333333 13:-1 155 | -1 1:-0.0833333 2:-1 3:0.333333 4:-0.320755 5:-0.406393 6:-1 7:1 8:0.19084 9:-1 10:-0.83871 11:-1 12:-1 13:-1 156 | -1 1:-0.291667 2:-1 3:-0.333333 4:-0.792453 5:-0.643836 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1 157 | +1 1:0.0833333 2:1 3:1 4:-0.132075 5:-0.584475 6:-1 7:-1 8:-0.389313 9:1 10:0.806452 11:1 12:-1 13:1 158 | -1 1:-0.333333 2:1 3:-0.333333 4:-0.358491 5:-0.16895 6:-1 7:1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1 159 | -1 1:0.125 2:1 3:-1 4:-0.509434 5:-0.694064 6:-1 7:1 
8:0.389313 9:-1 10:-0.387097 12:-1 13:1 160 | +1 1:0.541667 2:-1 3:1 4:0.584906 5:-0.534247 6:1 7:-1 8:0.435115 9:1 10:-0.677419 12:0.333333 13:1 161 | +1 1:-0.625 2:1 3:-1 4:-0.509434 5:-0.520548 6:-1 7:-1 8:0.694656 9:1 10:0.225806 12:-1 13:1 162 | +1 1:0.375 2:-1 3:1 4:0.0566038 5:-0.461187 6:-1 7:-1 8:0.267176 9:1 10:-0.548387 12:-1 13:-1 163 | -1 1:0.0833333 2:1 3:-0.333333 4:-0.320755 5:-0.378995 6:-1 7:-1 8:0.282443 9:-1 10:-1 11:-1 12:-1 13:-1 164 | +1 1:0.208333 2:1 3:1 4:-0.358491 5:-0.392694 6:-1 7:1 8:-0.0992366 9:1 10:-0.0322581 12:0.333333 13:1 165 | -1 1:-0.416667 2:1 3:1 4:-0.698113 5:-0.611872 6:-1 7:-1 8:0.374046 9:-1 10:-1 11:-1 12:-1 13:1 166 | -1 1:0.458333 2:-1 3:1 4:0.622642 5:-0.0913242 6:-1 7:-1 8:0.267176 9:1 10:-1 11:-1 12:-1 13:-1 167 | -1 1:-0.125 2:-1 3:1 4:-0.698113 5:-0.415525 6:-1 7:1 8:0.343511 9:-1 10:-1 11:-1 12:-1 13:-1 168 | -1 2:1 3:0.333333 4:-0.320755 5:-0.675799 6:1 7:1 8:0.236641 9:-1 10:-0.612903 11:1 12:-1 13:-1 169 | -1 1:-0.333333 2:-1 3:1 4:-0.169811 5:-0.497717 6:-1 7:1 8:0.236641 9:1 10:-0.935484 12:-1 13:-1 170 | +1 1:0.5 2:1 3:-1 4:-0.169811 5:-0.287671 6:1 7:1 8:0.572519 9:-1 10:-0.548387 12:-0.333333 13:-1 171 | -1 1:0.666667 2:1 3:-1 4:0.245283 5:-0.506849 6:1 7:1 8:-0.0839695 9:-1 10:-0.967742 12:-0.333333 13:-1 172 | +1 1:0.666667 2:1 3:0.333333 4:-0.132075 5:-0.415525 6:-1 7:1 8:0.145038 9:-1 10:-0.354839 12:1 13:1 173 | +1 1:0.583333 2:1 3:1 4:-0.886792 5:-0.210046 6:-1 7:1 8:-0.175573 9:1 10:-0.709677 12:0.333333 13:-1 174 | -1 1:0.625 2:-1 3:0.333333 4:-0.509434 5:-0.611872 6:-1 7:1 8:-0.328244 9:-1 10:-0.516129 12:-1 13:-1 175 | -1 1:-0.791667 2:1 3:-1 4:-0.54717 5:-0.744292 6:-1 7:1 8:0.572519 9:-1 10:-1 11:-1 12:-1 13:-1 176 | +1 1:0.375 2:-1 3:1 4:-0.169811 5:-0.232877 6:1 7:-1 8:-0.465649 9:-1 10:-0.387097 12:1 13:-1 177 | +1 1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.214612 6:-1 7:-1 8:-0.221374 9:1 10:0.354839 12:1 13:1 178 | +1 1:-0.291667 2:1 3:0.333333 4:0.0566038 5:-0.520548 6:-1 7:-1 8:0.160305 
9:-1 10:0.16129 12:-1 13:-1 179 | +1 1:0.583333 2:1 3:1 4:-0.415094 5:-0.415525 6:1 7:-1 8:0.40458 9:-1 10:-0.935484 12:0.333333 13:1 180 | -1 1:-0.125 2:1 3:0.333333 4:-0.339623 5:-0.680365 6:-1 7:-1 8:0.40458 9:-1 10:-1 11:-1 12:-1 13:-1 181 | -1 1:-0.458333 2:1 3:0.333333 4:-0.509434 5:-0.479452 6:1 7:-1 8:0.877863 9:-1 10:-0.741935 11:1 12:-1 13:1 182 | +1 1:0.125 2:-1 3:1 4:-0.245283 5:0.292237 6:-1 7:1 8:0.206107 9:1 10:-0.387097 12:0.333333 13:1 183 | +1 1:-0.5 2:1 3:1 4:-0.698113 5:-0.789954 6:-1 7:1 8:0.328244 9:-1 10:-1 11:-1 12:-1 13:1 184 | -1 1:-0.458333 2:-1 3:1 4:-0.849057 5:-0.365297 6:-1 7:1 8:-0.221374 9:-1 10:-0.806452 12:-1 13:-1 185 | -1 2:1 3:0.333333 4:-0.320755 5:-0.452055 6:1 7:1 8:0.557252 9:-1 10:-1 11:-1 12:1 13:-1 186 | -1 1:-0.416667 2:1 3:0.333333 4:-0.320755 5:-0.136986 6:-1 7:-1 8:0.389313 9:-1 10:-0.387097 11:-1 12:-0.333333 13:-1 187 | +1 1:0.125 2:1 3:1 4:-0.283019 5:-0.73516 6:-1 7:1 8:-0.480916 9:1 10:-0.322581 12:-0.333333 13:0.5 188 | -1 1:-0.0416667 2:1 3:1 4:-0.735849 5:-0.511416 6:1 7:-1 8:0.160305 9:-1 10:-0.967742 11:-1 12:1 13:1 189 | -1 1:0.375 2:-1 3:1 4:-0.132075 5:0.223744 6:-1 7:1 8:0.312977 9:-1 10:-0.612903 12:-1 13:-1 190 | +1 1:0.708333 2:1 3:0.333333 4:0.245283 5:-0.347032 6:-1 7:-1 8:-0.374046 9:1 10:-0.0645161 12:-0.333333 13:1 191 | -1 1:0.0416667 2:1 3:1 4:-0.132075 5:-0.484018 6:-1 7:-1 8:0.358779 9:-1 10:-0.612903 11:-1 12:-1 13:-1 192 | +1 1:0.708333 2:1 3:1 4:-0.0377358 5:-0.780822 6:-1 7:-1 8:-0.175573 9:1 10:-0.16129 11:1 12:-1 13:1 193 | -1 1:0.0416667 2:1 3:-0.333333 4:-0.735849 5:-0.164384 6:-1 7:-1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:1 194 | +1 1:-0.75 2:1 3:1 4:-0.396226 5:-0.287671 6:-1 7:1 8:0.29771 9:1 10:-1 11:-1 12:-1 13:1 195 | -1 1:-0.208333 2:1 3:0.333333 4:-0.433962 5:-0.410959 6:1 7:-1 8:0.587786 9:-1 10:-1 11:-1 12:0.333333 13:-1 196 | -1 1:0.0833333 2:-1 3:-0.333333 4:-0.226415 5:-0.43379 6:-1 7:1 8:0.374046 9:-1 10:-0.548387 12:-1 13:-1 197 | -1 1:0.208333 2:-1 3:1 4:-0.886792 
5:-0.442922 6:-1 7:1 8:-0.221374 9:-1 10:-0.677419 12:-1 13:-1 198 | -1 1:0.0416667 2:-1 3:0.333333 4:-0.698113 5:-0.598174 6:-1 7:-1 8:0.328244 9:-1 10:-0.483871 12:-1 13:-1 199 | -1 1:0.666667 2:-1 3:-1 4:-0.132075 5:-0.484018 6:-1 7:-1 8:0.221374 9:-1 10:-0.419355 11:-1 12:0.333333 13:-1 200 | +1 1:1 2:1 3:1 4:-0.415094 5:-0.187215 6:-1 7:1 8:0.389313 9:1 10:-1 11:-1 12:1 13:-1 201 | -1 1:0.625 2:1 3:0.333333 4:-0.54717 5:-0.310502 6:-1 7:-1 8:0.221374 9:-1 10:-0.677419 11:-1 12:-0.333333 13:1 202 | +1 1:0.208333 2:1 3:1 4:-0.415094 5:-0.205479 6:-1 7:1 8:0.526718 9:-1 10:-1 11:-1 12:0.333333 13:1 203 | +1 1:0.291667 2:1 3:1 4:-0.415094 5:-0.39726 6:-1 7:1 8:0.0687023 9:1 10:-0.0967742 12:-0.333333 13:1 204 | +1 1:-0.0833333 2:1 3:1 4:-0.132075 5:-0.210046 6:-1 7:-1 8:0.557252 9:1 10:-0.483871 11:-1 12:-1 13:1 205 | +1 1:0.0833333 2:1 3:1 4:0.245283 5:-0.255708 6:-1 7:1 8:0.129771 9:1 10:-0.741935 12:-0.333333 13:1 206 | -1 1:-0.0416667 2:1 3:-1 4:0.0943396 5:-0.214612 6:1 7:-1 8:0.633588 9:-1 10:-0.612903 12:-1 13:1 207 | -1 1:0.291667 2:-1 3:0.333333 4:-0.849057 5:-0.123288 6:-1 7:-1 8:0.358779 9:-1 10:-1 11:-1 12:-0.333333 13:-1 208 | -1 1:0.208333 2:1 3:0.333333 4:-0.792453 5:-0.479452 6:-1 7:1 8:0.267176 9:1 10:-0.806452 12:-1 13:1 209 | +1 1:0.458333 2:1 3:0.333333 4:-0.415094 5:-0.164384 6:-1 7:-1 8:-0.0839695 9:1 10:-0.419355 12:-1 13:1 210 | -1 1:-0.666667 2:1 3:0.333333 4:-0.320755 5:-0.43379 6:-1 7:-1 8:0.770992 9:-1 10:0.129032 11:1 12:-1 13:-1 211 | +1 1:0.25 2:1 3:-1 4:0.433962 5:-0.260274 6:-1 7:1 8:0.343511 9:-1 10:-0.935484 12:-1 13:1 212 | -1 1:-0.0833333 2:1 3:0.333333 4:-0.415094 5:-0.456621 6:1 7:1 8:0.450382 9:-1 10:-0.225806 12:-1 13:-1 213 | -1 1:-0.416667 2:-1 3:0.333333 4:-0.471698 5:-0.60274 6:-1 7:-1 8:0.435115 9:-1 10:-0.935484 12:-1 13:-1 214 | +1 1:0.208333 2:1 3:1 4:-0.358491 5:-0.589041 6:-1 7:1 8:-0.0839695 9:1 10:-0.290323 12:1 13:1 215 | -1 1:-1 2:1 3:-0.333333 4:-0.320755 5:-0.643836 6:-1 7:1 8:1 9:-1 10:-1 11:-1 12:-1 13:-1 
216 | -1 1:-0.5 2:-1 3:-0.333333 4:-0.320755 5:-0.643836 6:-1 7:1 8:0.541985 9:-1 10:-0.548387 11:-1 12:-1 13:-1 217 | -1 1:0.416667 2:-1 3:0.333333 4:-0.226415 5:-0.424658 6:-1 7:1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1 218 | -1 1:-0.0833333 2:1 3:0.333333 4:-1 5:-0.538813 6:-1 7:-1 8:0.267176 9:1 10:-1 11:-1 12:-0.333333 13:1 219 | -1 1:0.0416667 2:1 3:0.333333 4:-0.509434 5:-0.39726 6:-1 7:1 8:0.160305 9:-1 10:-0.870968 12:-1 13:1 220 | -1 1:-0.375 2:1 3:-0.333333 4:-0.509434 5:-0.570776 6:-1 7:-1 8:0.51145 9:-1 10:-1 11:-1 12:-1 13:-1 221 | +1 1:0.0416667 2:1 3:1 4:-0.698113 5:-0.484018 6:-1 7:-1 8:-0.160305 9:1 10:-0.0967742 12:-0.333333 13:1 222 | +1 1:0.5 2:1 3:1 4:-0.226415 5:-0.415525 6:-1 7:1 8:-0.145038 9:-1 10:-0.0967742 12:-0.333333 13:1 223 | -1 1:0.166667 2:1 3:0.333333 4:0.0566038 5:-0.808219 6:-1 7:-1 8:0.572519 9:-1 10:-0.483871 11:-1 12:-1 13:-1 224 | +1 1:0.416667 2:1 3:1 4:-0.320755 5:-0.0684932 6:1 7:1 8:-0.0687023 9:1 10:-0.419355 11:-1 12:1 13:1 225 | -1 1:-0.75 2:-1 3:1 4:-0.169811 5:-0.739726 6:-1 7:-1 8:0.694656 9:-1 10:-0.548387 11:-1 12:-1 13:-1 226 | -1 1:-0.5 2:1 3:-0.333333 4:-0.226415 5:-0.648402 6:-1 7:-1 8:-0.0687023 9:-1 10:-1 12:-1 13:0.5 227 | +1 1:0.375 2:-1 3:0.333333 4:-0.320755 5:-0.374429 6:-1 7:-1 8:-0.603053 9:-1 10:-0.612903 12:-0.333333 13:1 228 | +1 1:-0.416667 2:-1 3:1 4:-0.283019 5:-0.0182648 6:1 7:1 8:-0.00763359 9:1 10:-0.0322581 12:-1 13:1 229 | -1 1:0.208333 2:-1 3:-1 4:0.0566038 5:-0.283105 6:1 7:1 8:0.389313 9:-1 10:-0.677419 11:-1 12:-1 13:-1 230 | -1 1:-0.0416667 2:1 3:-1 4:-0.54717 5:-0.726027 6:-1 7:1 8:0.816794 9:-1 10:-1 12:-1 13:0.5 231 | +1 1:0.333333 2:-1 3:1 4:-0.0377358 5:-0.173516 6:-1 7:1 8:0.145038 9:1 10:-0.677419 12:-1 13:1 232 | +1 1:-0.583333 2:1 3:1 4:-0.54717 5:-0.575342 6:-1 7:-1 8:0.0534351 9:-1 10:-0.612903 12:-1 13:1 233 | -1 1:-0.333333 2:1 3:1 4:-0.603774 5:-0.388128 6:-1 7:1 8:0.740458 9:-1 10:-1 11:-1 12:-1 13:-1 234 | +1 1:-0.0416667 2:1 3:1 4:-0.358491 5:-0.410959 6:-1 7:-1 
8:0.374046 9:1 10:-1 11:-1 12:-0.333333 13:1 235 | -1 1:0.375 2:1 3:0.333333 4:-0.320755 5:-0.520548 6:-1 7:-1 8:0.145038 9:-1 10:-0.419355 12:1 13:1 236 | +1 1:0.375 2:-1 3:1 4:0.245283 5:-0.826484 6:-1 7:1 8:0.129771 9:-1 10:1 11:1 12:1 13:1 237 | -1 2:-1 3:1 4:-0.169811 5:-0.506849 6:-1 7:1 8:0.358779 9:-1 10:-1 11:-1 12:-1 13:-1 238 | +1 1:-0.416667 2:1 3:1 4:-0.509434 5:-0.767123 6:-1 7:1 8:-0.251908 9:1 10:-0.193548 12:-1 13:1 239 | -1 1:-0.25 2:1 3:0.333333 4:-0.169811 5:-0.401826 6:-1 7:1 8:0.29771 9:-1 10:-1 11:-1 12:-1 13:-1 240 | -1 1:-0.0416667 2:1 3:-0.333333 4:-0.509434 5:-0.0913242 6:-1 7:-1 8:0.541985 9:-1 10:-0.935484 11:-1 12:-1 13:-1 241 | +1 1:0.625 2:1 3:0.333333 4:0.622642 5:-0.324201 6:1 7:1 8:0.206107 9:1 10:-0.483871 12:-1 13:1 242 | -1 1:-0.583333 2:1 3:0.333333 4:-0.132075 5:-0.109589 6:-1 7:1 8:0.694656 9:-1 10:-1 11:-1 12:-1 13:-1 243 | -1 2:-1 3:1 4:-0.320755 5:-0.369863 6:-1 7:1 8:0.0992366 9:-1 10:-0.870968 12:-1 13:-1 244 | +1 1:0.375 2:-1 3:1 4:-0.132075 5:-0.351598 6:-1 7:1 8:0.358779 9:-1 10:0.16129 11:1 12:0.333333 13:-1 245 | -1 1:-0.0833333 2:-1 3:0.333333 4:-0.132075 5:-0.16895 6:-1 7:1 8:0.0839695 9:-1 10:-0.516129 11:-1 12:-0.333333 13:-1 246 | +1 1:0.291667 2:1 3:1 4:-0.320755 5:-0.420091 6:-1 7:-1 8:0.114504 9:1 10:-0.548387 11:-1 12:-0.333333 13:1 247 | +1 1:0.5 2:1 3:1 4:-0.698113 5:-0.442922 6:-1 7:1 8:0.328244 9:-1 10:-0.806452 11:-1 12:0.333333 13:0.5 248 | -1 1:0.5 2:-1 3:0.333333 4:0.150943 5:-0.347032 6:-1 7:-1 8:0.175573 9:-1 10:-0.741935 11:-1 12:-1 13:-1 249 | +1 1:0.291667 2:1 3:0.333333 4:-0.132075 5:-0.730594 6:-1 7:1 8:0.282443 9:-1 10:-0.0322581 12:-1 13:-1 250 | +1 1:0.291667 2:1 3:1 4:-0.0377358 5:-0.287671 6:-1 7:1 8:0.0839695 9:1 10:-0.0967742 12:0.333333 13:1 251 | +1 1:0.0416667 2:1 3:1 4:-0.509434 5:-0.716895 6:-1 7:-1 8:-0.358779 9:-1 10:-0.548387 12:-0.333333 13:1 252 | -1 1:-0.375 2:1 3:-0.333333 4:-0.320755 5:-0.575342 6:-1 7:1 8:0.78626 9:-1 10:-1 11:-1 12:-1 13:-1 253 | +1 1:-0.375 2:1 3:1 
4:-0.660377 5:-0.251142 6:-1 7:1 8:0.251908 9:-1 10:-1 11:-1 12:-0.333333 13:-1 254 | -1 1:-0.0833333 2:1 3:0.333333 4:-0.698113 5:-0.776256 6:-1 7:-1 8:-0.206107 9:-1 10:-0.806452 11:-1 12:-1 13:-1 255 | -1 1:0.25 2:1 3:0.333333 4:0.0566038 5:-0.607306 6:1 7:-1 8:0.312977 9:-1 10:-0.483871 11:-1 12:-1 13:-1 256 | -1 1:0.75 2:-1 3:-0.333333 4:0.245283 5:-0.196347 6:-1 7:-1 8:0.389313 9:-1 10:-0.870968 11:-1 12:0.333333 13:-1 257 | -1 1:0.333333 2:1 3:0.333333 4:0.0566038 5:-0.465753 6:1 7:-1 8:0.00763359 9:1 10:-0.677419 12:-1 13:-1 258 | +1 1:0.0833333 2:1 3:1 4:-0.283019 5:0.0365297 6:-1 7:-1 8:-0.0687023 9:1 10:-0.612903 12:-0.333333 13:1 259 | +1 1:0.458333 2:1 3:0.333333 4:-0.132075 5:-0.0456621 6:-1 7:-1 8:0.328244 9:-1 10:-1 11:-1 12:-1 13:-1 260 | -1 1:-0.416667 2:1 3:1 4:0.0566038 5:-0.447489 6:-1 7:-1 8:0.526718 9:-1 10:-0.516129 11:-1 12:-1 13:-1 261 | -1 1:0.208333 2:-1 3:0.333333 4:-0.509434 5:-0.0228311 6:-1 7:-1 8:0.541985 9:-1 10:-1 11:-1 12:-1 13:-1 262 | +1 1:0.291667 2:1 3:1 4:-0.320755 5:-0.634703 6:-1 7:1 8:-0.0687023 9:1 10:-0.225806 12:0.333333 13:1 263 | +1 1:0.208333 2:1 3:-0.333333 4:-0.509434 5:-0.278539 6:-1 7:1 8:0.358779 9:-1 10:-0.419355 12:-1 13:-1 264 | -1 1:-0.166667 2:1 3:-0.333333 4:-0.320755 5:-0.360731 6:-1 7:-1 8:0.526718 9:-1 10:-0.806452 11:-1 12:-1 13:-1 265 | +1 1:-0.208333 2:1 3:-0.333333 4:-0.698113 5:-0.52968 6:-1 7:-1 8:0.480916 9:-1 10:-0.677419 11:1 12:-1 13:1 266 | -1 1:-0.0416667 2:1 3:0.333333 4:0.471698 5:-0.666667 6:1 7:-1 8:0.389313 9:-1 10:-0.83871 11:-1 12:-1 13:1 267 | -1 1:-0.375 2:1 3:-0.333333 4:-0.509434 5:-0.374429 6:-1 7:-1 8:0.557252 9:-1 10:-1 11:-1 12:-1 13:1 268 | -1 1:0.125 2:-1 3:-0.333333 4:-0.132075 5:-0.232877 6:-1 7:1 8:0.251908 9:-1 10:-0.580645 12:-1 13:-1 269 | -1 1:0.166667 2:1 3:1 4:-0.132075 5:-0.69863 6:-1 7:-1 8:0.175573 9:-1 10:-0.870968 12:-1 13:0.5 270 | +1 1:0.583333 2:1 3:1 4:0.245283 5:-0.269406 6:-1 7:1 8:-0.435115 9:1 10:-0.516129 12:1 13:-1 271 | 
-------------------------------------------------------------------------------- /tgrocery/learner/liblinear/linear.def: -------------------------------------------------------------------------------- 1 | LIBRARY liblinear 2 | EXPORTS 3 | train @1 4 | cross_validation @2 5 | save_model @3 6 | load_model @4 7 | get_nr_feature @5 8 | get_nr_class @6 9 | get_labels @7 10 | predict_values @8 11 | predict @9 12 | predict_probability @10 13 | free_and_destroy_model @11 14 | free_model_content @12 15 | destroy_param @13 16 | check_parameter @14 17 | check_probability_model @15 18 | set_print_string_function @16 19 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/linear.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef _LIBLINEAR_H 4 | #define _LIBLINEAR_H 5 | #ifndef INT64_DEFINED 6 | typedef int64_t INT64; 7 | #define INT64_DEFINED 8 | #endif 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | struct feature_node 15 | { 16 | INT64 index; 17 | double value; 18 | }; 19 | 20 | struct problem 21 | { 22 | INT64 l, n; 23 | double *y; 24 | struct feature_node **x; 25 | double bias; /* < 0 if no bias term */ 26 | }; 27 | 28 | enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ 29 | 30 | struct parameter 31 | { 32 | INT64 solver_type; 33 | 34 | /* these are for training only */ 35 | double eps; /* stopping criteria */ 36 | double C; 37 | INT64 nr_weight; 38 | INT64 *weight_label; 39 | double* weight; 40 | double p; 41 | }; 42 | 43 | struct model 44 | { 45 | struct parameter param; 46 | INT64 nr_class; /* number of classes */ 47 | INT64 nr_feature; 48 | double *w; 49 | INT64 *label; /* label of each class */ 50 | double bias; 51 | }; 52 | 53 | struct model* train(const struct problem *prob, 
const struct parameter *param); 54 | void cross_validation(const struct problem *prob, const struct parameter *param, INT64 nr_fold, double *target); 55 | 56 | double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 57 | double predict(const struct model *model_, const struct feature_node *x); 58 | double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 59 | 60 | INT64 save_model(const char *model_file_name, const struct model *model_); 61 | struct model *load_model(const char *model_file_name); 62 | 63 | INT64 get_nr_feature(const struct model *model_); 64 | INT64 get_nr_class(const struct model *model_); 65 | void get_labels(const struct model *model_, INT64* label); 66 | 67 | void free_model_content(struct model *model_ptr); 68 | void free_and_destroy_model(struct model **model_ptr_ptr); 69 | void destroy_param(struct parameter *param); 70 | 71 | const char *check_parameter(const struct problem *prob, const struct parameter *param); 72 | INT64 check_probability_model(const struct model *model); 73 | void set_print_string_function(void (*print_func) (const char*)); 74 | 75 | #ifdef __cplusplus 76 | } 77 | #endif 78 | 79 | #endif /* _LIBLINEAR_H */ 80 | 81 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "linear.h" 7 | 8 | int print_null(const char *s,...) {return 0;} 9 | 10 | static int (*info)(const char *fmt,...) 
= &printf; 11 | 12 | struct feature_node *x; 13 | INT64 max_nr_attr = 64; 14 | 15 | struct model* model_; 16 | INT64 flag_predict_probability=0; 17 | 18 | void exit_input_error(INT64 line_num) 19 | { 20 | fprintf(stderr,"Wrong input format at line %lld\n", (long long int)line_num); 21 | exit(1); 22 | } 23 | 24 | static char *line = NULL; 25 | static INT64 max_line_len; 26 | 27 | static char* readline(FILE *input) 28 | { 29 | INT64 len; 30 | 31 | if(fgets(line,max_line_len,input) == NULL) 32 | return NULL; 33 | 34 | while(strrchr(line,'\n') == NULL) 35 | { 36 | max_line_len *= 2; 37 | line = (char *) realloc(line,max_line_len); 38 | len = (INT64) strlen(line); 39 | if(fgets(line+len,max_line_len-len,input) == NULL) 40 | break; 41 | } 42 | return line; 43 | } 44 | 45 | void do_predict(FILE *input, FILE *output) 46 | { 47 | INT64 correct = 0; 48 | INT64 total = 0; 49 | double error = 0; 50 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 51 | 52 | INT64 nr_class=get_nr_class(model_); 53 | double *prob_estimates=NULL; 54 | INT64 j, n; 55 | INT64 nr_feature=get_nr_feature(model_); 56 | if(model_->bias>=0) 57 | n=nr_feature+1; 58 | else 59 | n=nr_feature; 60 | 61 | if(flag_predict_probability) 62 | { 63 | INT64 *labels; 64 | 65 | if(!check_probability_model(model_)) 66 | { 67 | fprintf(stderr, "probability output is only supported for logistic regression\n"); 68 | exit(1); 69 | } 70 | 71 | labels=(INT64 *) malloc(nr_class*sizeof(INT64)); 72 | get_labels(model_,labels); 73 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 74 | fprintf(output,"labels"); 75 | for(j=0;j=max_nr_attr-2) // need one more for index = -1 101 | { 102 | max_nr_attr *= 2; 103 | x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); 104 | } 105 | 106 | idx = strtok(NULL,":"); 107 | val = strtok(NULL," \t"); 108 | 109 | if(val == NULL) 110 | break; 111 | errno = 0; 112 | x[i].index = (INT64) strtoll(idx,&endptr,10); 113 | if(endptr == idx || errno != 
0 || *endptr != '\0' || x[i].index <= inst_max_index) 114 | exit_input_error(total+1); 115 | else 116 | inst_max_index = x[i].index; 117 | 118 | errno = 0; 119 | x[i].value = strtod(val,&endptr); 120 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 121 | exit_input_error(total+1); 122 | 123 | // feature indices larger than those in training are not used 124 | if(x[i].index <= nr_feature) 125 | ++i; 126 | } 127 | 128 | if(model_->bias>=0) 129 | { 130 | x[i].index = n; 131 | x[i].value = model_->bias; 132 | i++; 133 | } 134 | x[i].index = -1; 135 | 136 | if(flag_predict_probability) 137 | { 138 | INT64 j; 139 | predict_label = predict_probability(model_,x,prob_estimates); 140 | fprintf(output,"%g",predict_label); 141 | for(j=0;jnr_class;j++) 142 | fprintf(output," %g",prob_estimates[j]); 143 | fprintf(output,"\n"); 144 | } 145 | else 146 | { 147 | predict_label = predict(model_,x); 148 | fprintf(output,"%g\n",predict_label); 149 | } 150 | 151 | if(predict_label == target_label) 152 | ++correct; 153 | error += (predict_label-target_label)*(predict_label-target_label); 154 | sump += predict_label; 155 | sumt += target_label; 156 | sumpp += predict_label*predict_label; 157 | sumtt += target_label*target_label; 158 | sumpt += predict_label*target_label; 159 | ++total; 160 | } 161 | if(model_->param.solver_type==L2R_L2LOSS_SVR || 162 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 163 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 164 | { 165 | info("Mean squared error = %g (regression)\n",error/total); 166 | info("Squared correlation coefficient = %g (regression)\n", 167 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 168 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 169 | ); 170 | } 171 | else 172 | info("Accuracy = %g%% (%lld/%lld)\n",(double) correct/total*100,(long long int)correct,(long long int)total); 173 | if(flag_predict_probability) 174 | free(prob_estimates); 175 | } 176 | 177 | void exit_with_help() 178 | { 179 | 
printf( 180 | "Usage: predict [options] test_file model_file output_file\n" 181 | "options:\n" 182 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 183 | "-q : quiet mode (no outputs)\n" 184 | ); 185 | exit(1); 186 | } 187 | 188 | int main(int argc, char **argv) 189 | { 190 | FILE *input, *output; 191 | INT64 i; 192 | 193 | // parse options 194 | for(i=1;i=argc) 214 | exit_with_help(); 215 | 216 | if(i!=argc-3 || argv[i+1][0] == '-' || argv[i+2][0] == '-') 217 | exit_with_help(); 218 | 219 | input = fopen(argv[i],"r"); 220 | if(input == NULL) 221 | { 222 | fprintf(stderr,"can't open input file %s\n",argv[i]); 223 | exit(1); 224 | } 225 | 226 | output = fopen(argv[i+2],"w"); 227 | if(output == NULL) 228 | { 229 | fprintf(stderr,"can't open output file %s\n",argv[i+2]); 230 | exit(1); 231 | } 232 | 233 | if((model_=load_model(argv[i+1]))==0) 234 | { 235 | fprintf(stderr,"can't open model file %s\n",argv[i+1]); 236 | exit(1); 237 | } 238 | 239 | x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); 240 | do_predict(input, output); 241 | free_and_destroy_model(&model_); 242 | free(line); 243 | free(x); 244 | fclose(input); 245 | fclose(output); 246 | return 0; 247 | } 248 | 249 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/python/Makefile: -------------------------------------------------------------------------------- 1 | all = lib 2 | 3 | lib: 4 | make -C .. 
lib 5 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/python/README: -------------------------------------------------------------------------------- 1 | ------------------------------------- 2 | --- Python interface of LIBLINEAR --- 3 | ------------------------------------- 4 | 5 | Table of Contents 6 | ================= 7 | 8 | - Introduction 9 | - Installation 10 | - Quick Start 11 | - Design Description 12 | - Data Structures 13 | - Utility Functions 14 | - Additional Information 15 | 16 | Introduction 17 | ============ 18 | 19 | Python (http://www.python.org/) is a programming language suitable for rapid 20 | development. This tool provides a simple Python interface to LIBLINEAR, a library 21 | for support vector machines (http://www.csie.ntu.edu.tw/~cjlin/liblinear). The 22 | interface is very easy to use as the usage is the same as that of LIBLINEAR. The 23 | interface is developed with the built-in Python library "ctypes." 24 | 25 | Installation 26 | ============ 27 | 28 | On Unix systems, type 29 | 30 | > make 31 | 32 | The interface needs only LIBLINEAR shared library, which is generated by 33 | the above command. We assume that the shared library is on the LIBLINEAR 34 | main directory or in the system path. 35 | 36 | Quick Start 37 | =========== 38 | 39 | There are two levels of usage. The high-level one uses utility functions 40 | in liblinearutil.py and the usage is the same as the LIBLINEAR MATLAB interface. 
41 | 42 | >>> from liblinearutil import * 43 | # Read data in LIBSVM format 44 | >>> y, x = svm_read_problem('../heart_scale') 45 | >>> m = train(y[:200], x[:200], '-c 4') 46 | >>> p_label, p_acc, p_val = predict(y[200:], x[200:], m) 47 | 48 | # Construct problem in python format 49 | # Dense data 50 | >>> y, x = [1,-1], [[1,0,1], [-1,0,-1]] 51 | # Sparse data 52 | >>> y, x = [1,-1], [{1:1, 3:1}, {1:-1,3:-1}] 53 | >>> prob = problem(y, x) 54 | >>> param = parameter('-c 4 -B 1') 55 | >>> m = train(prob, param) 56 | 57 | # Other utility functions 58 | >>> save_model('heart_scale.model', m) 59 | >>> m = load_model('heart_scale.model') 60 | >>> p_label, p_acc, p_val = predict(y, x, m, '-b 1') 61 | >>> ACC, MSE, SCC = evaluations(y, p_label) 62 | 63 | # Getting online help 64 | >>> help(train) 65 | 66 | The low-level use directly calls C interfaces imported by liblinear.py. Note that 67 | all arguments and return values are in ctypes format. You need to handle them 68 | carefully. 69 | 70 | >>> from liblinear import * 71 | >>> prob = problem([1,-1], [{1:1, 3:1}, {1:-1,3:-1}]) 72 | >>> param = parameter('-c 4') 73 | >>> m = liblinear.train(prob, param) # m is a ctype pointer to a model 74 | # Convert a Python-format instance to feature_nodearray, a ctypes structure 75 | >>> x0, max_idx = gen_feature_nodearray({1:1, 3:1}) 76 | >>> label = liblinear.predict(m, x0) 77 | 78 | Design Description 79 | ================== 80 | 81 | There are two files liblinear.py and liblinearutil.py, which respectively correspond to 82 | low-level and high-level use of the interface. 83 | 84 | In liblinear.py, we adopt the Python built-in library "ctypes," so that 85 | Python can directly access C structures and interface functions defined 86 | in linear.h. 87 | 88 | While advanced users can use structures/functions in liblinear.py, to 89 | avoid handling ctypes structures, in liblinearutil.py we provide some easy-to-use 90 | functions. The usage is similar to LIBLINEAR MATLAB interface. 
91 | 92 | Data Structures 93 | =============== 94 | 95 | Three data structures derived from linear.h are feature_node, problem, and 96 | parameter. They all contain fields with the same names in 97 | linear.h. Access these fields carefully because you directly use a C structure 98 | instead of a Python object. The following description introduces additional 99 | fields and methods. 100 | 101 | Before using the data structures, execute the following command to load the 102 | LIBLINEAR shared library: 103 | 104 | >>> from liblinear import * 105 | 106 | - class feature_node: 107 | 108 | Construct a feature_node. 109 | 110 | >>> node = feature_node(idx, val) 111 | 112 | idx: an integer indicating the feature index. 113 | 114 | val: a float indicating the feature value. 115 | 116 | Show the index and the value of a node. 117 | 118 | >>> print(node) 119 | 120 | - Function: gen_feature_nodearray(xi [,feature_max=None [,issparse=True]]) 121 | 122 | Generate a feature vector from a Python list/tuple or a dictionary: 123 | 124 | >>> xi, max_idx = gen_feature_nodearray({1:1, 3:1, 5:-2}) 125 | 126 | xi: the returned feature_nodearray (a ctypes structure) 127 | 128 | max_idx: the maximal feature index of xi 129 | 130 | issparse: if issparse == True, zero feature values are removed. The default 131 | value is True for the sparsity. 132 | 133 | feature_max: if feature_max is assigned, features with indices larger than 134 | feature_max are removed. 135 | 136 | - class problem: 137 | 138 | Construct a problem instance 139 | 140 | >>> prob = problem(y, x [,bias=-1]) 141 | 142 | y: a Python list/tuple of l labels (type must be int/double). 143 | 144 | x: a Python list/tuple of l data instances. Each element of x must be 145 | an instance of list/tuple/dictionary type. 
146 | 147 | bias: if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term 148 | added (default -1) 149 | 150 | You can also modify the bias value by 151 | 152 | >>> prob.set_bias(1) 153 | 154 | Note that if your x contains sparse data (i.e., dictionary), the internal 155 | ctypes data format is still sparse. 156 | 157 | - class parameter: 158 | 159 | Construct a parameter instance 160 | 161 | >>> param = parameter('training_options') 162 | 163 | If 'training_options' is empty, LIBLINEAR default values are applied. 164 | 165 | Set param to LIBLINEAR default values. 166 | 167 | >>> param.set_to_default_values() 168 | 169 | Parse a string of options. 170 | 171 | >>> param.parse_options('training_options') 172 | 173 | Show values of parameters. 174 | 175 | >>> print(param) 176 | 177 | - class model: 178 | 179 | There are two ways to obtain an instance of model: 180 | 181 | >>> model_ = train(y, x) 182 | >>> model_ = load_model('model_file_name') 183 | 184 | Note that the returned structure of interface functions 185 | liblinear.train and liblinear.load_model is a ctypes pointer of 186 | model, which is different from the model object returned 187 | by train and load_model in liblinearutil.py. We provide a 188 | function toPyModel for the conversion: 189 | 190 | >>> model_ptr = liblinear.train(prob, param) 191 | >>> model_ = toPyModel(model_ptr) 192 | 193 | If you obtain a model in a way other than the above approaches, 194 | handle it carefully to avoid memory leak or segmentation fault. 
195 | 196 | Some interface functions to access LIBLINEAR models are wrapped as 197 | members of the class model: 198 | 199 | >>> nr_feature = model_.get_nr_feature() 200 | >>> nr_class = model_.get_nr_class() 201 | >>> class_labels = model_.get_labels() 202 | >>> is_prob_model = model_.is_probability_model() 203 | 204 | Utility Functions 205 | ================= 206 | 207 | To use utility functions, type 208 | 209 | >>> from liblinearutil import * 210 | 211 | The above command loads 212 | train() : train a linear model 213 | predict() : predict testing data 214 | svm_read_problem() : read the data from a LIBSVM-format file. 215 | load_model() : load a LIBLINEAR model. 216 | save_model() : save model to a file. 217 | evaluations() : evaluate prediction results. 218 | 219 | - Function: train 220 | 221 | There are three ways to call train() 222 | 223 | >>> model = train(y, x [, 'training_options']) 224 | >>> model = train(prob [, 'training_options']) 225 | >>> model = train(prob, param) 226 | 227 | y: a list/tuple of l training labels (type must be int/double). 228 | 229 | x: a list/tuple of l training instances. The feature vector of 230 | each training instance is an instance of list/tuple or dictionary. 231 | 232 | training_options: a string in the same form as that for LIBLINEAR command 233 | mode. 234 | 235 | prob: a problem instance generated by calling 236 | problem(y, x). 237 | 238 | param: a parameter instance generated by calling 239 | parameter('training_options') 240 | 241 | model: the returned model instance. See linear.h for details of this 242 | structure. If '-v' is specified, cross validation is 243 | conducted and the returned model is just a scalar: cross-validation 244 | accuracy for classification and mean-squared error for regression. 245 | 246 | To train the same data many times with different 247 | parameters, the second and the third ways should be faster. 
248 | 249 | Examples: 250 | 251 | >>> y, x = svm_read_problem('../heart_scale') 252 | >>> prob = problem(y, x) 253 | >>> param = parameter('-s 3 -c 5 -q') 254 | >>> m = train(y, x, '-c 5') 255 | >>> m = train(prob, '-w1 5 -c 5') 256 | >>> m = train(prob, param) 257 | >>> CV_ACC = train(y, x, '-v 3') 258 | 259 | - Function: predict 260 | 261 | To predict testing data with a model, use 262 | 263 | >>> p_labels, p_acc, p_vals = predict(y, x, model [,'predicting_options']) 264 | 265 | y: a list/tuple of l true labels (type must be int/double). It is used 266 | for calculating the accuracy. Use [] if true labels are 267 | unavailable. 268 | 269 | x: a list/tuple of l predicting instances. The feature vector of 270 | each predicting instance is an instance of list/tuple or dictionary. 271 | 272 | predicting_options: a string of predicting options in the same format as 273 | that of LIBLINEAR. 274 | 275 | model: a model instance. 276 | 277 | p_labels: a list of predicted labels 278 | 279 | p_acc: a tuple including accuracy (for classification), mean 280 | squared error, and squared correlation coefficient (for 281 | regression). 282 | 283 | p_vals: a list of decision values or probability estimates (if '-b 1' 284 | is specified). If k is the number of classes, for decision values, 285 | each element includes results of predicting k binary-class 286 | SVMs. If k = 2 and solver is not MCSVM_CS, only one decision value 287 | is returned. For probabilities, each element contains k values 288 | indicating the probability that the testing instance is in each class. 289 | Note that the order of classes here is the same as 'model.label' 290 | field in the model structure. 
291 | 292 | Example: 293 | 294 | >>> m = train(y, x, '-c 5') 295 | >>> p_labels, p_acc, p_vals = predict(y, x, m) 296 | 297 | - Functions: svm_read_problem/load_model/save_model 298 | 299 | See the usage by examples: 300 | 301 | >>> y, x = svm_read_problem('data.txt') 302 | >>> m = load_model('model_file') 303 | >>> save_model('model_file', m) 304 | 305 | - Function: evaluations 306 | 307 | Calculate some evaluations using the true values (ty) and predicted 308 | values (pv): 309 | 310 | >>> (ACC, MSE, SCC) = evaluations(ty, pv) 311 | 312 | ty: a list of true values. 313 | 314 | pv: a list of predict values. 315 | 316 | ACC: accuracy. 317 | 318 | MSE: mean squared error. 319 | 320 | SCC: squared correlation coefficient. 321 | 322 | 323 | Additional Information 324 | ====================== 325 | 326 | This interface was written by Hsiang-Fu Yu from Department of Computer 327 | Science, National Taiwan University. If you find this tool useful, please 328 | cite LIBLINEAR as follows 329 | 330 | R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin. 331 | LIBLINEAR: A Library for Large Linear Classification, Journal of 332 | Machine Learning Research 9(2008), 1871-1874. Software available at 333 | http://www.csie.ntu.edu.tw/~cjlin/liblinear 334 | 335 | For any question, please contact Chih-Jen Lin , 336 | or check the FAQ page: 337 | 338 | http://www.csie.ntu.edu.tw/~cjlin/liblinear/faq.html 339 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/python/liblinear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ctypes import * 4 | from ctypes.util import find_library 5 | from os import path 6 | import sys 7 | 8 | # For unix the prefix 'lib' is not considered. 
9 | liblinear = CDLL(path.join(path.dirname(path.abspath(__file__)), '../liblinear.so.1')) 10 | 11 | # Construct constants 12 | SOLVER_TYPE = ['L2R_LR', 'L2R_L2LOSS_SVC_DUAL', 'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL',\ 13 | 'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', \ 14 | None, None, None, \ 15 | 'L2R_L2LOSS_SVR', 'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL'] 16 | for i, s in enumerate(SOLVER_TYPE): 17 | if s is not None: exec("%s = %d" % (s , i)) 18 | 19 | PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p) 20 | def print_null(s): 21 | return 22 | 23 | def genFields(names, types): 24 | return list(zip(names, types)) 25 | 26 | def fillprototype(f, restype, argtypes): 27 | f.restype = restype 28 | f.argtypes = argtypes 29 | 30 | class feature_node(Structure): 31 | _names = ["index", "value"] 32 | _types = [c_int64, c_double] 33 | _fields_ = genFields(_names, _types) 34 | 35 | def __str__(self): 36 | return '%d:%g' % (self.index, self.value) 37 | 38 | def gen_feature_nodearray(xi, feature_max=None, issparse=True): 39 | if isinstance(xi, dict): 40 | index_range = xi.keys() 41 | elif isinstance(xi, (list, tuple)): 42 | xi = [0] + xi # idx should start from 1 43 | index_range = range(1, len(xi)) 44 | else: 45 | raise TypeError('xi should be a dictionary, list or tuple') 46 | 47 | if feature_max: 48 | assert(isinstance(feature_max, int)) 49 | index_range = filter(lambda j: j <= feature_max, index_range) 50 | if issparse: 51 | index_range = filter(lambda j:xi[j] != 0, index_range) 52 | 53 | index_range = sorted(index_range) 54 | ret = (feature_node * (len(index_range)+2))() 55 | ret[-1].index = -1 # for bias term 56 | ret[-2].index = -1 57 | for idx, j in enumerate(index_range): 58 | ret[idx].index = j 59 | ret[idx].value = xi[j] 60 | max_idx = 0 61 | if index_range : 62 | max_idx = index_range[-1] 63 | return ret, max_idx 64 | 65 | class problem(Structure): 66 | _names = ["l", "n", "y", "x", "bias"] 67 | _types = [c_int64, c_int64, POINTER(c_double), 
POINTER(POINTER(feature_node)), c_double] 68 | _fields_ = genFields(_names, _types) 69 | 70 | def __init__(self, y, x, bias = -1): 71 | if len(y) != len(x) : 72 | raise ValueError("len(y) != len(x)") 73 | self.l = l = len(y) 74 | self.bias = -1 75 | 76 | max_idx = 0 77 | x_space = self.x_space = [] 78 | for i, xi in enumerate(x): 79 | tmp_xi, tmp_idx = gen_feature_nodearray(xi) 80 | x_space += [tmp_xi] 81 | max_idx = max(max_idx, tmp_idx) 82 | self.n = max_idx 83 | 84 | self.y = (c_double * l)() 85 | for i, yi in enumerate(y): self.y[i] = y[i] 86 | 87 | self.x = (POINTER(feature_node) * l)() 88 | for i, xi in enumerate(self.x_space): self.x[i] = xi 89 | 90 | self.set_bias(bias) 91 | 92 | def set_bias(self, bias): 93 | if self.bias == bias: 94 | return 95 | if bias >= 0 and self.bias < 0: 96 | self.n += 1 97 | node = feature_node(self.n, bias) 98 | if bias < 0 and self.bias >= 0: 99 | self.n -= 1 100 | node = feature_node(-1, bias) 101 | 102 | for xi in self.x_space: 103 | xi[-2] = node 104 | self.bias = bias 105 | 106 | 107 | class parameter(Structure): 108 | _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"] 109 | _types = [c_int64, c_double, c_double, c_int64, POINTER(c_int64), POINTER(c_double), c_double] 110 | _fields_ = genFields(_names, _types) 111 | 112 | def __init__(self, options = None): 113 | if options == None: 114 | options = '' 115 | self.parse_options(options) 116 | 117 | def __str__(self): 118 | s = '' 119 | attrs = parameter._names + list(self.__dict__.keys()) 120 | values = map(lambda attr: getattr(self, attr), attrs) 121 | for attr, val in zip(attrs, values): 122 | s += (' %s: %s\n' % (attr, val)) 123 | s = s.strip() 124 | 125 | return s 126 | 127 | def set_to_default_values(self): 128 | self.solver_type = L2R_L2LOSS_SVC_DUAL 129 | self.eps = float('inf') 130 | self.C = 1 131 | self.p = 0.1 132 | self.nr_weight = 0 133 | self.weight_label = (c_int64 * 0)() 134 | self.weight = (c_double * 0)() 135 | self.bias = -1 136 
| self.cross_validation = False 137 | self.nr_fold = 0 138 | self.print_func = None 139 | 140 | def parse_options(self, options): 141 | if isinstance(options, list): 142 | argv = options 143 | elif isinstance(options, str): 144 | argv = options.split() 145 | else: 146 | raise TypeError("arg 1 should be a list or a str.") 147 | self.set_to_default_values() 148 | self.print_func = cast(None, PRINT_STRING_FUN) 149 | weight_label = [] 150 | weight = [] 151 | 152 | i = 0 153 | while i < len(argv) : 154 | if argv[i] == "-s": 155 | i = i + 1 156 | self.solver_type = int(argv[i]) 157 | elif argv[i] == "-c": 158 | i = i + 1 159 | self.C = float(argv[i]) 160 | elif argv[i] == "-p": 161 | i = i + 1 162 | self.p = float(argv[i]) 163 | elif argv[i] == "-e": 164 | i = i + 1 165 | self.eps = float(argv[i]) 166 | elif argv[i] == "-B": 167 | i = i + 1 168 | self.bias = float(argv[i]) 169 | elif argv[i] == "-v": 170 | i = i + 1 171 | self.cross_validation = 1 172 | self.nr_fold = int(argv[i]) 173 | if self.nr_fold < 2 : 174 | raise ValueError("n-fold cross validation: n must >= 2") 175 | elif argv[i].startswith("-w"): 176 | i = i + 1 177 | self.nr_weight += 1 178 | nr_weight = self.nr_weight 179 | weight_label += [int(argv[i-1][2:])] 180 | weight += [float(argv[i])] 181 | elif argv[i] == "-q": 182 | self.print_func = PRINT_STRING_FUN(print_null) 183 | else : 184 | raise ValueError("Wrong options") 185 | i += 1 186 | 187 | liblinear.set_print_string_function(self.print_func) 188 | self.weight_label = (c_int64*self.nr_weight)() 189 | self.weight = (c_double*self.nr_weight)() 190 | for i in range(self.nr_weight): 191 | self.weight[i] = weight[i] 192 | self.weight_label[i] = weight_label[i] 193 | 194 | if self.eps == float('inf'): 195 | if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]: 196 | self.eps = 0.01 197 | elif self.solver_type in [L2R_L2LOSS_SVR]: 198 | self.eps = 0.001 199 | elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]: 200 | 
self.eps = 0.1 201 | elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]: 202 | self.eps = 0.01 203 | elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 204 | self.eps = 0.1 205 | 206 | class model(Structure): 207 | _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"] 208 | _types = [parameter, c_int64, c_int64, POINTER(c_double), POINTER(c_int64), c_double] 209 | _fields_ = genFields(_names, _types) 210 | 211 | def __init__(self): 212 | self.__createfrom__ = 'python' 213 | 214 | def __del__(self): 215 | # free memory created by C to avoid memory leak 216 | if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C': 217 | liblinear.free_and_destroy_model(pointer(self)) 218 | 219 | def get_nr_feature(self): 220 | return liblinear.get_nr_feature(self) 221 | 222 | def get_nr_class(self): 223 | return liblinear.get_nr_class(self) 224 | 225 | def get_labels(self): 226 | nr_class = self.get_nr_class() 227 | labels = (c_int64 * nr_class)() 228 | liblinear.get_labels(self, labels) 229 | return labels[:nr_class] 230 | 231 | def is_probability_model(self): 232 | return (liblinear.check_probability_model(self) == 1) 233 | 234 | def toPyModel(model_ptr): 235 | """ 236 | toPyModel(model_ptr) -> model 237 | 238 | Convert a ctypes POINTER(model) to a Python model 239 | """ 240 | if bool(model_ptr) == False: 241 | raise ValueError("Null pointer") 242 | m = model_ptr.contents 243 | m.__createfrom__ = 'C' 244 | return m 245 | 246 | fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)]) 247 | fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int64, POINTER(c_double)]) 248 | 249 | fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]) 250 | fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)]) 251 | fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), 
POINTER(c_double)]) 252 | 253 | fillprototype(liblinear.save_model, c_int64, [c_char_p, POINTER(model)]) 254 | fillprototype(liblinear.load_model, POINTER(model), [c_char_p]) 255 | 256 | fillprototype(liblinear.get_nr_feature, c_int64, [POINTER(model)]) 257 | fillprototype(liblinear.get_nr_class, c_int64, [POINTER(model)]) 258 | fillprototype(liblinear.get_labels, None, [POINTER(model), POINTER(c_int64)]) 259 | 260 | fillprototype(liblinear.free_model_content, None, [POINTER(model)]) 261 | fillprototype(liblinear.free_and_destroy_model, None, [POINTER(POINTER(model))]) 262 | fillprototype(liblinear.destroy_param, None, [POINTER(parameter)]) 263 | fillprototype(liblinear.check_parameter, c_char_p, [POINTER(problem), POINTER(parameter)]) 264 | fillprototype(liblinear.check_probability_model, c_int64, [POINTER(model)]) 265 | fillprototype(liblinear.set_print_string_function, None, [CFUNCTYPE(None, c_char_p)]) 266 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/python/liblinearutil.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path 7 | from liblinear import * 8 | 9 | def svm_read_problem(data_file_name): 10 | """ 11 | svm_read_problem(data_file_name) -> [y, x] 12 | 13 | Read LIBSVM-format data from data_file_name and return labels y 14 | and data instances x. 
15 | """ 16 | prob_y = [] 17 | prob_x = [] 18 | for line in open(data_file_name): 19 | line = line.split(None, 1) 20 | # In case an instance with all zero features 21 | if len(line) == 1: line += [''] 22 | label, features = line 23 | xi = {} 24 | for e in features.split(): 25 | ind, val = e.split(":") 26 | xi[int(ind)] = float(val) 27 | prob_y += [float(label)] 28 | prob_x += [xi] 29 | return (prob_y, prob_x) 30 | 31 | def load_model(model_file_name): 32 | """ 33 | load_model(model_file_name) -> model 34 | 35 | Load a LIBLINEAR model from model_file_name and return. 36 | """ 37 | model = liblinear.load_model(model_file_name.encode()) 38 | if not model: 39 | print("can't open model file %s" % model_file_name) 40 | return None 41 | model = toPyModel(model) 42 | return model 43 | 44 | def save_model(model_file_name, model): 45 | """ 46 | save_model(model_file_name, model) -> None 47 | 48 | Save a LIBLINEAR model to the file model_file_name. 49 | """ 50 | liblinear.save_model(model_file_name.encode(), model) 51 | 52 | def evaluations(ty, pv): 53 | """ 54 | evaluations(ty, pv) -> (ACC, MSE, SCC) 55 | 56 | Calculate accuracy, mean squared error and squared correlation coefficient 57 | using the true values (ty) and predicted values (pv). 
58 | """ 59 | if len(ty) != len(pv): 60 | raise ValueError("len(ty) must equal to len(pv)") 61 | total_correct = total_error = 0 62 | sumv = sumy = sumvv = sumyy = sumvy = 0 63 | for v, y in zip(pv, ty): 64 | if y == v: 65 | total_correct += 1 66 | total_error += (v-y)*(v-y) 67 | sumv += v 68 | sumy += y 69 | sumvv += v*v 70 | sumyy += y*y 71 | sumvy += v*y 72 | l = len(ty) 73 | ACC = 100.0*total_correct/l 74 | MSE = total_error/l 75 | try: 76 | SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy)) 77 | except: 78 | SCC = float('nan') 79 | return (ACC, MSE, SCC) 80 | 81 | def train(arg1, arg2=None, arg3=None): 82 | """ 83 | train(y, x [, options]) -> model | ACC 84 | train(prob [, options]) -> model | ACC 85 | train(prob, param) -> model | ACC 86 | 87 | Train a model from data (y, x) or a problem prob using 88 | 'options' or a parameter param. 89 | If '-v' is specified in 'options' (i.e., cross validation) 90 | either accuracy (ACC) or mean-squared error (MSE) is returned. 
91 | 92 | options: 93 | -s type : set type of solver (default 1) 94 | for multi-class classification 95 | 0 -- L2-regularized logistic regression (primal) 96 | 1 -- L2-regularized L2-loss support vector classification (dual) 97 | 2 -- L2-regularized L2-loss support vector classification (primal) 98 | 3 -- L2-regularized L1-loss support vector classification (dual) 99 | 4 -- support vector classification by Crammer and Singer 100 | 5 -- L1-regularized L2-loss support vector classification 101 | 6 -- L1-regularized logistic regression 102 | 7 -- L2-regularized logistic regression (dual) 103 | for regression 104 | 11 -- L2-regularized L2-loss support vector regression (primal) 105 | 12 -- L2-regularized L2-loss support vector regression (dual) 106 | 13 -- L2-regularized L1-loss support vector regression (dual) 107 | -c cost : set the parameter C (default 1) 108 | -p epsilon : set the epsilon in loss function of SVR (default 0.1) 109 | -e epsilon : set tolerance of termination criterion 110 | -s 0 and 2 111 | |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 112 | where f is the primal function, (default 0.01) 113 | -s 11 114 | |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 115 | -s 1, 3, 4, and 7 116 | Dual maximal violation <= eps; similar to liblinear (default 0.1) 
117 | -s 5 and 6 118 | |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, 119 | where f is the primal function (default 0.01) 120 | -s 12 and 13 121 | |f'(alpha)|_1 <= eps |f'(alpha0)|, 122 | where f is the dual function (default 0.1) 123 | -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) 124 | -wi weight: weights adjust the parameter C of different classes (see README for details) 125 | -v n: n-fold cross validation mode 126 | -q : quiet mode (no outputs) 127 | """ 128 | prob, param = None, None 129 | if isinstance(arg1, (list, tuple)): 130 | assert isinstance(arg2, (list, tuple)) 131 | y, x, options = arg1, arg2, arg3 132 | prob = problem(y, x) 133 | param = parameter(options) 134 | elif isinstance(arg1, problem): 135 | prob = arg1 136 | if isinstance(arg2, parameter): 137 | param = arg2 138 | else : 139 | param = parameter(arg2) 140 | if prob == None or param == None : 141 | raise TypeError("Wrong types for the arguments") 142 | 143 | prob.set_bias(param.bias) 144 | liblinear.set_print_string_function(param.print_func) 145 | err_msg = liblinear.check_parameter(prob, param) 146 | if err_msg : 147 | raise ValueError('Error: %s' % err_msg) 148 | 149 | if param.cross_validation: 150 | l, nr_fold = prob.l, param.nr_fold 151 | target = (c_double * l)() 152 | liblinear.cross_validation(prob, param, nr_fold, target) 153 | ACC, MSE, SCC = evaluations(prob.y[:l], target[:l]) 154 | if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 155 | print("Cross Validation Mean squared error = %g" % MSE) 156 | print("Cross Validation Squared correlation coefficient = %g" % SCC) 157 | return MSE 158 | else: 159 | print("Cross Validation Accuracy = %g%%" % ACC) 160 | return ACC 161 | else : 162 | m = liblinear.train(prob, param) 163 | m = toPyModel(m) 164 | 165 | return m 166 | 167 | def predict(y, x, m, options=""): 168 | """ 169 | predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals) 170 | 171 | Predict 
data (y, x) with the SVM model m. 172 | options: 173 | -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only 174 | -q quiet mode (no outputs) 175 | 176 | The return tuple contains 177 | p_labels: a list of predicted labels 178 | p_acc: a tuple including accuracy (for classification), mean-squared 179 | error, and squared correlation coefficient (for regression). 180 | p_vals: a list of decision values or probability estimates (if '-b 1' 181 | is specified). If k is the number of classes, for decision values, 182 | each element includes results of predicting k binary-class 183 | SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value 184 | is returned. For probabilities, each element contains k values 185 | indicating the probability that the testing instance is in each class. 186 | Note that the order of classes here is the same as 'model.label' 187 | field in the model structure. 188 | """ 189 | 190 | def info(s): 191 | print(s) 192 | 193 | predict_probability = 0 194 | argv = options.split() 195 | i = 0 196 | while i < len(argv): 197 | if argv[i] == '-b': 198 | i += 1 199 | predict_probability = int(argv[i]) 200 | elif argv[i] == '-q': 201 | info = print_null 202 | else: 203 | raise ValueError("Wrong options") 204 | i+=1 205 | 206 | solver_type = m.param.solver_type 207 | nr_class = m.get_nr_class() 208 | nr_feature = m.get_nr_feature() 209 | is_prob_model = m.is_probability_model() 210 | bias = m.bias 211 | if bias >= 0: 212 | biasterm = feature_node(nr_feature+1, bias) 213 | else: 214 | biasterm = feature_node(-1, bias) 215 | pred_labels = [] 216 | pred_values = [] 217 | 218 | if predict_probability: 219 | if not is_prob_model: 220 | raise TypeError('probability output is only supported for logistic regression') 221 | prob_estimates = (c_double * nr_class)() 222 | for xi in x: 223 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 224 | xi[-2] = biasterm 225 | label = 
liblinear.predict_probability(m, xi, prob_estimates) 226 | values = prob_estimates[:nr_class] 227 | pred_labels += [label] 228 | pred_values += [values] 229 | else: 230 | if nr_class <= 2: 231 | nr_classifier = 1 232 | else: 233 | nr_classifier = nr_class 234 | dec_values = (c_double * nr_classifier)() 235 | for xi in x: 236 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 237 | xi[-2] = biasterm 238 | label = liblinear.predict_values(m, xi, dec_values) 239 | values = dec_values[:nr_classifier] 240 | pred_labels += [label] 241 | pred_values += [values] 242 | if len(y) == 0: 243 | y = [0] * len(x) 244 | ACC, MSE, SCC = evaluations(y, pred_labels) 245 | l = len(y) 246 | if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 247 | info("Mean squared error = %g (regression)" % MSE) 248 | info("Squared correlation coefficient = %g (regression)" % SCC) 249 | else: 250 | info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l)) 251 | 252 | return pred_labels, (ACC, MSE, SCC), pred_values 253 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/train.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "linear.h" 8 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 9 | #define INF HUGE_VAL 10 | 11 | void print_null(const char *s) {} 12 | 13 | void exit_with_help() 14 | { 15 | printf( 16 | "Usage: train [options] training_set_file [model_file]\n" 17 | "options:\n" 18 | "-s type : set type of solver (default 1)\n" 19 | " for multi-class classification\n" 20 | " 0 -- L2-regularized logistic regression (primal)\n" 21 | " 1 -- L2-regularized L2-loss support vector classification (dual)\n" 22 | " 2 -- L2-regularized L2-loss support vector classification (primal)\n" 23 | " 3 -- L2-regularized L1-loss support vector classification (dual)\n" 24 
| " 4 -- support vector classification by Crammer and Singer\n" 25 | " 5 -- L1-regularized L2-loss support vector classification\n" 26 | " 6 -- L1-regularized logistic regression\n" 27 | " 7 -- L2-regularized logistic regression (dual)\n" 28 | " for regression\n" 29 | " 11 -- L2-regularized L2-loss support vector regression (primal)\n" 30 | " 12 -- L2-regularized L2-loss support vector regression (dual)\n" 31 | " 13 -- L2-regularized L1-loss support vector regression (dual)\n" 32 | "-c cost : set the parameter C (default 1)\n" 33 | "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n" 34 | "-e epsilon : set tolerance of termination criterion\n" 35 | " -s 0 and 2\n" 36 | " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 37 | " where f is the primal function and pos/neg are # of\n" 38 | " positive/negative data (default 0.01)\n" 39 | " -s 11\n" 40 | " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" 41 | " -s 1, 3, 4, and 7\n" 42 | " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" 43 | " -s 5 and 6\n" 44 | " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" 45 | " where f is the primal function (default 0.01)\n" 46 | " -s 12 and 13\n" 47 | " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" 48 | " where f is the dual function (default 0.1)\n" 49 | "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" 50 | "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" 51 | "-v n: n-fold cross validation mode\n" 52 | "-q : quiet mode (no outputs)\n" 53 | ); 54 | exit(1); 55 | } 56 | 57 | void exit_input_error(INT64 line_num) 58 | { 59 | fprintf(stderr,"Wrong input format at line %lld\n", (long long int)line_num); 60 | exit(1); 61 | } 62 | 63 | static char *line = NULL; 64 | static INT64 max_line_len; 65 | 66 | static char* readline(FILE *input) 67 | { 68 | INT64 len; 69 | 70 | if(fgets(line,max_line_len,input) == NULL) 71 | return NULL; 72 | 73 | while(strrchr(line,'\n') == NULL) 
74 | { 75 | max_line_len *= 2; 76 | line = (char *) realloc(line,max_line_len); 77 | len = (INT64) strlen(line); 78 | if(fgets(line+len,max_line_len-len,input) == NULL) 79 | break; 80 | } 81 | return line; 82 | } 83 | 84 | void parse_command_line(INT64 argc, char **argv, char *input_file_name, char *model_file_name); 85 | void read_problem(const char *filename); 86 | void do_cross_validation(); 87 | 88 | struct feature_node *x_space; 89 | struct parameter param; 90 | struct problem prob; 91 | struct model* model_; 92 | INT64 flag_cross_validation; 93 | INT64 nr_fold; 94 | double bias; 95 | 96 | int main(int argc, char **argv) 97 | { 98 | char input_file_name[1024]; 99 | char model_file_name[1024]; 100 | const char *error_msg; 101 | 102 | parse_command_line((INT64)argc, argv, input_file_name, model_file_name); 103 | read_problem(input_file_name); 104 | error_msg = check_parameter(&prob,¶m); 105 | 106 | if(error_msg) 107 | { 108 | fprintf(stderr,"ERROR: %s\n",error_msg); 109 | exit(1); 110 | } 111 | 112 | if(flag_cross_validation) 113 | { 114 | do_cross_validation(); 115 | } 116 | else 117 | { 118 | model_=train(&prob, ¶m); 119 | if(save_model(model_file_name, model_)) 120 | { 121 | fprintf(stderr,"can't save model to file %s\n",model_file_name); 122 | exit(1); 123 | } 124 | free_and_destroy_model(&model_); 125 | } 126 | destroy_param(¶m); 127 | free(prob.y); 128 | free(prob.x); 129 | free(x_space); 130 | free(line); 131 | 132 | return 0; 133 | } 134 | 135 | void do_cross_validation() 136 | { 137 | INT64 i; 138 | INT64 total_correct = 0; 139 | double total_error = 0; 140 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 141 | double *target = Malloc(double, prob.l); 142 | 143 | cross_validation(&prob,¶m,nr_fold,target); 144 | if(param.solver_type == L2R_L2LOSS_SVR || 145 | param.solver_type == L2R_L1LOSS_SVR_DUAL || 146 | param.solver_type == L2R_L2LOSS_SVR_DUAL) 147 | { 148 | for(i=0;i=argc) 197 | exit_with_help(); 198 | switch(argv[i-1][1]) 199 | { 200 
| case 's': 201 | param.solver_type = atoi(argv[i]); 202 | break; 203 | 204 | case 'c': 205 | param.C = atof(argv[i]); 206 | break; 207 | 208 | case 'p': 209 | param.p = atof(argv[i]); 210 | break; 211 | 212 | case 'e': 213 | param.eps = atof(argv[i]); 214 | break; 215 | 216 | case 'B': 217 | bias = atof(argv[i]); 218 | break; 219 | 220 | case 'w': 221 | ++param.nr_weight; 222 | param.weight_label = (INT64 *) realloc(param.weight_label,sizeof(INT64)*param.nr_weight); 223 | param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); 224 | param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); 225 | param.weight[param.nr_weight-1] = atof(argv[i]); 226 | break; 227 | 228 | case 'v': 229 | flag_cross_validation = 1; 230 | nr_fold = atoi(argv[i]); 231 | if(nr_fold < 2) 232 | { 233 | fprintf(stderr,"n-fold cross validation: n must >= 2\n"); 234 | exit_with_help(); 235 | } 236 | break; 237 | 238 | case 'q': 239 | print_func = &print_null; 240 | i--; 241 | break; 242 | 243 | default: 244 | fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); 245 | exit_with_help(); 246 | break; 247 | } 248 | } 249 | 250 | set_print_string_function(print_func); 251 | 252 | // determine filenames 253 | if(i>=argc) 254 | exit_with_help(); 255 | 256 | strcpy(input_file_name, argv[i]); 257 | 258 | if(i= 0) elements += prob.l; 339 | 340 | prob.y = Malloc(double,prob.l); 341 | prob.x = Malloc(struct feature_node *,prob.l); 342 | x_space = Malloc(struct feature_node,elements+prob.l); 343 | 344 | max_index = 0; 345 | j=0; 346 | for(i=0;i max_index) 383 | max_index = inst_max_index; 384 | 385 | if(prob.bias >= 0) 386 | x_space[j++].value = prob.bias; 387 | 388 | x_space[j++].index = -1; 389 | } 390 | 391 | if(prob.bias >= 0) 392 | { 393 | prob.n=max_index+1; 394 | for(i=1;iindex = prob.n; 396 | x_space[j-2].index = prob.n; 397 | } 398 | else 399 | prob.n=max_index; 400 | 401 | fclose(fp); 402 | } 403 | 
-------------------------------------------------------------------------------- /tgrocery/learner/liblinear/tron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tron.h" 6 | #ifndef INT64_DEFINED 7 | typedef int64_t INT64; 8 | #define INT64_DEFINED 9 | #endif 10 | 11 | #ifndef min 12 | template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } 17 | #endif 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | extern double dnrm2_(INT64 *, double *, INT64 *); 24 | extern double ddot_(INT64 *, double *, INT64 *, double *, INT64 *); 25 | extern INT64 daxpy_(INT64 *, double *, double *, INT64 *, double *, INT64 *); 26 | extern INT64 dscal_(INT64 *, double *, double *, INT64 *); 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | 32 | static void default_print(const char *buf) 33 | { 34 | fputs(buf,stdout); 35 | fflush(stdout); 36 | } 37 | 38 | void TRON::info(const char *fmt,...) 39 | { 40 | char buf[BUFSIZ]; 41 | va_list ap; 42 | va_start(ap,fmt); 43 | vsprintf(buf,fmt,ap); 44 | va_end(ap); 45 | (*tron_print_string)(buf); 46 | } 47 | 48 | TRON::TRON(const function *fun_obj, double eps, INT64 max_iter) 49 | { 50 | this->fun_obj=const_cast(fun_obj); 51 | this->eps=eps; 52 | this->max_iter=max_iter; 53 | tron_print_string = default_print; 54 | } 55 | 56 | TRON::~TRON() 57 | { 58 | } 59 | 60 | void TRON::tron(double *w) 61 | { 62 | // Parameters for updating the iterates. 63 | double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; 64 | 65 | // Parameters for updating the trust region size delta. 
66 | double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; 67 | 68 | INT64 n = fun_obj->get_nr_variable(); 69 | INT64 i, cg_iter; 70 | double delta, snorm, one=1.0; 71 | double alpha, f, fnew, prered, actred, gs; 72 | INT64 search = 1, iter = 1, inc = 1; 73 | double *s = new double[n]; 74 | double *r = new double[n]; 75 | double *w_new = new double[n]; 76 | double *g = new double[n]; 77 | 78 | for (i=0; ifun(w); 82 | fun_obj->grad(w, g); 83 | delta = dnrm2_(&n, g, &inc); 84 | double gnorm1 = delta; 85 | double gnorm = gnorm1; 86 | 87 | if (gnorm <= eps*gnorm1) 88 | search = 0; 89 | 90 | iter = 1; 91 | 92 | while (iter <= max_iter && search) 93 | { 94 | cg_iter = trcg(delta, g, s, r); 95 | 96 | memcpy(w_new, w, sizeof(double)*n); 97 | daxpy_(&n, &one, s, &inc, w_new, &inc); 98 | 99 | gs = ddot_(&n, g, &inc, s, &inc); 100 | prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); 101 | fnew = fun_obj->fun(w_new); 102 | 103 | // Compute the actual reduction. 104 | actred = f - fnew; 105 | 106 | // On the first iteration, adjust the initial step bound. 107 | snorm = dnrm2_(&n, s, &inc); 108 | if (iter == 1) 109 | delta = min(delta, snorm); 110 | 111 | // Compute prediction alpha*snorm of the step. 112 | if (fnew - f - gs <= 0) 113 | alpha = sigma3; 114 | else 115 | alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); 116 | 117 | // Update the trust region bound according to the ratio of actual to predicted reduction. 
118 | if (actred < eta0*prered) 119 | delta = min(max(alpha, sigma1)*snorm, sigma2*delta); 120 | else if (actred < eta1*prered) 121 | delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); 122 | else if (actred < eta2*prered) 123 | delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); 124 | else 125 | delta = max(delta, min(alpha*snorm, sigma3*delta)); 126 | 127 | info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); 128 | 129 | if (actred > eta0*prered) 130 | { 131 | iter++; 132 | memcpy(w, w_new, sizeof(double)*n); 133 | f = fnew; 134 | fun_obj->grad(w, g); 135 | 136 | gnorm = dnrm2_(&n, g, &inc); 137 | if (gnorm <= eps*gnorm1) 138 | break; 139 | } 140 | if (f < -1.0e+32) 141 | { 142 | info("WARNING: f < -1.0e+32\n"); 143 | break; 144 | } 145 | if (fabs(actred) <= 0 && prered <= 0) 146 | { 147 | info("WARNING: actred and prered <= 0\n"); 148 | break; 149 | } 150 | if (fabs(actred) <= 1.0e-12*fabs(f) && 151 | fabs(prered) <= 1.0e-12*fabs(f)) 152 | { 153 | info("WARNING: actred and prered too small\n"); 154 | break; 155 | } 156 | } 157 | 158 | delete[] g; 159 | delete[] r; 160 | delete[] w_new; 161 | delete[] s; 162 | } 163 | 164 | INT64 TRON::trcg(double delta, double *g, double *s, double *r) 165 | { 166 | INT64 i, inc = 1; 167 | INT64 n = fun_obj->get_nr_variable(); 168 | double one = 1; 169 | double *d = new double[n]; 170 | double *Hd = new double[n]; 171 | double rTr, rnewTrnew, alpha, beta, cgtol; 172 | 173 | for (i=0; iHv(d, Hd); 189 | 190 | alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); 191 | daxpy_(&n, &alpha, d, &inc, s, &inc); 192 | if (dnrm2_(&n, s, &inc) > delta) 193 | { 194 | info("cg reaches trust region boundary\n"); 195 | alpha = -alpha; 196 | daxpy_(&n, &alpha, d, &inc, s, &inc); 197 | 198 | double std = ddot_(&n, s, &inc, d, &inc); 199 | double sts = ddot_(&n, s, &inc, s, &inc); 200 | double dtd = ddot_(&n, d, &inc, d, &inc); 201 | double dsq = delta*delta; 202 | double 
rad = sqrt(std*std + dtd*(dsq-sts)); 203 | if (std >= 0) 204 | alpha = (dsq - sts)/(std + rad); 205 | else 206 | alpha = (rad - std)/dtd; 207 | daxpy_(&n, &alpha, d, &inc, s, &inc); 208 | alpha = -alpha; 209 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 210 | break; 211 | } 212 | alpha = -alpha; 213 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 214 | rnewTrnew = ddot_(&n, r, &inc, r, &inc); 215 | beta = rnewTrnew/rTr; 216 | dscal_(&n, &beta, d, &inc); 217 | daxpy_(&n, &one, r, &inc, d, &inc); 218 | rTr = rnewTrnew; 219 | } 220 | 221 | delete[] d; 222 | delete[] Hd; 223 | 224 | return(cg_iter); 225 | } 226 | 227 | double TRON::norm_inf(INT64 n, double *x) 228 | { 229 | double dmax = fabs(x[0]); 230 | for (INT64 i=1; i= dmax) 232 | dmax = fabs(x[i]); 233 | return(dmax); 234 | } 235 | 236 | void TRON::set_print_string(void (*print_string) (const char *buf)) 237 | { 238 | tron_print_string = print_string; 239 | } 240 | -------------------------------------------------------------------------------- /tgrocery/learner/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #include 2 | #ifndef INT64_DEFINED 3 | typedef int64_t INT64; 4 | #define INT64_DEFINED 5 | #endif 6 | 7 | #ifndef _TRON_H 8 | #define _TRON_H 9 | 10 | class function 11 | { 12 | public: 13 | virtual double fun(double *w) = 0 ; 14 | virtual void grad(double *w, double *g) = 0 ; 15 | virtual void Hv(double *s, double *Hs) = 0 ; 16 | 17 | virtual INT64 get_nr_variable(void) = 0 ; 18 | virtual ~function(void){} 19 | }; 20 | 21 | class TRON 22 | { 23 | public: 24 | TRON(const function *fun_obj, double eps = 0.1, INT64 max_iter = 1000); 25 | ~TRON(); 26 | 27 | void tron(double *w); 28 | void set_print_string(void (*i_print) (const char *buf)); 29 | 30 | private: 31 | INT64 trcg(double delta, double *g, double *s, double *r); 32 | double norm_inf(INT64 n, double *x); 33 | 34 | double eps; 35 | INT64 max_iter; 36 | function *fun_obj; 37 | void info(const char *fmt,...); 38 
| void (*tron_print_string)(const char *buf); 39 | }; 40 | #endif 41 | -------------------------------------------------------------------------------- /tgrocery/learner/test.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "util.c" 3 | 4 | int main(int argc, const char* argv[]){ 5 | INT64 offsets[1000]; 6 | INT64 error_code = 0; 7 | merge_problems(&argv[1], argc-2, &offsets[0], argv[argc-1], 1, &error_code); 8 | 9 | for(int i = 0; i < argc-1; i++) 10 | printf("%ld ", offsets[i]); 11 | puts(""); 12 | return 0; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /tgrocery/learner/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "linear.h" 8 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 9 | 10 | 11 | static char *line = NULL; 12 | static INT64 max_line_len; 13 | 14 | static char* readline(FILE *input) 15 | { 16 | INT64 len; 17 | 18 | if(fgets(line,max_line_len,input) == NULL) 19 | return NULL; 20 | 21 | while(strrchr(line,'\n') == NULL) 22 | { 23 | max_line_len *= 2; 24 | line = (char *) realloc(line,max_line_len); 25 | len = (INT64) strlen(line); 26 | if(fgets(line+len,max_line_len-len,input) == NULL) 27 | break; 28 | } 29 | return line; 30 | } 31 | 32 | typedef struct { 33 | struct problem prob; 34 | struct feature_node* x_space; 35 | INT64 len_x_space; 36 | } SVMProblem; 37 | 38 | void freeSVMProblem(SVMProblem svmprob) { 39 | struct problem *prob = &(svmprob.prob); 40 | if (prob->x!=NULL) free(prob->x); 41 | if (prob->y!=NULL) free(prob->y); 42 | if (svmprob.x_space!=NULL) free(svmprob.x_space); 43 | } 44 | 45 | 46 | // read in a problem (in libsvm format) 47 | SVMProblem read_problem(const char *filename, double bias, INT64 *error_code) 48 | { 49 | INT64 max_index, inst_max_index, i; 50 | INT64 elements, j; 51 | FILE *fp = 
fopen(filename,"r"); 52 | char *endptr; 53 | char *idx, *val, *label; 54 | struct problem prob; 55 | SVMProblem svmprob; 56 | 57 | /** 58 | * error_code: 59 | * 0 no error 60 | * > 0 input format error. The error_code value 61 | * indicates the line number. 62 | * -1 can not open file 63 | * -2 memory exhausted 64 | */ 65 | *error_code = 0; 66 | 67 | if(fp == NULL) 68 | { 69 | *error_code = -1; 70 | return svmprob; 71 | } 72 | 73 | prob.l = 0; 74 | elements = 0; 75 | max_line_len = 1024; 76 | line = Malloc(char,max_line_len); 77 | while(readline(fp)!=NULL) 78 | { 79 | char *p = strtok(line," \t"); // label 80 | 81 | // features 82 | while(1) 83 | { 84 | p = strtok(NULL," \t"); 85 | if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature 86 | break; 87 | elements++; 88 | } 89 | prob.l++; 90 | } 91 | rewind(fp); 92 | 93 | prob.bias=bias; 94 | if(prob.bias >= 0) elements += prob.l; 95 | 96 | errno = 0; 97 | prob.y = Malloc(double,prob.l); 98 | prob.x = Malloc(struct feature_node *,prob.l); 99 | struct feature_node* x_space = Malloc(struct feature_node,elements+prob.l); 100 | 101 | if(errno == ENOMEM) 102 | { 103 | free(line); 104 | fclose(fp); 105 | *error_code = -2; 106 | return svmprob; 107 | } 108 | 109 | max_index = 0; 110 | j=0; 111 | for(i=0;i max_index) 169 | max_index = inst_max_index; 170 | 171 | if(prob.bias >= 0) 172 | x_space[j++].value = prob.bias; 173 | 174 | x_space[j++].index = -1; 175 | } 176 | 177 | if(prob.bias >= 0) 178 | { 179 | prob.n=max_index+1; 180 | for(i=1;iindex = prob.n; 182 | x_space[j-2].index = prob.n; 183 | } 184 | else 185 | prob.n=max_index; 186 | 187 | fclose(fp); 188 | free(line); 189 | 190 | svmprob.prob = prob; 191 | svmprob.x_space = x_space; 192 | svmprob.len_x_space = j; 193 | 194 | return svmprob; 195 | } 196 | 197 | 198 | double* compute_idf(const struct problem *prob, double *idf_val) 199 | { 200 | INT64 i, j; 201 | //double* idf_val = Malloc(double, prob.n); 202 | memset(idf_val, 0, sizeof(double) * 
prob->n); 203 | 204 | for(i = 0; i < prob->l; ++i) 205 | { 206 | struct feature_node* xi = prob->x[i]; 207 | while(xi->index != -1) 208 | { 209 | ++idf_val[xi->index-1]; 210 | ++xi; 211 | } 212 | } 213 | 214 | for(j = 0; j < prob->n; ++j) 215 | { 216 | if(idf_val[j] > 0) 217 | idf_val[j] = log(prob->l / idf_val[j]); 218 | else 219 | idf_val[j] = 0; 220 | } 221 | 222 | return idf_val; 223 | } 224 | 225 | void normalize(struct problem *prob, int binary, int norm, int tf, int idf, double* idf_val) 226 | { 227 | INT64 i; 228 | 229 | for(i = 0; i < prob->l; ++i) 230 | { 231 | struct feature_node* xi; 232 | 233 | if(binary) 234 | { 235 | xi = prob->x[i]; 236 | while(xi->index != -1) 237 | { 238 | xi->value = xi->value != 0; 239 | ++xi; 240 | } 241 | } 242 | 243 | if(tf) 244 | { 245 | double norm = 0; 246 | xi = prob->x[i]; 247 | while(xi->index != -1) 248 | { 249 | norm += xi->value; 250 | ++xi; 251 | } 252 | 253 | xi = prob->x[i]; 254 | if(norm != 0) 255 | while(xi->index != -1) 256 | { 257 | xi->value /= norm; 258 | ++xi; 259 | } 260 | } 261 | 262 | if(idf) 263 | { 264 | xi = prob->x[i]; 265 | while(xi->index != -1) 266 | { 267 | xi->value *= idf_val[xi->index-1]; 268 | ++xi; 269 | } 270 | } 271 | 272 | if(norm) 273 | { 274 | double norm = 0; 275 | xi = prob->x[i]; 276 | while(xi->index != -1) 277 | { 278 | norm += xi->value * xi->value; 279 | ++xi; 280 | } 281 | 282 | norm = sqrt(norm); 283 | 284 | xi = prob->x[i]; 285 | if(norm != 0) 286 | while(xi->index != -1) 287 | { 288 | xi->value /= norm; 289 | ++xi; 290 | } 291 | } 292 | } 293 | } 294 | 295 | 296 | void merge_problems(const char *srcs[], const int num_srcs, INT64* offsets, const char *output_filename, char training, INT64 *error_code) { 297 | int i, j; 298 | const double bias = -1; 299 | SVMProblem *svmproblems = Malloc(SVMProblem, num_srcs); 300 | FILE *fp = NULL; 301 | 302 | /** 303 | * error_code: 304 | * 0 no error 305 | * > 0 input format error. The error_code value 306 | * indicates the line number. 
307 | * -1 can not open file 308 | * -2 memory exhausted 309 | * -3 input files contain different numbers of instances 310 | * -4 no file given 311 | */ 312 | 313 | if(num_srcs <= 0) { 314 | *error_code = -4; 315 | return; 316 | } 317 | 318 | for(i=0; i < num_srcs; i++) 319 | { 320 | svmproblems[i] = read_problem(srcs[i], bias, error_code); 321 | if(*error_code != 0) { 322 | switch (*error_code) { 323 | case -1: 324 | fprintf(stderr,"ERROR: Cannot open input file: %s\n", srcs[i]); 325 | break; 326 | case -2: 327 | fprintf(stderr,"ERROR: Memory exhausted when reading %s\n", srcs[i]); 328 | break; 329 | default: /* error_code > 0 input format error*/ 330 | fprintf(stderr,"ERROR: input format error at line %ld in %s\n", (long)*error_code, srcs[i]); 331 | break; 332 | } 333 | return; 334 | } 335 | } 336 | 337 | 338 | // Overwrite offsets 339 | if(training) { 340 | offsets[0] = svmproblems[0].prob.n; 341 | for(i = 1; i < num_srcs; i++) 342 | offsets[i] = offsets[i-1] + svmproblems[i].prob.n; 343 | } 344 | 345 | // Make sure # of instances are all equal. 
346 | for(i = 1; i < num_srcs; i++) 347 | { 348 | if(svmproblems[i].prob.l != svmproblems[i-1].prob.l) 349 | { 350 | *error_code = -3; 351 | fprintf(stderr,"ERROR: #insts in %s = %ld, but #insts in %s = %ld\n", 352 | srcs[i], (long)svmproblems[i].prob.l, srcs[i-1], (long)svmproblems[i-1].prob.l); 353 | return; 354 | } 355 | } 356 | 357 | fp = fopen(output_filename, "w"); 358 | if(fp == NULL) 359 | { 360 | *error_code = -1; 361 | fprintf(stderr,"ERROR: Cannot open output file: %s \n", srcs[i]); 362 | return; 363 | } 364 | 365 | for(j = 0; j < svmproblems[0].prob.l; j++) 366 | { 367 | INT64 base = 0; 368 | 369 | fprintf(fp, "%g", svmproblems[0].prob.y[j]); 370 | for(i = 0; i < num_srcs; i++) 371 | { 372 | struct feature_node* node; 373 | 374 | for(node = svmproblems[i].prob.x[j]; node->index != -1; node++) 375 | { 376 | INT64 index = base+node->index; 377 | if(index <= offsets[i]) 378 | fprintf(fp, " %ld:%.17g", (long)index, node->value); 379 | else 380 | break; 381 | } 382 | base = offsets[i]; 383 | } 384 | fprintf(fp,"\n"); 385 | } 386 | fclose(fp); 387 | 388 | for(i = 0; i < num_srcs; i++) 389 | freeSVMProblem(svmproblems[i]); 390 | } 391 | 392 | 393 | --------------------------------------------------------------------------------