├── requirements.txt ├── actleto ├── __init__.py ├── annotator │ ├── visualizers │ │ ├── __init__.py │ │ ├── textarea.py │ │ └── image.py │ ├── __init__.py │ ├── active_learner.py │ ├── annotator_widget.py │ └── ui_widget.py ├── strategies │ ├── __init__.py │ ├── libact_adaptor.py │ ├── mperr.py │ ├── positive_corrector.py │ ├── utils.py │ └── adwes.py └── models │ ├── utils_data.py │ └── model_wrappers.py ├── docs └── al.png ├── LICENSE ├── setup.py ├── README.md └── examples ├── 20newsgroups.ipynb └── MNIST_annotation.ipynb /requirements.txt: -------------------------------------------------------------------------------- 1 | . -------------------------------------------------------------------------------- /actleto/__init__.py: -------------------------------------------------------------------------------- 1 | from .annotator import * 2 | from .strategies import * 3 | -------------------------------------------------------------------------------- /docs/al.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IINemo/active_learning_toolbox/HEAD/docs/al.png -------------------------------------------------------------------------------- /actleto/annotator/visualizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .image import ImageVisualizer 2 | from .textarea import TextAreaVisualizer 3 | -------------------------------------------------------------------------------- /actleto/annotator/__init__.py: -------------------------------------------------------------------------------- 1 | from .active_learner import ActiveLearner 2 | from .annotator_widget import AnnotatorWidget 3 | from .ui_widget import ActiveLearnerUiWidget 4 | from .visualizers import ImageVisualizer, TextAreaVisualizer -------------------------------------------------------------------------------- /actleto/strategies/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .adwes import ADWeS 2 | from .libact_adaptor import AdaptorLibAct, make_libact_strategy_ctor 3 | from .mperr import MPErr 4 | from .positive_corrector import PositiveCorrector 5 | from .utils import MultipleQueryStrategy, SklearnProbaAdapterWithUnlabeled, SklearnRealAdapter 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Artem Shelmanov and Roman Suvorov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
class AdaptorLibAct:
    """Adaptor that drives a libact query strategy for ActiveLearner.

    It keeps a libact ``Dataset`` in sync with the labels received so far and
    asks the wrapped strategy (through ``MultipleQueryStrategy``) for several
    samples per query.
    """

    def __init__(self,
                 X_full_dataset,
                 y_full_dataset,
                 libact_query_alg_ctor,
                 max_samples_number=40):
        """
        Args:
            X_full_dataset: features handed to the libact ``Dataset``.
            y_full_dataset: labels handed to the libact ``Dataset``.
            libact_query_alg_ctor (callable): called with the libact dataset,
                returns the query strategy to wrap.
            max_samples_number (int): number of samples requested per query
                (forwarded to ``MultipleQueryStrategy`` as ``query_n``).
        """
        self._train_dataset = Dataset(X_full_dataset, y_full_dataset)
        self._libact_query_alg = MultipleQueryStrategy(
            impl=libact_query_alg_ctor(self._train_dataset),
            query_n=max_samples_number)

    def make_iteration(self, indexes, y):
        """Record freshly obtained labels in the underlying dataset.

        Args:
            indexes: entry ids of the annotated samples (any iterable).
            y: labels, aligned with ``indexes``.
        """
        # zip() instead of indexes.shape[0] so that plain lists/tuples work as
        # well as numpy arrays and pandas indexes.
        for entry_id, label in zip(indexes, y):
            self._train_dataset.update(entry_id, label)

    def choose_samples_for_annotation(self):
        """Return a numpy array with the ids chosen by the query strategy."""
        return np.array(list(self._libact_query_alg.make_query()))


def make_libact_strategy_ctor(stg_ctor, max_samples_number=40):
    """Creates functor with adaptor for active learning strategies for libact.

    Args:
        stg_ctor (callable): receives the libact dataset and returns a libact
            query strategy.
        max_samples_number (int): number of samples the adaptor requests per
            query; the default keeps the previous behaviour.

    Returns:
        callable: ``_ctor(X, y)`` that builds an ``AdaptorLibAct``.
    """
    def _ctor(X, y):
        return AdaptorLibAct(X, y,
                             libact_query_alg_ctor=stg_ctor,
                             max_samples_number=max_samples_number)
    return _ctor
def read(*names, **kwargs):
    """Return the decoded text of a file located next to this script.

    Args:
        *names: path components joined onto the directory of this file.
        **kwargs: only ``encoding`` is consulted (defaults to ``'utf8'``).

    Returns:
        str: the file contents.
    """
    path = join(dirname(__file__), *names)
    # The context manager closes the handle deterministically; the previous
    # one-liner left that to the garbage collector.
    with io.open(path, encoding=kwargs.get('encoding', 'utf8')) as stream:
        return stream.read()
package_data={'examples': ['*.ipynb']}, 40 | install_requires=['cython', 41 | 'numpy>=1.12.1', 42 | 'pandas>=0.20.1', 43 | 'scikit-learn>=0.18', 44 | 'scipy>=0.19.0', 45 | 'Pillow>=4.2.1', 46 | 'ipywidgets>=4', 47 | 'annoy'], 48 | dependency_links=['git+https://github.com/windj007/libact/#egg=libact'] 49 | ) 50 | -------------------------------------------------------------------------------- /actleto/strategies/mperr.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from libact.base.interfaces import ProbabilisticModel, ContinuousModel 3 | from libact.base.dataset import Dataset, ensure_sklearn_compat 4 | 5 | import logging 6 | logger = logging.getLogger('actleto') 7 | 8 | 9 | class MPErr: 10 | """Most probable error query strategy. 11 | 12 | Finds the negative samples that are likely to be positive ones 13 | from the model point of view. On each iteration, it uses the whole 14 | dataset for training the model: it treats unannotated samples as 15 | negative samples. 16 | 17 | Note: supports only binary classification so far. 18 | """ 19 | 20 | def __init__(self, dataset, model): 21 | """ 22 | Args: 23 | dataset: libact dataset with features. 24 | model: the model for active learning. 
class ImageVisualizer(object):
    """Visualizer for images.

    This visualizer can display images (e.g., MNIST), which are stored as rows
    in a dataframe.
    """

    def __init__(self, columns_range, img_shape, img_mode, preview_shape):
        """ImageVisualizer constructor.

        Args:
            columns_range (tuple): tuple (start, end) - positional range of
                columns in the pandas.DataFrame that contain image data.
            img_shape (tuple): shape the flat pixel row is reshaped to.
                NOTE(review): the array is reshaped to exactly this tuple
                before being handed to PIL, so it is presumably
                (rows, columns) - confirm against callers.
            img_mode (str): PIL image mode: "L" - black&white (MNIST); "RGB";
                "CMYK"; "1".
            preview_shape (tuple): (width, height) of the output widget.

        """
        super().__init__()
        self._columns_range = columns_range
        self._img_shape = img_shape
        self._img_mode = img_mode
        self._preview_shape = preview_shape

    def _row_to_pixels(self, dataframe, index):
        """Return the pixel data of row ``index`` reshaped to the image shape."""
        start, end = self._columns_range[0], self._columns_range[1]
        # Series.as_matrix() was removed in pandas 1.0; .values is available in
        # every pandas version. Selecting the columns through iloc keeps the
        # dtype of the image columns instead of the dtype of the whole row.
        flat = np.asarray(dataframe.iloc[index, start:end].values)

        # tuple() lets callers pass the shape as a list as well.
        base_shape = tuple(self._img_shape)
        # np.product was removed in NumPy 2.0; np.prod is the supported name.
        if flat.shape[0] > np.prod(base_shape):
            # More values than pixels: the surplus forms a trailing channel axis.
            return flat.reshape(base_shape + (-1,))
        return flat.reshape(base_shape)

    def __call__(self, dataframe, index):
        """Invokes the visualizer.

        Args:
            dataframe (pandas.DataFrame): the dataframe that contains the data for visualization.
            index (int): the positional (iloc) index of the row to visualize.

        Returns:
            tuple: one-element tuple with the image widget for the row.

        """
        img = Image.fromarray(self._row_to_pixels(dataframe, index), self._img_mode)

        buffer = io.BytesIO()
        img.convert('RGB').save(buffer, format='PNG')

        return (WidgImage(value=buffer.getvalue(),
                          format='PNG',
                          width=self._preview_shape[0],
                          height=self._preview_shape[1]),)
18 | fully_annotated_y: the array with y labels for positives 19 | and negative samples. 20 | model: the model for active learning. 21 | strategy (str): the string identifier of strategy type from 22 | the list ['uncertainty', 'least_prob', 'most_prob']. 23 | 24 | """ 25 | self.dataset = dataset 26 | self.dataset.on_update(self._register_update) 27 | 28 | assert isinstance(model, (ProbabilisticModel, ContinuousModel)) 29 | self.model = model 30 | 31 | assert (strategy in ['uncertainty', 'least_prob', 'most_prob']), 'Wrong strategy identifier' 32 | self._strategy = strategy 33 | 34 | self._internal_dataset = pandas.DataFrame(fully_annotated_y, index = range(len(self.dataset))) 35 | 36 | def make_query(self, return_score=False): 37 | X, y = list(zip(*self.dataset.data)) 38 | self.model.train(Dataset(X, self._internal_dataset.values.reshape(-1))) 39 | 40 | unlabeled_entry_ids, X_pool = list(zip(*self.dataset.get_unlabeled_entries())) 41 | unlabeled_entry_ids = np.asarray(unlabeled_entry_ids) 42 | X_pool = ensure_sklearn_compat(X_pool) 43 | 44 | if isinstance(self.model, ProbabilisticModel): 45 | score = self.model.predict_proba(X_pool)[:, 1] 46 | elif isinstance(self.model, ContinuousModel): 47 | score = self.model.predict_real(X_pool)[:, 1] 48 | 49 | if self._strategy == 'uncertainty': 50 | score = -np.abs(score - 0.5) 51 | best_id = unlabeled_entry_ids[np.argmax(score)] 52 | elif self._strategy == 'least_prob': 53 | score = -score 54 | best_id = unlabeled_entry_ids[np.argmax(score)] 55 | elif self._strategy == 'most_prob': 56 | best_id = unlabeled_entry_ids[np.argmax(score)] 57 | 58 | if return_score: 59 | return best_id, \ 60 | list(zip(unlabeled_entry_ids, score)) 61 | else: 62 | return best_id 63 | 64 | def _register_update(self, index, answer): 65 | self._internal_dataset.iloc[index, 0] = answer 66 | -------------------------------------------------------------------------------- /actleto/strategies/utils.py: 
class MultipleQueryStrategy:
    """The helper class for querying multiple instances from the unlabeled dataset.

    Decorator for libact strategies that queries multiple instances from
    unlabeled dataset.

    """

    def __init__(self, impl, query_n=10):
        """
        Args:
            impl: the implementation of query strategy (libact compatible).
            query_n (int): number of unannotated examples to query.

        """
        self.impl = impl
        self.query_n = query_n

    def make_query(self):
        """Return a set with up to ``query_n`` sample ids.

        When the wrapped strategy supports ``return_score=True`` the ids with
        the highest scores are returned. Otherwise the strategy is queried
        ``query_n`` times and the distinct answers are collected.
        """
        try:
            id_score_pairs = self.impl.make_query(return_score=True)[1]
        except TypeError:
            # The strategy does not accept ``return_score`` or does not return
            # an (id, scores) pair: fall back to repeated single queries.
            return {self.impl.make_query() for _ in range(self.query_n)}

        # sorted() leaves the strategy's own list untouched and accepts any
        # iterable of pairs. Ranking happens outside the ``try`` so that an
        # error in the scores is reported instead of triggering the fallback.
        ranked = sorted(id_score_pairs, key=lambda pair: pair[1], reverse=True)
        return {sample for sample, _ in ranked[:self.query_n]}
class ActiveLearner:
    """The class that implements active learning logic."""

    def __init__(self,
                 active_learn_alg_ctor,
                 X_full_dataset,
                 y_dtype,
                 y_full_dataset=None,
                 model_evaluate=None,
                 X_test_dataset=None,
                 y_test_dataset=None,
                 eval_metrics=None,
                 rnd_start_steps=0):
        """ActiveLearner constructor.

        Args:
            active_learn_alg_ctor (functor): called with (X, y), returns the
                active learning strategy object.
            X_full_dataset (np.array or sparse matrix): feature matrix.
            y_dtype: type the known labels are cast to before training the
                evaluation model.
            y_full_dataset (np.array): known answers (e.g., None -- unknown,
                True -- positive class, False -- negative class). When omitted
                every sample starts as unknown.
            model_evaluate: the model that will be evaluated on the holdout.
            X_test_dataset: feature matrix for testing via holdout.
            y_test_dataset: y labels for testing via holdout.
            eval_metrics (list): metric callables with the sklearn signature
                ``metric(y_true, y_pred)``.
            rnd_start_steps (int): number of initial iterations in which the
                samples are chosen at random (without model suggestions).

        """
        super().__init__()

        self._y_dtype = y_dtype
        self._model_evaluate = model_evaluate
        self._eval_metrics = eval_metrics

        self._X_full_dataset = X_full_dataset
        if y_full_dataset is not None:
            self._y_full_dataset = y_full_dataset  # TODO: validate dimensions
        else:
            self._y_full_dataset = np.array([None] * self._X_full_dataset.shape[0])

        self._active_learn_algorithm = active_learn_alg_ctor(self._X_full_dataset,
                                                             self._y_full_dataset)

        self._X_test_dataset = X_test_dataset
        self._y_test_dataset = y_test_dataset

        self._iteration_num = 0
        self._rnd_start_steps = rnd_start_steps

    def _select_unannotated(self, labels):
        """Return the positions of the labels that are still missing."""
        # The default label container is a numpy array, which has no ``.map``;
        # pd.isnull works for arrays, lists and Series alike and matches the
        # ``notnull`` test used by evaluate() and make_iteration().
        missing = pd.isnull(np.asarray(labels, dtype=object))
        return np.where(missing)[0]

    def choose_random_sample_for_annotation(self, number=40):
        """Return up to ``number`` distinct random positions of unannotated samples."""
        candidates = self._select_unannotated(self._y_full_dataset)
        if candidates.shape[0] == 0:
            return candidates
        # Sampling without replacement cannot return more items than exist.
        return np.random.choice(candidates,
                                size=min(number, candidates.shape[0]),
                                replace=False)

    def choose_samples_for_annotation(self):
        """Return the samples to annotate next.

        Random samples are used for the first ``rnd_start_steps`` iterations,
        afterwards the active learning strategy decides.
        """
        if self._iteration_num < self._rnd_start_steps:
            return self.choose_random_sample_for_annotation()
        return self._active_learn_algorithm.choose_samples_for_annotation()

    def evaluate(self):
        """Train the evaluation model on the annotated samples and score it.

        Returns:
            dict: ``{metric.__name__: value}`` computed on the holdout, or
            None when no evaluation model was configured.
        """
        if self._model_evaluate is None:
            return None

        y_fit = pd.Series(self._y_full_dataset)
        y_fit = y_fit[y_fit.notnull()].astype(self._y_dtype)
        logger.info('Number of training samples: {}'.format(y_fit.shape[0]))

        self._model_evaluate.fit(self._X_full_dataset[y_fit.index.values], y_fit)

        preds = self._model_evaluate.predict(self._X_test_dataset)
        # sklearn metrics expect (y_true, y_pred); the swapped order gives
        # wrong values for asymmetric metrics such as precision and recall.
        return {metric.__name__: metric(self._y_test_dataset, preds)
                for metric in (self._eval_metrics or ())}

    def get_annotation(self):
        """Return the container with the current labels (None -- unknown)."""
        return self._y_full_dataset

    def make_iteration(self, indexes, answers):
        """Register new annotations and pass them to the strategy.

        Args:
            indexes: positions of the samples shown to the annotator.
            answers: labels aligned with ``indexes``; null entries (samples
                the annotator skipped) are ignored.

        Returns:
            Whatever the strategy's ``make_iteration`` returns.
        """
        self._iteration_num += 1
        answers = pd.Series(answers, index=indexes)
        answers = answers[answers.notnull()]
        res = self._active_learn_algorithm.make_iteration(answers.index, answers.values)
        self._y_full_dataset[list(answers.index)] = answers.values
        return res
37 | 38 | Requirements 39 | ============ 40 | 41 | 1. Python 3.6 (the package has not been tested with earlier versions) 42 | 43 | 2. numpy (1.12.1) 44 | 45 | 3. pandas (0.20.1) 46 | 47 | 4. sklearn (0.18.1) 48 | 49 | 5. scipy (0.19.0) 50 | 51 | 6. Pillow (4.2.1) 52 | 53 | 7. Jupyter (4.3.0) 54 | 55 | 8. LibAct from the [fork](https://github.com/windj007/libact) (`pip install 56 | git+https://github.com/windj007/libact`) 57 | 58 | Installation 59 | ============ 60 | 61 | Enabling widgets in Jupyter IDE 62 | ------------------------------- 63 | 64 | The Jupyter widgets are not enabled by default. To install and activate them, do 65 | the following: 66 | 67 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | pip install ipywidgets 69 | jupyter nbextension enable --py --sys-prefix widgetsnbextension 70 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | 72 | For further details, please refer to the [jupyter-widgets 73 | repo](https://github.com/jupyter-widgets/ipywidgets). 74 | 75 | Installing the library and the widget 76 | ------------------------------------- 77 | 78 | To install the library and the widget, run the following command with root 79 | privileges: 80 | 81 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 82 | pip install git+https://github.com/IINemo/active_learning_toolbox 83 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 84 | 85 | Usage 86 | ===== 87 | 88 | See an example for [MNIST dataset 89 | annotation](https://github.com/IINemo/active_learning_toolbox/blob/master/examples/MNIST_annotation.ipynb) 90 | and an example for [20 newsgroups 91 | annotation](https://github.com/IINemo/active_learning_toolbox/blob/master/examples/20newsgroups.ipynb). 
92 | 93 | If you have Docker installed, you can test the examples with [windj007/jupyter-keras-tools](https://hub.docker.com/r/windj007/jupyter-keras-tools/): 94 | 95 | ``` 96 | cd <path to the cloned repository>/examples 97 | docker run -ti --rm -v `pwd`:/notebook -p 8888:8888 windj007/jupyter-keras-tools 98 | ``` 99 | Then open [http://localhost:8888](http://localhost:8888) in a browser (this will launch Jupyter IDE) and open an example notebook. 100 | 101 | 102 | Cite 103 | ==== 104 | 105 | If you use the active learning toolbox in academic works, please cite (to be 106 | published): 107 | 108 |   109 | 110 | *BibTex:* 111 | 112 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 | @inproceedings{suvorovshelmanov2017ainl, 114 | title={Active Learning with Adaptive Density Weighted Sampling for Information Extraction from Scientific Papers}, 115 | author={Roman Suvorov and Artem Shelmanov and Ivan Smirnov}, 116 | booktitle={Proceedings of AINL: Artificial Intelligence and Natural Language Conference}, 117 | publisher = {Springer, Communications in Computer and Information Science}, 118 | year={2017} 119 | } 120 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 121 |   122 | 123 | *Russian GOST:* 124 | 125 | Suvorov R., Shelmanov A., Smirnov I. Active learning with adaptive density 126 | weighted sampling for information extraction from scientific papers // 127 | Proceedings of AINL: Artificial Intelligence and Natural Language Conference. — 128 | Springer, Communications in Computer and Information Science, 2017. 
class AnnotatorWidget(Box):
    """The widget for Jupyter that implements example annotator.

    The widget can be used without active learning.

    """

    _max_examples = Int(5, allow_none=False).tag(sync=True)
    _current_position = Int(0, allow_none=False).tag(sync=True)
    _dataframe = Instance(klass=pd.DataFrame)

    # Layout shared by every example row of every annotator instance.
    _example_layout = Layout(width='100%',
                             border='solid 2px',
                             margin='3px',
                             align_items='stretch',
                             padding='2px')

    ANNOT_DONT_KNOW = "Don't know"

    def __init__(self,
                 dataframe,
                 answers = None,
                 max_examples = 5,
                 current_position = 0,
                 visualize_columns = [],
                 drop_labels = [],
                 y_labels = {'True' : True,
                             'False' : False},
                 visualizer = None,
                 display_feature_table = True,
                 *args, **kwargs):
        """Annotator constructor.

        Args:
            dataframe (pandas.DataFrame): the dataframe that contains data for examples visualization.
            answers (numpy.array): the array that contains known answers
                (examples without a known answer are shown as "Don't know").
            max_examples (int): maximum number of examples per page.
            current_position (int): index of position, from which iteration should start.
            visualize_columns (list): column labels that are rendered by the default
                TextAreaVisualizer; they are also dropped from the feature table.
            drop_labels (list): list of string labels that will be dropped from visualization via table.
            y_labels (dict): dict {button caption: label value}.
            visualizer (object): visualizer for the dataframe rows. The default is None. If None the widget
                uses TextAreaVisualizer over ``visualize_columns``.
            display_feature_table (bool): whether to render the remaining columns of
                each example as an HTML table.

        """
        # super(Box, self) skipped Box.__init__ in the MRO; initialise the
        # whole widget hierarchy instead.
        super().__init__(*args, **kwargs)

        # Work on a copy: adding the "Don't know" entry must not leak into the
        # caller's dict nor into the shared default argument.
        self._y_labels = dict(y_labels)
        self._y_labels[self.ANNOT_DONT_KNOW] = None
        self._y_labels_reversed = {v : k for k, v in self._y_labels.items()}
        self._display_feature_table = display_feature_table

        self._dataframe = dataframe
        # Clamp into [0, n_rows - 1]; the max() keeps an empty dataframe from
        # producing position -1.
        self._current_position = max(0, min(current_position, self._dataframe.shape[0] - 1))
        self._max_examples = max_examples
        self._answers = (answers if answers is not None
                         else np.array([None] * self._dataframe.shape[0]))
        assert self._answers.shape[0] == self._dataframe.shape[0], \
            'The length of dataframe should match the length of numpy.array with answers.'

        self._drop_labels = list(drop_labels) + list(visualize_columns)
        self._visualizer = visualizer
        if self._visualizer is None:
            self._visualizer = TextAreaVisualizer(visualize_columns)

        self._draw()
        # Redraw the page whenever the position changes.
        self.observe(self._draw, names='_current_position')

    def get_answers(self):
        """Returns numpy.array with answers."""
        return self._answers

    def get_dataframe(self):
        """Returns pandas.DataFrame with feature values."""
        return self._dataframe

    def _click_prev(self, button):
        """Move one page back, stopping at the first example."""
        self._current_position = max(self._current_position - self._max_examples, 0)

    def _click_next(self, button):
        """Move one page forward unless the last page is already shown."""
        next_position = self._current_position + self._max_examples
        # Positions are valid in [0, n_rows - 1] (the same rule the text box
        # handler applies); stepping onto n_rows would draw an empty page.
        if next_position < self._dataframe.shape[0]:
            self._current_position = next_position

    def _int_text_value_changed(self, wdg):
        """Jump to the position typed into the text box.

        Negative numbers count from the end; invalid or out-of-range input is
        ignored.
        """
        try:
            new_value = int(wdg.value)
        except ValueError:
            return

        if new_value < 0:
            new_value = self._dataframe.shape[0] + new_value
            if new_value < 0:
                return

        if new_value >= self._dataframe.shape[0]:
            return

        self._current_position = new_value

    def _make_controls(self):
        """Build the navigation bar: Prev / Next buttons and the position box."""
        prev_button = Button(description='Prev')
        next_button = Button(description='Next')
        position_box = Text(value = str(self._current_position),
                            layout = Layout(width = '80px'))

        prev_button.on_click(self._click_prev)
        next_button.on_click(self._click_next)
        position_box.on_submit(self._int_text_value_changed)

        return HBox(children = [prev_button,
                                next_button,
                                position_box,
                                Label(value = 'out of',
                                      layout = Layout(width = '35px')),
                                Text(value = str(self._dataframe.shape[0]),
                                     disabled = True,
                                     layout = Layout(width = '80px'))])

    def _annotate(self, num, change):
        """Store the label selected for example ``num``."""
        # "Don't know" maps to None in _y_labels, so one lookup covers it.
        self._answers[num] = self._y_labels[change['new']]

    def _answer_to_label(self, answer):
        """Return the button caption that corresponds to a stored answer."""
        return self._y_labels_reversed[answer]

    def _draw(self, change = None):
        """Rebuild the page of examples that starts at the current position."""
        last_element = min(self._current_position + self._max_examples,
                           self._dataframe.shape[0])
        toggle_options = ([self.ANNOT_DONT_KNOW] +
                          [k for k in self._y_labels if k != self.ANNOT_DONT_KNOW])

        rows = [self._make_controls()]
        for i in range(self._current_position, last_element):
            cells = []

            if self._display_feature_table:
                elem = self._dataframe.iloc[i].drop(self._drop_labels)
                cells.append(HTML(value = pd.DataFrame([elem.values],
                                                       columns = elem.index,
                                                       index = [self._dataframe.index[i]])
                                  .to_html(classes=['table', 'table-striped'])))

            cells.extend(self._visualizer(self._dataframe, i))

            toggle = ToggleButtons(options = list(toggle_options),
                                   value = self._answer_to_label(self._answers[i]),
                                   description = 'Your annotation:',
                                   disabled = False)
            # ``num = i`` binds the row number now; a plain closure would see
            # the last value of ``i`` for every button.
            toggle.observe(lambda tgl_bt, num = i: self._annotate(num, tgl_bt),
                           names = 'value')
            cells.append(toggle)

            rows.append(VBox(children = cells, layout = self._example_layout))

        rows.append(self._make_controls())
        self._table = VBox(children = rows, layout = Layout(width = '100%'))
        self.children = (self._table,)
57 | y_visualizer (object): visualizer for X_helper representation. The default is None. If None the widget 58 | will invoke VisualizerTextArea by deafult. 59 | save_path (str): the path to save the results. 60 | evaluation_callback (functor): the callback for evaluation. The default is logging callback. 61 | save_time (int): Autosave time. If 0 then autosave is disabled. If u use auto save u have to 62 | call stop() method to disabel autosave in the current widget. 63 | 64 | """ 65 | super(VBox, self).__init__(*args, **kwargs) 66 | 67 | self._X_helper = X_helper 68 | self._active_learner = active_learner 69 | self._save_path = save_path 70 | self._evaluation_callback = evaluation_callback or EvaluationCallbackLogging(logger) 71 | 72 | self._y_labels = y_labels 73 | self._visualizer = visualizer 74 | self._drop_labels = drop_labels 75 | self._visualize_columns = visualize_columns 76 | self._display_feature_table = display_feature_table 77 | 78 | controls = HBox() 79 | 80 | self._button_next_iter = Button(description = 'Next iteration') 81 | self._button_next_iter.on_click(self._click_next_iteration) 82 | controls.children += (self._button_next_iter,) 83 | 84 | self._iteration_num = 0 85 | controls.children += (Label(self._iteration_label()),) 86 | 87 | self._button_save = Button(description = 'Save') 88 | self._button_save.on_click(self._click_save) 89 | controls.children += (self._button_save,) 90 | 91 | self.children = (controls, self._make_annotator_widget()) 92 | 93 | self._save_time = save_time 94 | self._timer = None 95 | if self._save_time > 0: 96 | self._start_save_timer() 97 | 98 | self._timer_check_save_reset = None 99 | self._timer_check_next_iteration_reset = None 100 | 101 | def __del__(self): 102 | self.stop() 103 | 104 | def get_active_learner(self): 105 | """Returns the active learner object that was delivered to the constructor.""" 106 | return self._active_learner 107 | 108 | def stop(self): 109 | if self._timer is not None: 110 | 
self._timer.cancel() 111 | 112 | def _save_on_timer(self): 113 | logger.info('Autosave.') 114 | self._save_answers(os.path.splitext(self._save_path)[0] + '_autosave') 115 | self._start_save_timer() 116 | 117 | def _start_save_timer(self): 118 | self._timer = Timer(self._save_time, self._save_on_timer) 119 | self._timer.start() 120 | 121 | def _get_annotator_widget(self): 122 | return self.children[1] 123 | 124 | def _iteration_label(self): 125 | return 'Iteration #{}'.format(self._iteration_num) 126 | 127 | def _increment_iteration_num(self): 128 | self._iteration_num += 1 129 | self.children[0].children[1].value = self._iteration_label() 130 | 131 | def _make_annotator_widget(self): 132 | samples_to_annotate = self._active_learner.choose_samples_for_annotation() 133 | return AnnotatorWidget(dataframe = self._X_helper.iloc[samples_to_annotate], 134 | visualize_columns = self._visualize_columns, 135 | drop_labels = self._drop_labels, 136 | visualizer = self._visualizer, 137 | display_feature_table = self._display_feature_table, 138 | y_labels = self._y_labels) 139 | 140 | def _click_next_iteration(self, button): 141 | if self._timer_check_next_iteration_reset: 142 | self._timer_check_next_iteration_reset.cancel() 143 | 144 | self._button_next_iter.disabled = True 145 | self._button_next_iter.icon = 'clock-o' 146 | annotated_indexes = [self._X_helper.index.get_loc(e) 147 | for e in self._get_annotator_widget().get_dataframe().index] 148 | 149 | self._active_learner.make_iteration(annotated_indexes, 150 | self._get_annotator_widget().get_answers()) 151 | 152 | logger.info(self._iteration_label()) 153 | eval_res = self._active_learner.evaluate() 154 | if eval_res is not None: 155 | self._evaluation_callback(eval_res) 156 | 157 | self._increment_iteration_num() 158 | self.children = (self.children[0], self._make_annotator_widget()) 159 | self._button_next_iter.icon = 'check' 160 | self._button_next_iter.disabled = False 161 | 162 | self._timer_check_next_iteration_reset 
logger = logging.getLogger('actleto')


class ADWeS(object):
    """Query strategy that re-ranks the scores of a base strategy by exploration relevance.

    Every example gets an "exploration relevance" derived from the mean
    distance to its nearest neighbours in a truncated-SVD space: the smaller
    the mean distance, the higher the relevance.  Each time an example becomes
    labeled, the relevance of its neighbours is discounted.  ``make_query``
    combines the normalised base score ``s`` and the normalised relevance ``e``
    as ``(1 + b^2) * s * e / (b^2 * s + e)`` with ``b = uncertainty_factor``
    and returns the example with the highest combined score.
    """

    def __init__(self,
                 dataset,
                 basic_strategy,
                 svd_components=300,
                 index_trees=10,
                 get_nearest_n=10,
                 get_most_uncertain_n=0,
                 exp_rel_power=0.8,
                 exp_rel_rate=1.0,
                 uncertainty_factor=0.5,
                 us_method='lc',
                 plot_each=20):
        """Build the nearest-neighbour index and the initial exploration relevance.

        Args:
            dataset: libact-style dataset; ``dataset.data`` is a sequence of
                ``(features, label)`` pairs where ``label`` is None for unlabeled examples.
            basic_strategy: wrapped query strategy; must provide
                ``make_query(return_score=True)`` returning ``(best_id, pairs)`` where
                ``pairs`` yields ``(entry_id, score)``.
            svd_components (int): number of TruncatedSVD components.
            index_trees (int): number of trees of the Annoy index.
            get_nearest_n (int): number of nearest neighbours requested per example.
            get_most_uncertain_n (int): if > 0, only this many candidates with the highest
                base score are re-ranked; 0 means all candidates.
            exp_rel_power (float): exponent applied to the relevance and to the discount.
            exp_rel_rate (float): strength of the discount around newly labeled examples.
            uncertainty_factor (float): ``b`` of the combination formula above.
            us_method (str): stored only; not used by this class.
            plot_each (int): stored only; not used by this class.
        """
        self.dataset = dataset
        self.basic_strategy = basic_strategy

        self.get_nearest_n = get_nearest_n
        self.get_most_uncertain_n = get_most_uncertain_n
        self.exp_rel_power = exp_rel_power
        self.exp_rel_rate = exp_rel_rate
        self.uncertainty_factor = uncertainty_factor
        self.us_method = us_method
        self.plot_each = plot_each

        # zip() returns an iterator on Python 3 and cannot be subscripted, so
        # the features are collected explicitly (a tuple, as zip()[0] produced
        # on Python 2).
        all_features = ensure_sklearn_compat(tuple(entry[0] for entry in dataset.data))
        self.data = TruncatedSVD(n_components=svd_components).fit_transform(all_features)

        # 'angular' used to be Annoy's implicit default; newer releases want it
        # spelled out.
        self.index = AnnoyIndex(self.data.shape[1], metric='angular')
        for i, item in enumerate(self.data):
            self.index.add_item(i, item)
        self.index.build(index_trees)

        self.labeled_ids = set()  # will be updated in make_query before all job

        # calculate mean and maximum distances
        n_items = self.data.shape[0]
        mean_dist = np.full(n_items, np.nan)
        self.explore_relevance_max = 0
        for i in range(n_items):
            cur_dist = self.index.get_nns_by_item(i,
                                                  self.get_nearest_n,
                                                  include_distances=True)[1]
            if len(cur_dist) > 0:
                mean_dist[i] = np.mean(cur_dist)
                self.explore_relevance_max = max(self.explore_relevance_max, np.max(cur_dist))

        # examples without neighbours are treated as maximally isolated
        mean_dist[np.isnan(mean_dist)] = self.explore_relevance_max

        self._debug_describe('init dist %s', mean_dist)
        # small mean distance (dense region) -> relevance close to 1
        self.explore_relevance = (1 - self._min_max_normalize(mean_dist)) ** self.exp_rel_power

        self.iter_i = 0

    @staticmethod
    def _min_max_normalize(values):
        """Rescale ``values`` to [0, 1]; all zeros when the values do not differ."""
        values = np.asarray(values, dtype=float)
        shifted = values - values.min()
        peak = shifted.max()
        if peak > 0:
            return shifted / peak
        # constant input: avoid 0/0 and the NaNs it would spread
        return np.zeros_like(shifted)

    @staticmethod
    def _debug_describe(message, values):
        """Log summary statistics of ``values``; computed only when DEBUG is enabled."""
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(message, str(scipy.stats.describe(values)))

    def make_query(self, return_score=False):
        """Select the next example to annotate.

        Args:
            return_score (bool): if True also return the combined scores.

        Returns:
            The id of the best example, or ``(best_id, [(entry_id, score), ...])``
            when ``return_score`` is True.

        Raises:
            ValueError: if the base strategy returns no candidates.
        """
        self._update_exp_rel()

        # The class itself never defines ``model`` (the base strategy is
        # responsible for its own model); train one only if it was attached
        # from outside.
        model = getattr(self, 'model', None)
        if model is not None:
            model.train(self.dataset)

        _, ids_with_scores = self.basic_strategy.make_query(return_score=True)
        ids_with_scores = list(ids_with_scores)  # may be a one-shot iterator
        if not ids_with_scores:
            raise ValueError('base strategy returned no candidates')
        entry_ids, raw_scores = zip(*ids_with_scores)
        unlabeled_entry_ids = np.asarray(entry_ids)

        # normalize: we dont care about absolute values, only relative to rank samples
        base_score = self._min_max_normalize(raw_scores)

        top_n = self.get_most_uncertain_n
        if 0 < top_n < len(base_score):
            most_base_relevant_indices = np.argpartition(-base_score, top_n)[:top_n]
        else:
            # 0 means "no pre-selection"; a limit >= number of candidates
            # selects everything as well (argpartition would raise).
            most_base_relevant_indices = np.arange(len(base_score))
        most_base_relevant_ids = unlabeled_entry_ids[most_base_relevant_indices]
        most_base_relevant_score = base_score[most_base_relevant_indices]
        self._debug_describe('most base relevant score %s', most_base_relevant_score)

        # normalize: we dont care about absolute values, only relative to rank samples
        most_base_relevant_exp_rel = self._min_max_normalize(
            self.explore_relevance[most_base_relevant_ids])
        self._debug_describe('most exp rel %s', most_base_relevant_exp_rel)

        # f-beta
        beta_sq = self.uncertainty_factor ** 2
        with np.errstate(divide='ignore', invalid='ignore'):
            result_score = ((1 + beta_sq) * most_base_relevant_score * most_base_relevant_exp_rel /
                            (beta_sq * most_base_relevant_score + most_base_relevant_exp_rel))
        # 0/0 happens when both components are zero: such a sample scores zero
        result_score[np.isnan(result_score)] = 0.0
        self._debug_describe('most res %s', result_score)

        best_i = np.argmax(result_score)
        best_id = most_base_relevant_ids[best_i]
        logger.debug('best %r %r %r %r' % (best_i,
                                           result_score[best_i],
                                           most_base_relevant_score[best_i],
                                           most_base_relevant_exp_rel[best_i]))
        if return_score:
            return best_id, list(zip(most_base_relevant_ids, result_score))
        return best_id

    def _update_exp_rel(self):
        """Discount the exploration relevance around examples labeled since the last call."""
        newly_labeled_ids = {i for i, entry in enumerate(self.dataset.data)
                             if entry[1] is not None and i not in self.labeled_ids}
        self.labeled_ids.update(newly_labeled_ids)
        for ex_id in newly_labeled_ids:
            neighbor_ids, neighbor_dist = self.index.get_nns_by_item(ex_id,
                                                                     self.get_nearest_n,
                                                                     include_distances=True)
            neighbor_dist = np.asarray(neighbor_dist, dtype='float')
            if self.explore_relevance_max > 0:
                closeness = 1 - neighbor_dist / self.explore_relevance_max
            else:
                # all known distances are zero: every neighbour is a duplicate
                closeness = np.ones_like(neighbor_dist)
            # keep the base of the fractional power inside [0, 1] so that it
            # can never produce NaN
            closeness = np.clip(closeness, 0.0, 1.0)
            neighbor_discount_factor = 1 - self.exp_rel_rate * closeness ** self.exp_rel_power
            self.explore_relevance[neighbor_ids] *= neighbor_discount_factor

        self.iter_i += 1
plot_samples(self.data, 166 | # self.explore_relevance, 167 | # kind='svd', 168 | # with_kde=False, 169 | # filename='./debug/%05d.png' % self.iter_i, 170 | # do_not_display=True) 171 | # -------------------------------------------------------------------------------- /examples/20newsgroups.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 20newsgroups example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2017-09-19T07:56:12.283842Z", 16 | "start_time": "2017-09-19T07:56:12.255566Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n", 23 | "\n", 24 | "import sys\n", 25 | "sys.path.append('../')\n", 26 | "\n", 27 | "import os\n", 28 | "os.environ['CUDA_VISIBLE_DEVICES'] = ''" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Initialization" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "ExecuteTime": { 43 | "end_time": "2017-09-19T07:56:13.148431Z", 44 | "start_time": "2017-09-19T07:56:12.285935Z" 45 | }, 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "import collections, pandas as pd, numpy as np\n", 51 | "\n", 52 | "from sklearn.linear_model import LogisticRegression\n", 53 | "from sklearn.datasets import fetch_20newsgroups\n", 54 | "from sklearn.feature_extraction.text import TfidfVectorizer\n", 55 | "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score\n", 56 | "\n", 57 | "from libact.models import LogisticRegression as LibActLogReg\n", 58 | "from libact.query_strategies import UncertaintySampling\n", 59 | "\n", 60 | "from actleto import ActiveLearner, ActiveLearnerUiWidget, make_libact_strategy_ctor" 61 | ] 62 | }, 63 | { 64 | "cell_type": 
"markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# Load and prepare data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "ExecuteTime": { 75 | "end_time": "2017-09-19T07:56:13.820225Z", 76 | "start_time": "2017-09-19T07:56:13.151600Z" 77 | }, 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "# Loading the 20newsgroups dataset\n", 83 | "train_dataset = fetch_20newsgroups(subset='train')\n", 84 | "test_dataset = fetch_20newsgroups(subset='test')" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "ExecuteTime": { 92 | "end_time": "2017-09-19T07:56:18.834411Z", 93 | "start_time": "2017-09-19T07:56:13.823592Z" 94 | }, 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Preparing features\n", 100 | "vectorizer = TfidfVectorizer(min_df=3, max_df=0.5, sublinear_tf=True)\n", 101 | "\n", 102 | "X_train = vectorizer.fit_transform(train_dataset.data)\n", 103 | "Y_train = train_dataset.target\n", 104 | "\n", 105 | "X_test = vectorizer.transform(test_dataset.data)\n", 106 | "Y_test = test_dataset.target" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "# Train-test without active learning" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2017-09-19T07:56:24.748942Z", 122 | "start_time": "2017-09-19T07:56:18.836135Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "clf = LogisticRegression()\n", 128 | "clf.fit(X_train, Y_train)\n", 129 | "Y_pred = clf.predict(X_test)\n", 130 | "Y_pred_proba = clf.predict_proba(X_test)\n", 131 | "print('Accuracy', accuracy_score(Y_test, Y_pred))\n", 132 | "print('F1 macro', f1_score(Y_test, Y_pred, average='macro'))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "# 
Train-test with active learning and human-in-the-loop" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "ExecuteTime": { 147 | "end_time": "2017-09-19T07:56:24.779279Z", 148 | "start_time": "2017-09-19T07:56:24.754386Z" 149 | }, 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "# We choose 20 random examples as seed set for active learning.\n", 155 | "Y_seed = Y_train.copy().astype('O')\n", 156 | "unknown_indexes = np.random.randint(Y_seed.shape[0], size = Y_seed.shape[0] - 20)\n", 157 | "Y_seed[unknown_indexes] = None" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "ExecuteTime": { 165 | "end_time": "2017-09-19T07:56:40.927054Z", 166 | "start_time": "2017-09-19T07:56:38.006929Z" 167 | }, 168 | "collapsed": true 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "# Creating query strategy. The strategy selects examples for annotations from unlabeled dataset.\n", 173 | "# We use standard uncertainty sampling strategy from libact and wrap it into adaptor.\n", 174 | "active_learn_alg_ctor = make_libact_strategy_ctor(lambda trn_ds:\n", 175 | " UncertaintySampling(trn_ds,\n", 176 | " model = LibActLogReg()))\n", 177 | "\n", 178 | "# Creating ActiveLearning object that implements AL logic.\n", 179 | "active_learner = ActiveLearner(active_learn_alg_ctor = active_learn_alg_ctor,\n", 180 | " y_dtype = 'int',\n", 181 | " X_full_dataset = X_train, \n", 182 | " y_full_dataset = Y_seed,\n", 183 | " X_test_dataset = X_test,\n", 184 | " y_test_dataset = Y_test,\n", 185 | " model_evaluate = LibActLogReg(), # We use logreg from libact for choosing samples.\n", 186 | " eval_metrics = [accuracy_score, f1_score],\n", 187 | " rnd_start_steps = 0)\n", 188 | "\n", 189 | "# Creaing X_helper object for visualization dataset for humans.\n", 190 | "X_helper = pd.DataFrame(data={ 'text' : train_dataset.data })" 191 | ] 192 | }, 193 | { 194 | 
"cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "ExecuteTime": { 198 | "end_time": "2017-09-19T07:56:47.558000Z", 199 | "start_time": "2017-09-19T07:56:45.335239Z" 200 | } 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "# This try-catch block is needed to stop autosave thread in case we invoke the cell multiple times.\n", 205 | "try:\n", 206 | " if active_learn_ui:\n", 207 | " active_learn_ui.stop()\n", 208 | "except NameError:\n", 209 | " pass\n", 210 | "\n", 211 | "# Creaing the active learner widget itself and configure it with active_learner, X_helper.\n", 212 | "# The default visualizer will preview the visualize_columns with VisualizerTextArea.\n", 213 | "active_learn_ui = ActiveLearnerUiWidget(active_learner = active_learner, \n", 214 | " X_helper = X_helper,\n", 215 | " display_feature_table = False,\n", 216 | " drop_labels = [],\n", 217 | " visualize_columns = ['text'],\n", 218 | " y_labels = { label : i for i, label in enumerate(train_dataset.target_names) },\n", 219 | " save_path = './20ng_active_dump.npy',\n", 220 | " save_time = 120)\n", 221 | "\n", 222 | "active_learn_ui" 223 | ] 224 | } 225 | ], 226 | "metadata": { 227 | "kernelspec": { 228 | "display_name": "Python 3", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.6.0" 243 | }, 244 | "toc": { 245 | "colors": { 246 | "hover_highlight": "#DAA520", 247 | "running_highlight": "#FF0000", 248 | "selected_highlight": "#FFD700" 249 | }, 250 | "moveMenuLeft": true, 251 | "nav_menu": { 252 | "height": "105px", 253 | "width": "252px" 254 | }, 255 | "navigate_menu": true, 256 | "number_sections": true, 257 | "sideBar": true, 258 | "threshold": 4, 259 | "toc_cell": false, 260 | 
"toc_section_display": "block", 261 | "toc_window_display": false 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 2 266 | } 267 | -------------------------------------------------------------------------------- /examples/MNIST_annotation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MNIST annotation example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2017-09-19T07:06:59.047801Z", 16 | "start_time": "2017-09-19T07:06:58.786121Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n", 23 | "\n", 24 | "import os, sys\n", 25 | "os.environ['CUDA_VISIBLE_DEVICES'] = ''\n", 26 | "\n", 27 | "sys.path.append('../')\n", 28 | "\n", 29 | "import matplotlib.pyplot as plt, collections, logging\n", 30 | "%pylab inline" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Initialization" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "ExecuteTime": { 45 | "end_time": "2017-09-19T07:07:00.044946Z", 46 | "start_time": "2017-09-19T07:06:59.933855Z" 47 | } 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import pandas\n", 52 | "import numpy\n", 53 | "\n", 54 | "from keras.datasets import mnist\n", 55 | "from keras.models import Sequential\n", 56 | "from keras.layers import Dense\n", 57 | "from keras.layers import Dropout\n", 58 | "from keras.utils import np_utils\n", 59 | "from keras.wrappers.scikit_learn import KerasClassifier\n", 60 | "\n", 61 | "from sklearn.metrics import f1_score, accuracy_score\n", 62 | "\n", 63 | "from libact.query_strategies import UncertaintySampling\n", 64 | "from libact.models import LogisticRegression as LibActLogisticRegression, SklearnProbaAdapter\n", 65 | "\n", 
66 | "from actleto import ActiveLearner, make_libact_strategy_ctor, MPErr, ActiveLearnerUiWidget, ImageVisualizer" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "ExecuteTime": { 74 | "end_time": "2017-09-19T07:07:02.574046Z", 75 | "start_time": "2017-09-19T07:07:02.485003Z" 76 | } 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "import logging\n", 81 | "\n", 82 | "# Logger is needed for the default evaluation output and some notifications\n", 83 | "logger = logging.getLogger('actleto')\n", 84 | "formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') \n", 85 | "strm_hdl = logging.StreamHandler(sys.stdout)\n", 86 | "strm_hdl.setFormatter(formatter)\n", 87 | "logger.addHandler(strm_hdl)\n", 88 | "logger.setLevel(logging.INFO)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# Dataset preparation" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "ExecuteTime": { 103 | "end_time": "2017-09-19T07:07:04.315465Z", 104 | "start_time": "2017-09-19T07:07:03.634325Z" 105 | } 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "# Loading MNIST dataset\n", 110 | "(X_train_orig, y_train), (X_test_orig, y_test) = mnist.load_data()\n", 111 | "\n", 112 | "num_classes = numpy.unique(y_train).shape[0]\n", 113 | "\n", 114 | "# flatten 28*28 images to a 784 vector for each image\n", 115 | "num_pixels = X_train_orig.shape[1] * X_train_orig.shape[2]\n", 116 | "X_train = X_train_orig.reshape(X_train_orig.shape[0], num_pixels).astype('float32')\n", 117 | "X_test = X_test_orig.reshape(X_test_orig.shape[0], num_pixels).astype('float32')\n", 118 | "\n", 119 | "# normalize inputs from 0-255 to 0-1\n", 120 | "X_train = X_train / 255\n", 121 | "X_test = X_test / 255" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "# Model construction" 129 | ] 130 | 
}, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "ExecuteTime": { 136 | "end_time": "2017-09-19T07:17:10.995936Z", 137 | "start_time": "2017-09-19T07:17:10.904563Z" 138 | } 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "# Simple 2-layer perceptron\n", 143 | "def baseline_model():\n", 144 | " model = Sequential()\n", 145 | " model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))\n", 146 | " model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))\n", 147 | " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", 148 | " return model" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "# Active learning preparations" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "ExecuteTime": { 163 | "end_time": "2017-09-19T07:18:22.421123Z", 164 | "start_time": "2017-09-19T07:18:22.327649Z" 165 | } 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "# For demonstration we sample 50 images from MNIST as seed examples for AL. 
\n", 170 | "# The rest of the images are considered \"unlabeled\"\n", 171 | "y_seed = pandas.Series([None] * y_train.shape[0], index = range(y_train.shape[0]))\n", 172 | "known_indexes = np.random.randint(y_train.shape[0], size = 50)\n", 173 | "y_seed.iloc[known_indexes] = y_train[known_indexes]" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "ExecuteTime": { 181 | "end_time": "2017-09-19T07:10:29.091372Z", 182 | "start_time": "2017-09-19T07:10:28.398883Z" 183 | }, 184 | "scrolled": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "# Creating model for selecting examples from the unlabeled dataset.\n", 189 | "# We use special wrapper to make sklearn model compatible with libact library.\n", 190 | "query_model = SklearnProbaAdapter(KerasClassifier(build_fn = baseline_model, verbose = 0))\n", 191 | "\n", 192 | "# We use uncertainty sampling strategy from libact and wrap it with adaptor.\n", 193 | "active_learn_alg_ctor = make_libact_strategy_ctor(lambda trn_ds:\n", 194 | " UncertaintySampling(trn_ds,\n", 195 | " model = query_model))\n", 196 | "\n", 197 | "# Now we create model for evaluation. 
In this example it is the same model as the one\n", 198 | "# for selecting examples.\n", 199 | "evaluation_model = KerasClassifier(build_fn = baseline_model, verbose = 0)\n", 200 | "\n", 201 | "\n", 202 | "def f1_macro(y_t, y_p):\n", 203 | " return f1_score(y_t, y_p, average = 'macro')\n", 204 | "\n", 205 | "# We create ActiveLearner object that encapsulates the logic of active learning.\n", 206 | "active_learner = ActiveLearner(active_learn_alg_ctor = active_learn_alg_ctor,\n", 207 | " y_dtype = 'int',\n", 208 | " X_full_dataset = X_train, \n", 209 | " y_full_dataset = y_seed.values,\n", 210 | " X_test_dataset = X_test,\n", 211 | " y_test_dataset = y_test,\n", 212 | " model_evaluate = evaluation_model,\n", 213 | " eval_metrics = [accuracy_score, f1_macro],\n", 214 | " rnd_start_steps = 0)\n", 215 | "\n", 216 | "# X_helper object is needed for data visualization for human annotators. \n", 217 | "X_helper = pandas.DataFrame(X_train_orig.reshape(-1, 28*28), index = range(X_train_orig.shape[0]))" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "# Invoking annotation widget" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "ExecuteTime": { 232 | "end_time": "2017-09-19T07:33:44.367082Z", 233 | "start_time": "2017-09-19T07:33:40.577203Z" 234 | } 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# We need this try catch if we use autosave feature. 
It stops autosave thread for the older annotator widgets if we\n", 239 | "# invoke this cell for the second and next times.\n", 240 | "try:\n", 241 | " if active_learn_ui:\n", 242 | " active_learn_ui.stop()\n", 243 | "except NameError:\n", 244 | " pass\n", 245 | " \n", 246 | "# Now we create the active learner widget itself and configure it with created active learning object,\n", 247 | "# data for visualization (X_helper), and visualizer for images.\n", 248 | "active_learn_ui = ActiveLearnerUiWidget(active_learner = active_learner, \n", 249 | " X_helper = X_helper,\n", 250 | " display_feature_table = False,\n", 251 | " drop_labels = list(range(0, X_helper.shape[1])),\n", 252 | " visualizer = ImageVisualizer(columns_range = (0, X_helper.shape[1]), \n", 253 | " img_shape = (28, 28),\n", 254 | " img_mode = 'L',\n", 255 | " preview_shape = (100, 100)),\n", 256 | " y_labels = {str(elem) : elem for elem in range(10)},\n", 257 | " save_path = 'mnist.npy',\n", 258 | " save_time = 120)\n", 259 | "\n", 260 | "active_learn_ui" 261 | ] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": "ipython3", 280 | "version": "3.6.0" 281 | }, 282 | "toc": { 283 | "colors": { 284 | "hover_highlight": "#DAA520", 285 | "running_highlight": "#FF0000", 286 | "selected_highlight": "#FFD700" 287 | }, 288 | "moveMenuLeft": true, 289 | "nav_menu": { 290 | "height": "12px", 291 | "width": "253px" 292 | }, 293 | "navigate_menu": true, 294 | "number_sections": true, 295 | "sideBar": true, 296 | "threshold": 4, 297 | "toc_cell": false, 298 | "toc_section_display": "block", 299 | "toc_window_display": false 300 | } 301 | }, 302 | "nbformat": 4, 303 
| "nbformat_minor": 2 304 | } 305 | -------------------------------------------------------------------------------- /actleto/models/utils_data.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | src/active_learning/utils_data.py · master · Nemo / skoltech_cardiology · GitLab 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 82 | 83 | 84 | 85 | 86 | 94 | 95 | 96 | 366 |
367 | 368 |
369 | 665 | 666 |
667 |
668 |
669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 696 | 697 |
698 |
699 |
700 |
701 |
702 |
703 | 704 |
705 | 706 |
707 | 743 | 744 |
745 |
746 |
    747 |
  • 748 |
    749 | Artem Shelmanov's avatar 750 |
    751 |
    752 |
    753 | Fix 754 | 755 | · 756 | b64177a4 757 | 758 |
    759 | Artem Shelmanov authored 760 |
    761 | 762 |
    763 |
    764 | 765 |
    766 |
    767 |
    768 | b64177a4 769 |
    770 | 771 | 772 |
    773 |
    774 |
    775 |
  • 776 | 777 |
778 |
779 | 780 | 781 |
782 |
783 |
784 |
785 |
786 | 787 | 788 | utils_data.py 789 | 790 | 791 | 792 | 2.41 KB 793 | 794 |
795 | 796 |
EditWeb IDE
797 | 798 | 799 | 800 | 801 |
802 | 803 | 804 | 805 | 806 |
807 |
808 | 822 | 823 | 824 | 825 |
826 |
827 | 828 |
829 | 830 |
831 | 832 | 833 |
834 |
835 | 836 | 882 | 883 | 938 | 939 |
940 | 941 | 942 |
943 |
944 |
945 |
946 | 947 | 948 | 949 | 950 | 951 | 952 | 953 | -------------------------------------------------------------------------------- /actleto/models/model_wrappers.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | src/active_learning/model_wrappers.py · master · Nemo / skoltech_cardiology · GitLab 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 82 | 83 | 84 | 85 | 86 | 94 | 95 | 96 | 366 |
367 | 368 |
369 | 665 | 666 |
667 |
668 |
669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 696 | 697 |
698 |
699 |
700 |
701 |
702 |
703 | 704 |
705 | 706 |
707 | 743 | 744 |
745 |
746 |
    747 |
  • 748 |
    749 | Artem Shelmanov's avatar 750 |
    751 |
    752 |
    753 | Simplified notebooks for al 754 | 755 | · 756 | 07eef965 757 | 758 |
    759 | Artem Shelmanov authored 760 |
    761 | 762 |
    763 |
    764 | 765 |
    766 |
    767 |
    768 | 07eef965 769 |
    770 | 771 | 772 |
    773 |
    774 |
    775 |
  • 776 | 777 |
778 |
779 | 780 | 781 |
782 |
783 |
784 |
785 |
786 | 787 | 788 | model_wrappers.py 789 | 790 | 791 | 792 | 6.34 KB 793 | 794 |
795 | 796 |
EditWeb IDE
797 | 798 | 799 | 800 | 801 |
802 | 803 | 804 | 805 | 806 |
807 |
808 | 822 | 823 | 824 | 825 |
826 |
827 | 828 |
829 | 830 |
831 | 832 | 833 |
834 |
835 | 836 | 882 | 883 | 938 | 939 |
940 | 941 | 942 |
943 |
944 |
945 |
946 | 947 | 948 | 949 | 950 | 951 | 952 | 953 | --------------------------------------------------------------------------------