├── .gitignore
├── README.md
├── examples
│   └── cifar10_tutorial.py
├── nas
│   ├── __init__.py
│   ├── greedy.py
│   ├── grid.py
│   └── random.py
└── tests
    └── test_nas.py

/.gitignore:
--------------------------------------------------------------------------------
# vim swp files
*.swp
# caffe/pytorch model files
*.pth

# Mkdocs
/docs/
/mkdocs/docs/temp

.DS_Store
.idea
.pytest_cache
/experiments

# resource temp folder
tests/resources/temp/*
!tests/resources/temp/.gitkeep

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
.static_storage/
.media/
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

examples/text_cnn/glove_embedding/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# autokeras-algorithm
Some other AutoML algorithms, provided as baselines for Auto-Keras.
Refer to: https://autokeras.com/temp/nas/
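
A minimal usage sketch (assuming the Auto-Keras 0.4-era API, with
`trainloader`/`testloader` prepared as `torch.utils.data.DataLoader`s the way
`examples/cifar10_tutorial.py` does):

```python
from torch.nn.functional import cross_entropy

from autokeras import CnnModule
from autokeras.nn.metric import Accuracy
from nas.greedy import GreedySearcher

cnn = CnnModule(loss=cross_entropy, metric=Accuracy,
                searcher_args={}, verbose=True,
                search_type=GreedySearcher)
cnn.fit(n_output_node=10, input_shape=(1, 32, 32, 3),
        train_data=trainloader, test_data=testloader)
```

`GridSearcher` (from `nas.grid`) and `RandomSearcher` (from `nas.random`) can be
swapped in through the same `search_type` argument.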
--------------------------------------------------------------------------------
/examples/cifar10_tutorial.py:
--------------------------------------------------------------------------------
"""
Run NAS baseline methods
========================
We currently provide four NAS baseline methods; the default one is Bayesian
optimization. This tutorial shows how to run the other NAS baseline methods.

Generally, to run a non-default NAS method, follow these steps in order:
1. Prepare the dataset in the form of torch.utils.data.DataLoader.
2. Initialize the CnnModule/MlpModule with the class name of the NAS Searcher.
3. Start the search by running the fit function.
Refer to the cifar10 example below for more details.
"""
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torch.nn.functional import cross_entropy

from autokeras import CnnModule
from autokeras.nn.metric import Accuracy

from nas.greedy import GreedySearcher

if __name__ == '__main__':
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                             shuffle=False, num_workers=2)

    (image, target) = trainset[0]
    image = np.array(image).transpose((1, 2, 0))
    # Add a batch dimension: the shape becomes (1, height, width, channels).
    input_shape = np.expand_dims(image, axis=0).shape
    num_classes = 10

    # Take GreedySearcher as an example; you can implement your own searcher and
    # pass the class name to the CnnModule via search_type=YOUR_SEARCHER.
    cnn_module = CnnModule(loss=cross_entropy, metric=Accuracy,
                           searcher_args={}, verbose=True,
                           search_type=GreedySearcher)

    cnn_module.fit(n_output_node=num_classes, input_shape=input_shape,
                   train_data=trainloader,
                   test_data=testloader)
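
    # After the search finishes, one could retrain the best architecture found
    # before deploying it. (Hypothetical follow-up; the exact call depends on
    # the installed Auto-Keras version, e.g. something like:)
    # cnn_module.final_fit(train_data=trainloader, test_data=testloader,
    #                      retrain=True)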
--------------------------------------------------------------------------------
/nas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/autokeras-algorithm/5b31ca5324ba097c3a386f5d7a5b72974d08ed03/nas/__init__.py
--------------------------------------------------------------------------------
/nas/greedy.py:
--------------------------------------------------------------------------------
import time
from copy import deepcopy

from autokeras.custom_queue import Queue
from autokeras.bayesian import contain
from autokeras.net_transformer import transform
from autokeras.search import Searcher


class GreedyOptimizer:

    def __init__(self, searcher, metric):
        self.searcher = searcher
        self.metric = metric

    def generate(self, descriptors, timeout, sync_message):
        """Generate new neighbor architectures from the current best model.

        Args:
            descriptors: All the neural architectures searched so far.
            timeout: An integer. The time limit in seconds.
            sync_message: The Queue for the multiprocessing return value.

        Returns:
            out: A list of 2-element tuples. Each tuple contains an instance of
                Graph (a morphed neural network with weights) and the father
                node id in the search tree.
        """
        out = []
        start_time = time.time()
        descriptors = deepcopy(descriptors)

        if isinstance(sync_message, Queue) and sync_message.qsize() != 0:
            return out
        # Greedy strategy: always expand the best model found so far.
        model_id = self.searcher.get_neighbour_best_model_id()
        graph = self.searcher.load_model_by_id(model_id)
        father_id = model_id
        for temp_graph in transform(graph):
            # Skip architectures that have already been searched.
            if contain(descriptors, temp_graph.extract_descriptor()):
                continue
            out.append((deepcopy(temp_graph), father_id))
        remaining_time = timeout - (time.time() - start_time)

        if remaining_time < 0:
            raise TimeoutError
        return out


class GreedySearcher(Searcher):
    """Class to search for neural architectures using the greedy search strategy.

    Attributes:
        optimizer: An instance of GreedyOptimizer.
    """

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 trainer_args=None,
                 default_model_len=None,
                 default_model_width=None):
        super(GreedySearcher, self).__init__(n_output_node, input_shape,
                                             path, metric, loss, generators,
                                             verbose, trainer_args, default_model_len,
                                             default_model_width)
        self.optimizer = GreedyOptimizer(self, metric)

    def generate(self, multiprocessing_queue):
        """Generate the next neural architectures.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value;
                passed into the search algorithm for synchronization.

        Returns:
            results: A list of 2-element tuples. Each tuple contains an instance
                of Graph and anything to be saved in the training queue together
                with the architecture.
        """
        remaining_time = self._timeout - time.time()
        results = self.optimizer.generate(self.descriptors, remaining_time,
                                          multiprocessing_queue)
        if not results:
            # No new neighbor could be generated; fall back to a fresh
            # default architecture.
            new_father_id = 0
            generated_graph = self.generators[0](self.n_classes, self.input_shape). \
                generate(self.default_model_len, self.default_model_width)
            results.append((generated_graph, new_father_id))

        return results

    def update(self, other_info, model_id, graph, metric_value):
        return

    def load_neighbour_best_model(self):
        return self.load_model_by_id(self.get_neighbour_best_model_id())

    def get_neighbour_best_model_id(self):
        if self.metric.higher_better():
            return max(self.neighbour_history, key=lambda x: x['metric_value'])['model_id']
        return min(self.neighbour_history, key=lambda x: x['metric_value'])['model_id']
--------------------------------------------------------------------------------
/nas/grid.py:
--------------------------------------------------------------------------------
import itertools

from autokeras.constant import Constant
from autokeras.search import Searcher


def assert_search_space(search_space):
    grid = search_space
    value_list = []
    if Constant.LENGTH_DIM not in list(grid.keys()):
        print('No length dimension found in search space. Using default values.')
        grid[Constant.LENGTH_DIM] = Constant.DEFAULT_LENGTH_SEARCH
    elif not isinstance(grid[Constant.LENGTH_DIM][0], int):
        print('Converting strings to integers. Next time please make sure to '
              'enter integer values for the length dimension.')
        grid[Constant.LENGTH_DIM] = list(map(int, grid[Constant.LENGTH_DIM]))

    if Constant.WIDTH_DIM not in list(grid.keys()):
        print('No width dimension found in search space. Using default values.')
        grid[Constant.WIDTH_DIM] = Constant.DEFAULT_WIDTH_SEARCH
    elif not isinstance(grid[Constant.WIDTH_DIM][0], int):
        print('Converting strings to integers. Next time please make sure to '
              'enter integer values for the width dimension.')
        grid[Constant.WIDTH_DIM] = list(map(int, grid[Constant.WIDTH_DIM]))

    grid_key_list = list(grid.keys())
    grid_key_list.sort()
    for key in grid_key_list:
        value_list.append(grid[key])

    # Every combination of the dimension values forms one grid point.
    dimension = list(itertools.product(*value_list))
    return grid, dimension
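
# Illustration (hypothetical values): assuming the dimension keys are the
# integers Constant.LENGTH_DIM == 0 and Constant.WIDTH_DIM == 1, a call such as
#     assert_search_space({Constant.LENGTH_DIM: [5, 10],
#                          Constant.WIDTH_DIM: ['32', '64']})
# coerces the width values to integers and returns the grid together with the
# Cartesian product of the dimension values (keys iterated in sorted order):
#     [(5, 32), (5, 64), (10, 32), (10, 64)]
# GridSearcher.generate() below indexes each such tuple with
# Constant.LENGTH_DIM / Constant.WIDTH_DIM, which is why integer-valued
# dimension constants are assumed here.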


class GridSearcher(Searcher):
    """Class to search for neural architectures using the grid search strategy.

    Attributes:
        search_space: A dictionary mapping search dimensions to their possible values.
    """

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 search_space=None,
                 trainer_args=None, default_model_len=None, default_model_width=None):
        super(GridSearcher, self).__init__(n_output_node, input_shape, path, metric, loss, generators, verbose,
                                           trainer_args, default_model_len, default_model_width)
        # Avoid a mutable default argument; fall back to an empty search space.
        if search_space is None:
            search_space = {}
        self.search_space, self.search_dimensions = assert_search_space(search_space)
        self.search_space_counter = 0

    def get_search_dimensions(self):
        return self.search_dimensions

    def search_space_exhausted(self):
        """Check whether grid search has exhausted the search space."""
        return self.search_space_counter == len(self.search_dimensions)

    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        """Run one iteration of the search loop: train, generate, and update.

        Falls through to the base class implementation unless the search space
        is already exhausted.

        Args:
            train_data: An instance of DataLoader.
            test_data: An instance of DataLoader.
            timeout: An integer. The time limit in seconds.
        """
        if self.search_space_exhausted():
            return
        super().search(train_data, test_data, timeout)

    def update(self, other_info, model_id, graph, metric_value):
        return

    def generate(self, multiprocessing_queue):
        """Generate the next neural architecture.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value.

        Returns:
            A list of 2-element tuples (generated_graph, other_info); for the
            grid searcher the length of the list is always 1.
            generated_graph: An instance of Graph.
            other_info: Always 0.
        """
        grid = self.get_grid()
        self.search_space_counter += 1
        generated_graph = self.generators[0](self.n_classes, self.input_shape). \
            generate(grid[Constant.LENGTH_DIM], grid[Constant.WIDTH_DIM])
        return [(generated_graph, 0)]

    def get_grid(self):
        """Return the next grid point to be searched, or None when exhausted."""
        if self.search_space_counter < len(self.search_dimensions):
            return self.search_dimensions[self.search_space_counter]
        return None
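
# Usage sketch (hypothetical values; assumes searcher_args is forwarded to the
# searcher's constructor, as the Auto-Keras 0.4-era CnnModule does):
#     searcher_args = {'search_space': {Constant.LENGTH_DIM: [5, 10],
#                                       Constant.WIDTH_DIM: [32, 64]}}
#     CnnModule(loss=cross_entropy, metric=Accuracy,
#               searcher_args=searcher_args, search_type=GridSearcher)
# would then train one architecture per grid point (four in this example).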
--------------------------------------------------------------------------------
/nas/random.py:
--------------------------------------------------------------------------------
from random import randrange

from autokeras.bayesian import contain
from autokeras.net_transformer import transform
from autokeras.search import Searcher


class RandomSearcher(Searcher):
    """Class to search for neural architectures using the random search strategy."""

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 trainer_args=None,
                 default_model_len=None,
                 default_model_width=None):
        super(RandomSearcher, self).__init__(n_output_node, input_shape,
                                             path, metric, loss, generators,
                                             verbose, trainer_args, default_model_len,
                                             default_model_width)

    def generate(self, multiprocessing_queue):
        """Generate the next neural architecture.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value.

        Returns:
            A list of 2-element tuples (generated_graph, other_info); for the
            random searcher the length of the list is always 1.
            generated_graph: An instance of Graph.
            other_info: Anything to be saved in the training queue together with
                the architecture.
        """
        # Pick a random previously trained model and try to morph it into an
        # architecture that has not been searched yet.
        random_index = randrange(len(self.history))
        model_id = self.history[random_index]['model_id']
        graph = self.load_model_by_id(model_id)
        new_father_id = None
        generated_graph = None
        for temp_graph in transform(graph):
            if not contain(self.descriptors, temp_graph.extract_descriptor()):
                new_father_id = model_id
                generated_graph = temp_graph
                break
        if new_father_id is None:
            # Every morph was a duplicate; fall back to a fresh default graph.
            new_father_id = 0
            generated_graph = self.generators[0](self.n_classes, self.input_shape). \
                generate(self.default_model_len, self.default_model_width)

        return [(generated_graph, new_father_id)]

    def update(self, other_info, model_id, graph, metric_value):
        return
--------------------------------------------------------------------------------
/tests/test_nas.py:
--------------------------------------------------------------------------------
from unittest.mock import patch

from autokeras.backend.torch.loss_function import classification_loss
from autokeras.nn.metric import Accuracy
from autokeras.search import *
from autokeras.nn.generator import CnnGenerator, MlpGenerator
from tests.common import clean_dir, MockProcess, get_classification_data_loaders, \
    get_classification_data_loaders_mlp, simple_transform, TEST_TEMP_DIR, \
    simple_transform_mlp, mock_train

from nas.greedy import GreedySearcher
from nas.grid import GridSearcher
from nas.random import RandomSearcher


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_greedy_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
@patch('autokeras.search.get_system', return_value=Constant.SYS_GOOGLE_COLAB)
def test_greedy_searcher_sp(_, _1, _2, _3):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform_mlp)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_greedy_searcher_mlp(_, _1, _2):
    train_data, test_data = get_classification_data_loaders_mlp()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28,), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[MlpGenerator, MlpGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_random_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = RandomSearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
@patch('autokeras.search.get_system', return_value=Constant.SYS_GOOGLE_COLAB)
def test_random_searcher_sp(_, _1, _2, _3):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = RandomSearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_grid_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GridSearcher(3, (28, 28, 3), verbose=True, path=TEST_TEMP_DIR, metric=Accuracy,
                            loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    Constant.N_NEIGHBOURS = 1
    Constant.T_MIN = 0.8
    for _ in range(len(searcher.get_search_dimensions())):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == len(searcher.search_dimensions)
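
# To run these tests locally (assuming the Auto-Keras test helpers in
# tests/common.py are importable, as in the upstream autokeras repository):
#     pytest tests/test_nas.py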
--------------------------------------------------------------------------------