├── .gitignore
├── README.md
├── examples
│   └── cifar10_tutorial.py
├── nas
│   ├── __init__.py
│   ├── greedy.py
│   ├── grid.py
│   └── random.py
└── tests
    └── test_nas.py

/.gitignore:
--------------------------------------------------------------------------------
# vim swp files
*.swp
# caffe/pytorch model files
*.pth

# Mkdocs
/docs/
/mkdocs/docs/temp

.DS_Store
.idea
.pytest_cache
/experiments

# resource temp folder
tests/resources/temp/*
!tests/resources/temp/.gitkeep

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
.static_storage/
.media/
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

examples/text_cnn/glove_embedding/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# autokeras-algorithm
Some other AutoML algorithms, provided as baselines for Auto-Keras.
Refer to: https://autokeras.com/temp/nas/
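
A minimal usage sketch (assuming the Auto-Keras 0.4-era API, with
`trainloader`/`testloader` prepared as `torch.utils.data.DataLoader`s the way
`examples/cifar10_tutorial.py` does):

```python
from torch.nn.functional import cross_entropy

from autokeras import CnnModule
from autokeras.nn.metric import Accuracy
from nas.greedy import GreedySearcher

cnn = CnnModule(loss=cross_entropy, metric=Accuracy,
                searcher_args={}, verbose=True,
                search_type=GreedySearcher)
cnn.fit(n_output_node=10, input_shape=(1, 32, 32, 3),
        train_data=trainloader, test_data=testloader)
```

`GridSearcher` (from `nas.grid`) and `RandomSearcher` (from `nas.random`) can be
swapped in through the same `search_type` argument.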
--------------------------------------------------------------------------------
/examples/cifar10_tutorial.py:
--------------------------------------------------------------------------------
"""
Run NAS baseline methods
========================
We currently provide four NAS baseline methods; the default one is Bayesian
optimization. This tutorial shows how to run the other NAS baseline methods.

Generally, to run a non-default NAS method, follow these steps in order:
1. Prepare the dataset in the form of torch.utils.data.DataLoader.
2. Initialize the CnnModule/MlpModule with the class name of the NAS Searcher.
3. Start the search by running the fit function.
Refer to the cifar10 example below for more details.
"""
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torch.nn.functional import cross_entropy

from autokeras import CnnModule
from autokeras.nn.metric import Accuracy

from nas.greedy import GreedySearcher

if __name__ == '__main__':
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                             shuffle=False, num_workers=2)

    (image, target) = trainset[0]
    image = np.array(image).transpose((1, 2, 0))
    # Add a batch dimension: the shape becomes (1, height, width, channels).
    input_shape = np.expand_dims(image, axis=0).shape
    num_classes = 10

    # Take GreedySearcher as an example; you can implement your own searcher and
    # pass the class name to the CnnModule via search_type=YOUR_SEARCHER.
    cnn_module = CnnModule(loss=cross_entropy, metric=Accuracy,
                           searcher_args={}, verbose=True,
                           search_type=GreedySearcher)

    cnn_module.fit(n_output_node=num_classes, input_shape=input_shape,
                   train_data=trainloader,
                   test_data=testloader)
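
    # After the search finishes, one could retrain the best architecture found
    # before deploying it. (Hypothetical follow-up; the exact call depends on
    # the installed Auto-Keras version, e.g. something like:)
    # cnn_module.final_fit(train_data=trainloader, test_data=testloader,
    #                      retrain=True)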
--------------------------------------------------------------------------------
/nas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datamllab/autokeras-algorithm/5b31ca5324ba097c3a386f5d7a5b72974d08ed03/nas/__init__.py
--------------------------------------------------------------------------------
/nas/greedy.py:
--------------------------------------------------------------------------------
import time
from copy import deepcopy

from autokeras.custom_queue import Queue
from autokeras.bayesian import contain
from autokeras.net_transformer import transform
from autokeras.search import Searcher


class GreedyOptimizer:

    def __init__(self, searcher, metric):
        self.searcher = searcher
        self.metric = metric

    def generate(self, descriptors, timeout, sync_message):
        """Generate new neighbor architectures from the current best model.

        Args:
            descriptors: All the neural architectures searched so far.
            timeout: An integer. The time limit in seconds.
            sync_message: The Queue for the multiprocessing return value.

        Returns:
            out: A list of 2-element tuples. Each tuple contains an instance of
                Graph (a morphed neural network with weights) and the father
                node id in the search tree.
        """
        out = []
        start_time = time.time()
        descriptors = deepcopy(descriptors)

        if isinstance(sync_message, Queue) and sync_message.qsize() != 0:
            return out
        # Greedy strategy: always expand the best model found so far.
        model_id = self.searcher.get_neighbour_best_model_id()
        graph = self.searcher.load_model_by_id(model_id)
        father_id = model_id
        for temp_graph in transform(graph):
            # Skip architectures that have already been searched.
            if contain(descriptors, temp_graph.extract_descriptor()):
                continue
            out.append((deepcopy(temp_graph), father_id))
        remaining_time = timeout - (time.time() - start_time)

        if remaining_time < 0:
            raise TimeoutError
        return out


class GreedySearcher(Searcher):
    """Class to search for neural architectures using the greedy search strategy.

    Attributes:
        optimizer: An instance of GreedyOptimizer.
    """

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 trainer_args=None,
                 default_model_len=None,
                 default_model_width=None):
        super(GreedySearcher, self).__init__(n_output_node, input_shape,
                                             path, metric, loss, generators,
                                             verbose, trainer_args, default_model_len,
                                             default_model_width)
        self.optimizer = GreedyOptimizer(self, metric)

    def generate(self, multiprocessing_queue):
        """Generate the next neural architectures.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value;
                passed into the search algorithm for synchronization.

        Returns:
            results: A list of 2-element tuples. Each tuple contains an instance
                of Graph and anything to be saved in the training queue together
                with the architecture.
        """
        remaining_time = self._timeout - time.time()
        results = self.optimizer.generate(self.descriptors, remaining_time,
                                          multiprocessing_queue)
        if not results:
            # No new neighbor could be generated; fall back to a fresh
            # default architecture.
            new_father_id = 0
            generated_graph = self.generators[0](self.n_classes, self.input_shape). \
                generate(self.default_model_len, self.default_model_width)
            results.append((generated_graph, new_father_id))

        return results

    def update(self, other_info, model_id, graph, metric_value):
        return

    def load_neighbour_best_model(self):
        return self.load_model_by_id(self.get_neighbour_best_model_id())

    def get_neighbour_best_model_id(self):
        if self.metric.higher_better():
            return max(self.neighbour_history, key=lambda x: x['metric_value'])['model_id']
        return min(self.neighbour_history, key=lambda x: x['metric_value'])['model_id']
--------------------------------------------------------------------------------
/nas/grid.py:
--------------------------------------------------------------------------------
import itertools

from autokeras.constant import Constant
from autokeras.search import Searcher


def assert_search_space(search_space):
    grid = search_space
    value_list = []
    if Constant.LENGTH_DIM not in list(grid.keys()):
        print('No length dimension found in search space. Using default values.')
        grid[Constant.LENGTH_DIM] = Constant.DEFAULT_LENGTH_SEARCH
    elif not isinstance(grid[Constant.LENGTH_DIM][0], int):
        print('Converting strings to integers. Next time please make sure to '
              'enter integer values for the length dimension.')
        grid[Constant.LENGTH_DIM] = list(map(int, grid[Constant.LENGTH_DIM]))

    if Constant.WIDTH_DIM not in list(grid.keys()):
        print('No width dimension found in search space. Using default values.')
        grid[Constant.WIDTH_DIM] = Constant.DEFAULT_WIDTH_SEARCH
    elif not isinstance(grid[Constant.WIDTH_DIM][0], int):
        print('Converting strings to integers. Next time please make sure to '
              'enter integer values for the width dimension.')
        grid[Constant.WIDTH_DIM] = list(map(int, grid[Constant.WIDTH_DIM]))

    grid_key_list = list(grid.keys())
    grid_key_list.sort()
    for key in grid_key_list:
        value_list.append(grid[key])

    # Every combination of the dimension values forms one grid point.
    dimension = list(itertools.product(*value_list))
    return grid, dimension
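
# Illustration (hypothetical values): assuming the dimension keys are the
# integers Constant.LENGTH_DIM == 0 and Constant.WIDTH_DIM == 1, a call such as
#     assert_search_space({Constant.LENGTH_DIM: [5, 10],
#                          Constant.WIDTH_DIM: ['32', '64']})
# coerces the width values to integers and returns the grid together with the
# Cartesian product of the dimension values (keys iterated in sorted order):
#     [(5, 32), (5, 64), (10, 32), (10, 64)]
# GridSearcher.generate() below indexes each such tuple with
# Constant.LENGTH_DIM / Constant.WIDTH_DIM, which is why integer-valued
# dimension constants are assumed here.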


class GridSearcher(Searcher):
    """Class to search for neural architectures using the grid search strategy.

    Attributes:
        search_space: A dictionary mapping search dimensions to their possible values.
    """

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 search_space=None,
                 trainer_args=None, default_model_len=None, default_model_width=None):
        super(GridSearcher, self).__init__(n_output_node, input_shape, path, metric, loss, generators, verbose,
                                           trainer_args, default_model_len, default_model_width)
        # Avoid a mutable default argument; fall back to an empty search space.
        if search_space is None:
            search_space = {}
        self.search_space, self.search_dimensions = assert_search_space(search_space)
        self.search_space_counter = 0

    def get_search_dimensions(self):
        return self.search_dimensions

    def search_space_exhausted(self):
        """Check whether grid search has exhausted the search space."""
        return self.search_space_counter == len(self.search_dimensions)

    def search(self, train_data, test_data, timeout=60 * 60 * 24):
        """Run one iteration of the search loop: train, generate, and update.

        Falls through to the base class implementation unless the search space
        is already exhausted.

        Args:
            train_data: An instance of DataLoader.
            test_data: An instance of DataLoader.
            timeout: An integer. The time limit in seconds.
        """
        if self.search_space_exhausted():
            return
        super().search(train_data, test_data, timeout)

    def update(self, other_info, model_id, graph, metric_value):
        return

    def generate(self, multiprocessing_queue):
        """Generate the next neural architecture.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value.

        Returns:
            A list of 2-element tuples (generated_graph, other_info); for the
            grid searcher the length of the list is always 1.
            generated_graph: An instance of Graph.
            other_info: Always 0.
        """
        grid = self.get_grid()
        self.search_space_counter += 1
        generated_graph = self.generators[0](self.n_classes, self.input_shape). \
            generate(grid[Constant.LENGTH_DIM], grid[Constant.WIDTH_DIM])
        return [(generated_graph, 0)]

    def get_grid(self):
        """Return the next grid point to be searched, or None when exhausted."""
        if self.search_space_counter < len(self.search_dimensions):
            return self.search_dimensions[self.search_space_counter]
        return None
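
# Usage sketch (hypothetical values; assumes searcher_args is forwarded to the
# searcher's constructor, as the Auto-Keras 0.4-era CnnModule does):
#     searcher_args = {'search_space': {Constant.LENGTH_DIM: [5, 10],
#                                       Constant.WIDTH_DIM: [32, 64]}}
#     CnnModule(loss=cross_entropy, metric=Accuracy,
#               searcher_args=searcher_args, search_type=GridSearcher)
# would then train one architecture per grid point (four in this example).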
--------------------------------------------------------------------------------
/nas/random.py:
--------------------------------------------------------------------------------
from random import randrange

from autokeras.bayesian import contain
from autokeras.net_transformer import transform
from autokeras.search import Searcher


class RandomSearcher(Searcher):
    """Class to search for neural architectures using the random search strategy."""

    def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose,
                 trainer_args=None,
                 default_model_len=None,
                 default_model_width=None):
        super(RandomSearcher, self).__init__(n_output_node, input_shape,
                                             path, metric, loss, generators,
                                             verbose, trainer_args, default_model_len,
                                             default_model_width)

    def generate(self, multiprocessing_queue):
        """Generate the next neural architecture.

        Args:
            multiprocessing_queue: The Queue for the multiprocessing return value.

        Returns:
            A list of 2-element tuples (generated_graph, other_info); for the
            random searcher the length of the list is always 1.
            generated_graph: An instance of Graph.
            other_info: Anything to be saved in the training queue together with
                the architecture.
        """
        # Pick a random previously trained model and try to morph it into an
        # architecture that has not been searched yet.
        random_index = randrange(len(self.history))
        model_id = self.history[random_index]['model_id']
        graph = self.load_model_by_id(model_id)
        new_father_id = None
        generated_graph = None
        for temp_graph in transform(graph):
            if not contain(self.descriptors, temp_graph.extract_descriptor()):
                new_father_id = model_id
                generated_graph = temp_graph
                break
        if new_father_id is None:
            # Every morph was a duplicate; fall back to a fresh default graph.
            new_father_id = 0
            generated_graph = self.generators[0](self.n_classes, self.input_shape). \
                generate(self.default_model_len, self.default_model_width)

        return [(generated_graph, new_father_id)]

    def update(self, other_info, model_id, graph, metric_value):
        return
--------------------------------------------------------------------------------
/tests/test_nas.py:
--------------------------------------------------------------------------------
from unittest.mock import patch

from autokeras.backend.torch.loss_function import classification_loss
from autokeras.nn.metric import Accuracy
from autokeras.search import *
from autokeras.nn.generator import CnnGenerator, MlpGenerator
from tests.common import clean_dir, MockProcess, get_classification_data_loaders, \
    get_classification_data_loaders_mlp, simple_transform, TEST_TEMP_DIR, \
    simple_transform_mlp, mock_train

from nas.greedy import GreedySearcher
from nas.grid import GridSearcher
from nas.random import RandomSearcher


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_greedy_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
@patch('autokeras.search.get_system', return_value=Constant.SYS_GOOGLE_COLAB)
def test_greedy_searcher_sp(_, _1, _2, _3):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform_mlp)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_greedy_searcher_mlp(_, _1, _2):
    train_data, test_data = get_classification_data_loaders_mlp()
    clean_dir(TEST_TEMP_DIR)
    searcher = GreedySearcher(3, (28,), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[MlpGenerator, MlpGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_random_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = RandomSearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
@patch('autokeras.search.get_system', return_value=Constant.SYS_GOOGLE_COLAB)
def test_random_searcher_sp(_, _1, _2, _3):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = RandomSearcher(3, (28, 28, 3), verbose=False, path=TEST_TEMP_DIR, metric=Accuracy,
                              loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    for _ in range(2):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == 2


@patch('torch.multiprocessing.get_context', side_effect=MockProcess)
@patch('autokeras.bayesian.transform', side_effect=simple_transform)
@patch('autokeras.backend.torch.model_trainer.ModelTrainer.train_model', side_effect=mock_train)
def test_grid_searcher(_, _1, _2):
    train_data, test_data = get_classification_data_loaders()
    clean_dir(TEST_TEMP_DIR)
    searcher = GridSearcher(3, (28, 28, 3), verbose=True, path=TEST_TEMP_DIR, metric=Accuracy,
                            loss=classification_loss, generators=[CnnGenerator, CnnGenerator])
    Constant.N_NEIGHBOURS = 1
    Constant.T_MIN = 0.8
    for _ in range(len(searcher.get_search_dimensions())):
        searcher.search(train_data, test_data)
    clean_dir(TEST_TEMP_DIR)
    assert len(searcher.history) == len(searcher.search_dimensions)
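
# To run these tests locally (assuming the Auto-Keras test helpers in
# tests/common.py are importable, as in the upstream autokeras repository):
#     pytest tests/test_nas.py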
--------------------------------------------------------------------------------