├── .gitignore
├── Makefile
├── README.md
├── main.py
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
1 | venv*
2 | .data*
3 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: help lint run
 2 | 
 3 | # Makefile variables
 4 | VENV_NAME:=venv
 5 | PYTHON=${VENV_NAME}/bin/python3
 6 | 
 7 | # Include your variables here
 8 | RANDOM_SEED:=42
 9 | NUM_EPOCHS:=15
10 | INPUT_DIM:=784
11 | HIDDEN_DIM:=128
12 | OUTPUT_DIM:=10
13 | # Make `help` the goal of a bare `make` invocation.
14 | .DEFAULT_GOAL := help
15 | help:
16 | 	@echo "make venv"
17 | 	@echo "       prepare development environment, use only once"
18 | 	@echo "make lint"
19 | 	@echo "       run pylint"
20 | 	@echo "make run"
21 | 	@echo "       run project"
22 | 
23 | # (Re)install dependencies when venv/bin/activate is missing or older than setup.py.
24 | venv: $(VENV_NAME)/bin/activate
25 | $(VENV_NAME)/bin/activate: setup.py
26 | 	test -d $(VENV_NAME) || python3 -m venv $(VENV_NAME)
27 | 	${PYTHON} -m pip install -U pip
28 | 	${PYTHON} -m pip install -e .
29 | 	rm -rf ./*.egg-info
30 | 	touch $(VENV_NAME)/bin/activate
31 | # Run pylint on main.py with the virtual environment's interpreter.
32 | lint: venv
33 | 	${PYTHON} -m pylint main.py
34 | # Run main.py, passing the variables defined above as command-line flags.
35 | run: venv
36 | 	${PYTHON} main.py --seed $(RANDOM_SEED) --num_epochs $(NUM_EPOCHS) --input_dim $(INPUT_DIM) --hidden_dim $(HIDDEN_DIM) --output_dim $(OUTPUT_DIM)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # create-ml-app
 2 | 
 3 | `create-ml-app` makes it easier to spin up a machine learning project locally in Python and handle various package dependencies. The name is inspired by [`create-react-app`](https://github.com/facebook/create-react-app). 
 4 | 
 5 | This project abstracts away `pip install`s and virtual environment commands from the user. To use, simply fork this repository and execute `make run` from your shell. The `main.py` file in this repo has an example ML script (training a neural network on MNIST in PyTorch).
 6 | 
 7 | ## Motivation
 8 | 
 9 | When starting a new ML project or prototyping a model locally, it can be tedious to:
10 | 
11 | * handle all the Python package dependencies
12 | * create/activate/deactivate your virtual environment
13 | * parameterize arguments
14 | * remember to define a random seed
15 | 
16 | Having a Makefile can simplify the virtual environment overhead and centralize parameters in one place. This repository is an example of how to use a Makefile in a simple ML project (training a neural network on MNIST in PyTorch). 
17 | 
18 | ## Background
19 | 
20 | Under the hood, this project uses `venv` to create a virtual environment and install Python packages. The primary commands supported by this Makefile are:
21 | 
22 | * `make lint`: This will show errors as flagged by pylint.
23 | * `make run`: This will install any packages newly listed in `setup.py` (creating the virtual environment on first use) and run `main.py` with user-specified variables. You may need to modify the Makefile to include variables of your choice and change the `run` definition to run your Python file with your specified variables.
24 | 
25 | If you want to use any Python package in your project, simply add the package name to `setup.py` and it will get installed the next time you execute `make run` from your shell.
26 | 
27 | ## Usage
28 | 
29 |     git clone https://github.com/shreyashankar/create-ml-app.git my-ml-app
30 |     cd my-ml-app
31 |     make run


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | """Example script to train a simple FC NN on MNIST."""
 2 | 
 3 | import argparse
 4 | 
 5 | import torch
 6 | import torchvision
 7 | 
 8 | # Parse arguments
 9 | parser = argparse.ArgumentParser(description='Parameters.')
10 | parser.add_argument('--seed', type=int, default=42)
11 | parser.add_argument('--num_epochs', type=int, default=15)
12 | parser.add_argument('--input_dim', type=int, default=784)
13 | parser.add_argument('--hidden_dim', type=int, default=128)
14 | parser.add_argument('--output_dim', type=int, default=10)
15 | args = parser.parse_args()
16 | RANDOM_SEED = args.seed
17 | NUM_EPOCHS = args.num_epochs
18 | INPUT_DIM = args.input_dim
19 | HIDDEN_DIM = args.hidden_dim
20 | OUTPUT_DIM = args.output_dim
21 | 
22 | # Set random seed
23 | torch.manual_seed(RANDOM_SEED)
24 | 
25 | # Load dataset and transforms
26 | transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
27 |                                             torchvision.transforms.Normalize(
28 |                                                 (0.5,), (0.5,)),
29 |                                             ])
30 | train_set = torchvision.datasets.MNIST(
31 |     '.data/train', download=True, train=True, transform=transform)
32 | test_set = torchvision.datasets.MNIST(
33 |     '.data/test', download=True, train=False, transform=transform)
34 | train_loader = torch.utils.data.DataLoader(
35 |     train_set, batch_size=64, shuffle=True)
36 | test_loader = torch.utils.data.DataLoader(
37 |     test_set, batch_size=64, shuffle=True)
38 | 
39 | # Create model, loss function, and optimizer
40 | model = torch.nn.Sequential(torch.nn.Linear(INPUT_DIM, HIDDEN_DIM),
41 |                             torch.nn.Linear(HIDDEN_DIM, OUTPUT_DIM),
42 |                             torch.nn.LogSoftmax(dim=1))
43 | loss_fn = torch.nn.NLLLoss()
44 | optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
45 | 
46 | 
47 | def train():
48 |     """Run an iteration of training and return the loss and accuracy."""
49 |     model.train()
50 |     total_loss = 0
51 |     num_correct = 0
52 |     for data, target in train_loader:
53 |         data = data.view(data.shape[0], -1)
54 |         optimizer.zero_grad()
55 |         output = model(data)
56 |         loss = loss_fn(output, target)
57 |         loss.backward()
58 |         optimizer.step()
59 |         total_loss += loss.item()
60 |         pred = output.data.max(1, keepdim=True)[1]
61 |         num_correct += pred.eq(target.data.view_as(pred)).long().sum()
62 |     return total_loss, float(num_correct / len(train_set))
63 | 
64 | 
65 | def test():
66 |     """Return the loss and accuracy on the test set."""
67 |     model.eval()
68 |     total_loss = 0
69 |     num_correct = 0
70 |     for data, target in test_loader:
71 |         data = data.view(data.shape[0], -1)
72 |         output = model(data)
73 |         loss = loss_fn(output, target)
74 |         total_loss += loss
75 |         pred = output.data.max(1, keepdim=True)[1]
76 |         num_correct += pred.eq(target.data.view_as(pred)).long().sum()
77 |     return total_loss, float(num_correct / len(test_set))
78 | 
79 | 
80 | # Each epoch: one training pass, then one evaluation pass, printing both results
81 | for epoch in range(NUM_EPOCHS):
82 |     print(f'Epoch {epoch}')
83 |     train_loss, train_acc = train()
84 |     print(f'\tTrain loss: {train_loss}')
85 |     print(f'\tTrain accuracy: {train_acc}')
86 |     test_loss, test_acc = test()
87 |     print(f'\tTest loss: {test_loss}')
88 |     print(f'\tTest accuracy: {test_acc}')
89 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name='my-app',
 5 |     version='0.1',
 6 |     packages=find_packages(exclude=['tests']),
 7 |     install_requires=[
 8 |         'pylint',
 9 |         'torch',
10 |         'torchvision'
11 |     ]
12 | )


--------------------------------------------------------------------------------