├── .gitignore ├── README.md ├── hw0 ├── .idea │ ├── .gitignore │ ├── hw0.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .ipynb_checkpoints │ └── hw0-checkpoint.ipynb ├── Makefile ├── README.md ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── hw0.ipynb ├── hw0.pdf ├── src │ ├── __pycache__ │ │ ├── simple_ml.cpython-310.pyc │ │ └── simple_ml.cpython-39.pyc │ ├── simple_ml.py │ ├── simple_ml_ext.cpp │ └── simple_ml_ext.so └── tests │ ├── __pycache__ │ └── test_simple_ml.cpython-310-pytest-7.1.2.pyc │ └── test_simple_ml.py ├── hw1 ├── .gitignore ├── apps │ └── simple_ml.py ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── hw1.ipynb ├── hw1.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── autograd.py │ │ └── ops.py ├── tempCodeRunnerFile.ipynb ├── test.py └── tests │ └── test_autograd_hw.py ├── hw2 ├── .idea │ ├── .gitignore │ ├── hw2.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── apps │ ├── __pycache__ │ │ ├── mlp_resnet.cpython-310.pyc │ │ └── mlp_resnet.cpython-39.pyc │ └── mlp_resnet.py ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── debug.py ├── figures │ ├── mlp_resnet.png │ └── residualblock.png ├── hw2.ipynb ├── hw2.ipynb - Colaboratory.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── autograd.cpython-310.pyc │ │ ├── autograd.cpython-39.pyc │ │ ├── data.cpython-310.pyc │ │ ├── data.cpython-39.pyc │ │ ├── init.cpython-310.pyc │ │ ├── init.cpython-39.pyc │ │ ├── nn.cpython-310.pyc │ │ ├── nn.cpython-39.pyc │ │ ├── ops.cpython-310.pyc │ │ ├── ops.cpython-39.pyc │ │ ├── optim.cpython-310.pyc │ │ └── optim.cpython-39.pyc │ │ ├── autograd.py │ │ ├── data.py │ │ ├── init.py │ │ ├── nn.py │ │ ├── ops.py │ │ └── optim.py └── tests │ ├── __pycache__ │ ├── test_data.cpython-310-pytest-7.1.2.pyc │ ├── test_data.cpython-39.pyc │ ├── test_nn_and_optim.cpython-310-pytest-7.1.2.pyc │ └── test_nn_and_optim.cpython-39.pyc │ ├── test_data.py │ └── test_nn_and_optim.py ├── hw3 ├── .idea │ ├── .gitignore │ ├── hw3.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .tmp.driveupload │ └── 6569 ├── .vscode │ └── settings.json ├── CMakeLists.txt ├── Makefile ├── README.md ├── build │ ├── CMakeCache.txt │ ├── CMakeFiles │ │ ├── 3.27.9 │ │ │ ├── CMakeCCompiler.cmake │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ ├── CMakeSystem.cmake │ │ │ ├── CompilerIdC │ │ │ │ ├── CMakeCCompilerId.c │ │ │ │ └── a.out │ │ │ └── CompilerIdCXX │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ └── a.out │ │ ├── CMakeConfigureLog.yaml │ │ ├── CMakeDirectoryInformation.cmake │ │ ├── CMakeRuleHashes.txt │ │ ├── Makefile.cmake │ │ ├── Makefile2 │ │ ├── TargetDirectories.txt │ │ ├── cmake.check_cache │ │ ├── ndarray_backend_cpu.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.internal │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── progress.make │ │ │ └── 
src │ │ │ │ ├── ndarray_backend_cpu.cc.o │ │ │ │ └── ndarray_backend_cpu.cc.o.d │ │ ├── ndarray_backend_cuda.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── progress.make │ │ │ └── src │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen │ │ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend │ │ └── progress.marks │ ├── Makefile │ ├── cmake_install.cmake │ └── detect_cuda_compute_capabilities.cpp ├── debug.py ├── hw3.ipynb ├── hw3.ipynb - Colaboratory.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── autograd.cpython-310.pyc │ │ ├── backend_numpy.cpython-310.pyc │ │ ├── backend_selection.cpython-310.pyc │ │ └── optim.cpython-310.pyc │ │ ├── autograd.py │ │ ├── backend_ndarray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── ndarray.cpython-310.pyc │ │ │ └── ndarray_backend_numpy.cpython-310.pyc │ │ ├── ndarray.py │ │ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so │ │ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so │ │ └── ndarray_backend_numpy.py │ │ ├── backend_numpy.py │ │ ├── backend_selection.py │ │ ├── data │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── data_basic.cpython-310.pyc │ │ │ └── data_transforms.cpython-310.pyc │ │ ├── data_basic.py │ │ ├── data_transforms.py │ │ └── datasets │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── mnist_dataset.cpython-310.pyc │ │ │ └── ndarray_dataset.cpython-310.pyc │ │ │ ├── mnist_dataset.py │ │ │ └── ndarray_dataset.py │ │ ├── init │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── init_basic.cpython-310.pyc │ │ │ └── init_initializers.cpython-310.pyc │ │ ├── init_basic.py │ │ └── init_initializers.py │ │ ├── nn │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── nn_basic.cpython-310.pyc │ │ └── nn_basic.py │ │ ├── ops │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── ops_logarithmic.cpython-310.pyc │ │ │ ├── ops_mathematic.cpython-310.pyc │ │ │ └── ops_tuple.cpython-310.pyc │ │ ├── ops_logarithmic.py │ │ ├── ops_mathematic.py │ │ └── ops_tuple.py │ │ └── optim.py ├── src │ ├── ndarray_backend_cpu.cc │ └── ndarray_backend_cuda.cu └── tests │ └── hw3 │ ├── __pycache__ │ ├── test_ndarray.cpython-310-pytest-7.1.2.pyc │ ├── test_ndarray.cpython-310-pytest-7.4.3.pyc │ └── test_ndarray.cpython-310.pyc │ └── test_ndarray.py └── hw4 ├── .idea ├── .gitignore ├── hw4.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── .tmp.driveupload ├── 7538 ├── 7792 └── 7888 ├── CMakeLists.txt ├── Makefile ├── README.md ├── ResNet9.png ├── apps ├── models.py └── simple_ml.py ├── build ├── CMakeCache.txt ├── CMakeFiles │ ├── 3.27.9 │ │ ├── CMakeCCompiler.cmake │ │ ├── CMakeCXXCompiler.cmake │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ ├── CMakeSystem.cmake │ │ ├── CompilerIdC │ │ │ ├── CMakeCCompilerId.c │ │ │ └── a.out │ │ └── CompilerIdCXX │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ └── a.out │ ├── CMakeConfigureLog.yaml │ ├── 
CMakeDirectoryInformation.cmake │ ├── CMakeRuleHashes.txt │ ├── Makefile.cmake │ ├── Makefile2 │ ├── TargetDirectories.txt │ ├── cmake.check_cache │ ├── ndarray_backend_cpu.dir │ │ ├── DependInfo.cmake │ │ ├── build.make │ │ ├── cmake_clean.cmake │ │ ├── compiler_depend.internal │ │ ├── compiler_depend.make │ │ ├── compiler_depend.ts │ │ ├── depend.make │ │ ├── flags.make │ │ ├── link.txt │ │ ├── progress.make │ │ └── src │ │ │ ├── ndarray_backend_cpu.cc.o │ │ │ └── ndarray_backend_cpu.cc.o.d │ ├── ndarray_backend_cuda.dir │ │ ├── DependInfo.cmake │ │ ├── build.make │ │ ├── cmake_clean.cmake │ │ ├── compiler_depend.make │ │ ├── compiler_depend.ts │ │ ├── depend.make │ │ ├── flags.make │ │ ├── link.txt │ │ ├── progress.make │ │ └── src │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen │ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend │ └── progress.marks ├── Makefile ├── cmake_install.cmake └── detect_cuda_compute_capabilities.cpp ├── debug.py ├── hw4.ipynb ├── hw4.ipynb - Colaboratory.pdf ├── python └── needle │ ├── __init__.py │ ├── autograd.py │ ├── backend_ndarray │ ├── __init__.py │ ├── ndarray.py │ ├── ndarray_backend_cpu.cpython-310-darwin.so │ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so │ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so │ └── ndarray_backend_numpy.py │ ├── backend_numpy.py │ ├── backend_selection.py │ ├── data │ ├── __init__.py │ ├── data_basic.py │ ├── data_transforms.py │ └── datasets │ │ ├── __init__.py │ │ ├── cifar10_dataset.py │ │ ├── mnist_dataset.py │ │ ├── ndarray_dataset.py │ │ └── ptb_dataset.py │ ├── init │ ├── __init__.py │ ├── init_basic.py │ └── init_initializers.py │ ├── nn │ ├── __init__.py │ ├── nn_basic.py │ ├── nn_conv.py │ └── nn_sequence.py │ ├── ops │ ├── __init__.py │ ├── ops_logarithmic.py │ ├── ops_mathematic.py │ └── ops_tuple.py │ └── optim.py ├── src ├── ndarray_backend_cpu.cc └── ndarray_backend_cuda.cu └── tests └── hw4 ├── test_cifar_ptb_data.py ├── test_conv.py ├── test_nd_backend.py └── test_sequence_models.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.git/ 2 | **/__pycache__/ 3 | hw4/data/cifar-10-batches-py/ 4 | hw4/data/ptb/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLearningSystem 2 | 3 | The project builds a simple version of PyTorch from scratch. 
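By the end of the homeworks, the `needle` package supports tensors, reverse-mode autodiff, neural-network modules and optimizers. A minimal sketch of the core API (assuming the hw1 package layout is on `sys.path`; details vary per homework):

import sys
sys.path.append('hw1/python')
import numpy as np
import needle as ndl

x = ndl.Tensor(np.random.randn(3, 4).astype(np.float32))
w = ndl.Tensor(np.random.randn(4, 2).astype(np.float32))
loss = ndl.relu(x.matmul(w)).sum()  # builds a computation graph
loss.backward()                     # reverse-mode autodiff
print(w.grad.numpy())               # gradients, PyTorch-style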
It contains the homework assignments of CMU 10-414/714: Deep Learning Systems (https://dlsyscourse.org/).
--------------------------------------------------------------------------------
/hw0/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw0/Makefile:
--------------------------------------------------------------------------------
# NOTE: on macOS you need to add an additional flag: -undefined dynamic_lookup
default:
	c++ -O3 -Wall -shared -std=c++11 -fPIC -undefined dynamic_lookup $$(python3 -m pybind11 --includes) src/simple_ml_ext.cpp -o src/simple_ml_ext.so
--------------------------------------------------------------------------------
/hw0/README.md:
--------------------------------------------------------------------------------
# Homework 0
Public repository and stub/testing code for Homework 0 of 10-714.
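A quick way to smoke-test the extension built from src/simple_ml_ext.cpp below — a sketch, not part of the assignment; it assumes `make` has already produced src/simple_ml_ext.so and is run from the hw0 directory, and the synthetic shapes are made up:

import sys
sys.path.append('src')            # directory containing simple_ml_ext.so
import numpy as np
from simple_ml_ext import softmax_regression_epoch_cpp

m, n, k = 60, 5, 3                # examples, input dim, classes
X = np.random.rand(m, n).astype(np.float32)
y = np.random.randint(k, size=m).astype(np.uint8)
theta = np.zeros((n, k), dtype=np.float32)
softmax_regression_epoch_cpp(X, y, theta, lr=0.1, batch=10)
print(theta)                      # theta has been updated in place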
--------------------------------------------------------------------------------
/hw0/src/simple_ml_ext.cpp:
--------------------------------------------------------------------------------
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <cmath>
#include <iostream>
#include <vector>

namespace py = pybind11;


void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
                                  float *theta, size_t m, size_t n, size_t k,
                                  float lr, size_t batch)
{
    /**
     * A C++ version of the softmax regression epoch code.  This should run a
     * single epoch over the data defined by X and y (and sizes m,n,k), and
     * modify theta in place.  Your function will probably want to allocate
     * (and then delete) some helper arrays to store the logits and gradients.
     *
     * Args:
     *     X (const float *): pointer to X data, of size m*n, stored in row
     *          major (C) format
     *     y (const unsigned char *): pointer to y data, of size m
     *     theta (float *): pointer to theta data, of size n*k, stored in row
     *          major (C) format
     *     m (size_t): number of examples
     *     n (size_t): input dimension
     *     k (size_t): number of classes
     *     lr (float): learning rate / SGD step size
     *     batch (int): SGD minibatch size
     *
     * Returns:
     *     (None)
     */

    size_t sample_idx = 0;
    auto Z = std::vector<std::vector<float>>(batch, std::vector<float>(k, 0.0));
    // Run batch
    while (sample_idx < m) {
        if (sample_idx + batch > m) {
            batch = m - sample_idx;
        }

        // Compute Z = normalize(exp(X * theta))
        // The outer two loops iterate over Z, whose size is batch * k
        for (size_t idx = 0; idx < batch; idx++) {
            float row_sum = 0.0;
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] = 0.0;
                for (size_t inner_idx = 0; inner_idx < n; inner_idx++) {
                    Z[idx][j] += X[(sample_idx + idx) * n + inner_idx] * theta[inner_idx * k + j];
                }
                Z[idx][j] = std::exp(Z[idx][j]);
                row_sum += Z[idx][j];
            }
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] /= row_sum;
            }
        }

        // Let Z = Z - Iy
        for (size_t idx = 0; idx < batch; idx++) {
            Z[idx][y[sample_idx + idx]] -= 1.0;
        }

        // Compute gradient
        // The outer two loops iterate over theta, whose size is n * k
        for (size_t idx = 0; idx < n; idx++) {
            for (size_t j = 0; j < k; j++) {
                float diff = 0.0;
                for (size_t inner_idx = 0; inner_idx < batch; inner_idx++) {
                    // theta_diff = X^T * Z
                    // theta_diff(idx, j) = sum_{inner_idx=1}^{batch} X^T(idx, inner_idx) * Z(inner_idx, j)
                    //                    = sum_{inner_idx=1}^{batch} X(inner_idx, idx) * Z(inner_idx, j)
                    diff += X[(sample_idx + inner_idx) * n + idx] * Z[inner_idx][j];
                }
                theta[idx * k + j] -= lr * diff / batch;
            }
        }
        sample_idx += batch;
    }
}


/**
 * This is the pybind11 code that wraps the function above.
 * Its only role is to wrap the function above in a Python module; you do not
 * need to make any edits to this code.
 */
PYBIND11_MODULE(simple_ml_ext, m) {
    m.def("softmax_regression_epoch_cpp",
          [](py::array_t<float, py::array::c_style> X,
             py::array_t<unsigned char, py::array::c_style> y,
             py::array_t<float, py::array::c_style> theta,
             float lr,
             int batch) {
              softmax_regression_epoch_cpp(
                  static_cast<const float*>(X.request().ptr),
                  static_cast<const unsigned char*>(y.request().ptr),
                  static_cast<float*>(theta.request().ptr),
                  X.request().shape[0],
                  X.request().shape[1],
                  theta.request().shape[1],
                  lr,
                  batch
              );
          },
          py::arg("X"), py::arg("y"), py::arg("theta"),
          py::arg("lr"), py::arg("batch"));
}
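For cross-checking the extension, a NumPy sketch of the same epoch (my own reference, not part of the stub) mirrors the three steps above — logits, Z - Iy, and the X^T Z gradient step:

import numpy as np

def softmax_regression_epoch_np(X, y, theta, lr=0.1, batch=100):
    """NumPy mirror of softmax_regression_epoch_cpp; updates theta in place."""
    for i in range(0, X.shape[0], batch):
        Xb, yb = X[i:i + batch], y[i:i + batch]
        Z = np.exp(Xb @ theta)
        Z /= Z.sum(axis=1, keepdims=True)          # normalize(exp(X * theta))
        Z[np.arange(Xb.shape[0]), yb] -= 1.0       # Z = Z - Iy
        theta -= (lr / Xb.shape[0]) * (Xb.T @ Z)   # theta -= lr/B * X^T Z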
--------------------------------------------------------------------------------
/hw1/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
.ipynb_checkpoints/
env/
tests/__pycache__
.idea/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
*~

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/hw1/apps/simple_ml.py:
--------------------------------------------------------------------------------
import struct
import gzip
import numpy as np

import sys
sys.path.append('python/')
import needle as ndl


def parse_mnist(image_filename, label_filename):
    """ Read images and labels files in MNIST format.  See this page:
    http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Args:
        image_filename (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X,y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
                data.  The dimensionality of the data should be
                (num_examples x input_dim) where 'input_dim' is the full
                dimension of the data, e.g., since MNIST images are 28x28, it
                will be 784.  Values should be of type np.float32, and the data
                should be normalized to have a minimum value of 0.0 and a
                maximum value of 1.0.

            y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values should be of type np.uint8 and
                for MNIST will contain the values 0-9.
    """
    with gzip.open(image_filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        X = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows*cols)
        X = X.astype(np.float32) / 255.0

    with gzip.open(label_filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        y = np.frombuffer(f.read(), dtype=np.uint8)

    return X, y


def softmax_loss(Z, y_one_hot):
    """ Return softmax loss.  Note that for the purposes of this assignment,
    you don't need to worry about "nicely" scaling the numerical properties
    of the log-sum-exp computation, but can just compute this directly.

    Args:
        Z (ndl.Tensor[np.float32]): 2D Tensor of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y_one_hot (ndl.Tensor[np.int8]): 2D Tensor of shape (batch_size, num_classes)
            containing a 1 at the index of the true label of each example and
            zeros elsewhere.

    Returns:
        Average softmax loss over the sample. (ndl.Tensor[np.float32])
    """
    batch_size = Z.shape[0]
    lhs = ndl.log(ndl.exp(Z).sum(axes=(1,)))
    rhs = (Z * y_one_hot).sum(axes=(1,))
    loss = (lhs - rhs).sum()
    return loss / batch_size


def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size (and
    again, without randomizing the order of X).

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (ndl.Tensor[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (ndl.Tensor[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD mini-batch

    Returns:
        Tuple: (W1, W2)
            W1: ndl.Tensor[np.float32]
            W2: ndl.Tensor[np.float32]
    """
    idx = 0
    num_classes = W2.shape[1]
    while idx < X.shape[0]:
        X_batch = ndl.Tensor(X[idx:idx+batch])
        Z1 = X_batch.matmul(W1)
        network_output = ndl.relu(Z1).matmul(W2)

        y_batch = y[idx:idx+batch]
        y_one_hot = np.zeros((batch, num_classes))
        y_one_hot[np.arange(batch), y_batch] = 1
        y_one_hot = ndl.Tensor(y_one_hot)

        loss = softmax_loss(network_output, y_one_hot)
        loss.backward()

        W1 = ndl.Tensor(W1.numpy() - lr * W1.grad.numpy())
        W2 = ndl.Tensor(W2.numpy() - lr * W2.grad.numpy())
        idx += batch
    return W1, W2


### CODE BELOW IS FOR ILLUSTRATION, YOU DO NOT NEED TO EDIT

def loss_err(h,y):
    """ Helper function to compute both loss and error"""
    y_one_hot = np.zeros((y.shape[0], h.shape[-1]))
    y_one_hot[np.arange(y.size), y] = 1
    y_ = ndl.Tensor(y_one_hot)
    return softmax_loss(h,y_).numpy(), np.mean(h.numpy().argmax(axis=1) != y)
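A quick numeric sanity check for softmax_loss — a sketch that assumes it is run from the hw1 directory with softmax_loss from apps/simple_ml.py in scope; the expected value is recomputed with plain NumPy:

import sys
sys.path.append('python')
import numpy as np
import needle as ndl

Z = np.random.randn(4, 10).astype(np.float32)   # logits for 4 examples
y = np.random.randint(10, size=4)
y_one_hot = np.zeros((4, 10), dtype=np.float32)
y_one_hot[np.arange(4), y] = 1

loss = softmax_loss(ndl.Tensor(Z), ndl.Tensor(y_one_hot))
expected = np.mean(np.log(np.exp(Z).sum(axis=1)) - Z[np.arange(4), y])
assert np.isclose(loss.numpy(), expected, atol=1e-5)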
--------------------------------------------------------------------------------
/hw1/python/needle/__init__.py:
--------------------------------------------------------------------------------
from .autograd import Tensor, cpu, all_devices
from . import ops
from .ops import *
--------------------------------------------------------------------------------
/hw1/tempCodeRunnerFile.ipynb:
--------------------------------------------------------------------------------
!pip3 install --upgrade --no-deps git+https://github.com/dlsys10714/mugrade.git
!pip3 install numdifftools
--------------------------------------------------------------------------------
/hw1/test.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('./tests')
from test_autograd_hw import *
# gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5,4)), axes=(1,))
# test_nn_epoch_ndl()
test_matmul_simple_backward()
test_matmul_batched_backward()
--------------------------------------------------------------------------------
/hw2/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw2/README.md:
--------------------------------------------------------------------------------
# Homework 2

Public repository and stub/testing code for Homework 2 of 10-714.
--------------------------------------------------------------------------------
/hw2/apps/mlp_resnet.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('../python')
import needle as ndl
import needle.nn as nn
import numpy as np
import time
import os

np.random.seed(0)

def ResidualBlock(dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
    main = nn.Sequential(
        nn.Linear(dim, hidden_dim),
        norm(hidden_dim),
        nn.ReLU(),
        nn.Dropout(drop_prob),
        nn.Linear(hidden_dim, dim),
        norm(dim)
    )

    return nn.Sequential(nn.Residual(main), nn.ReLU())


def MLPResNet(dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
    layers = []
    layers.append(nn.Flatten())
    layers.append(nn.Linear(dim, hidden_dim))
    layers.append(nn.ReLU())
    for _ in range(num_blocks):
        layers.append(ResidualBlock(hidden_dim, hidden_dim // 2, norm, drop_prob))
    layers.append(nn.Linear(hidden_dim, num_classes))
    return nn.Sequential(*layers)


def epoch(dataloader, model, opt=None):
    np.random.seed(4)
    if opt is not None:
        model.train()
    else:
        model.eval()

    loss_func = nn.SoftmaxLoss()

    losses = []
    total_acc = 0
    for X, y in dataloader:
        out = model(X)
        loss = loss_func(out, y)
        if opt is not None:
            loss.backward()
            opt.step()

        losses.append(loss.numpy())
        total_acc += (out.numpy().argmax(axis=1) == y.numpy()).sum()

    return 1 - total_acc / len(dataloader.dataset), np.mean(losses)


def train_mnist(batch_size=100, epochs=10, optimizer=ndl.optim.Adam,
                lr=0.001, weight_decay=0.001, hidden_dim=100, data_dir="data"):
    np.random.seed(4)
    dataset = ndl.data.MNISTDataset(
        os.path.join(data_dir, "train-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "train-labels-idx1-ubyte.gz")
    )
    data_loader = ndl.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    test_dataset = ndl.data.MNISTDataset(
        os.path.join(data_dir, "t10k-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "t10k-labels-idx1-ubyte.gz")
    )
    test_data_loader = ndl.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = MLPResNet(28 * 28, hidden_dim=hidden_dim, num_blocks=3, num_classes=10)
    opt = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_err, train_loss = 0, 0
    test_err, test_loss = 0, 0
    for i in range(epochs):
        start = time.time()
        train_err, train_loss = epoch(data_loader, model, opt)
        test_err, test_loss = epoch(test_data_loader, model)
        end = time.time()
        print("Epoch %d: Train err: %f, Train loss: %f | Test err: %f, Test loss: %f, Time: %f" % (
            i, train_err, train_loss, test_err, test_loss, end - start
        ))
    return train_err, train_loss, test_err, test_loss


if __name__ == "__main__":
    train_mnist(data_dir="../data")
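A shape sanity check for the model — a sketch assuming hw2's needle (with nn.Flatten available) and MLPResNet from this file in scope, run from hw2/apps:

import sys
sys.path.append('../python')
import numpy as np
import needle as ndl

model = MLPResNet(28 * 28, hidden_dim=100, num_blocks=3, num_classes=10)
X = ndl.Tensor(np.random.rand(8, 28 * 28).astype(np.float32))
out = model(X)
print(out.shape)   # expect (8, 10): one logit row per example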
--------------------------------------------------------------------------------
/hw2/debug.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('./tests')
sys.path.append("./python")

from test_nn_and_optim import *
from test_data import *

# test_nn_layernorm_backward_1()


if __name__ == "__main__":
    pow(1000, 1/256)
    test_mlp_train_mnist_1()
--------------------------------------------------------------------------------
/hw2/python/needle/__init__.py:
--------------------------------------------------------------------------------
from .autograd import Tensor, cpu, all_devices
from . import ops
from .ops import *
from . import init
from . import data
from . import nn
from . import optim
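The hw2 package now bundles ops, init, data, nn and optim; a minimal end-to-end training step looks roughly like this — a sketch run from the hw2 directory, with invented shapes:

import sys
sys.path.append('./python')
import numpy as np
import needle as ndl
import needle.nn as nn

model = nn.Sequential(nn.Linear(784, 100), nn.ReLU(), nn.Linear(100, 10))
opt = ndl.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
X = ndl.Tensor(np.random.rand(32, 784).astype(np.float32))
y = ndl.Tensor(np.random.randint(10, size=32))

loss = nn.SoftmaxLoss()(model(X), y)
loss.backward()
opt.step()          # one SGD-with-momentum update on every parameter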
--------------------------------------------------------------------------------
/hw2/python/needle/data.py:
--------------------------------------------------------------------------------
import numpy as np
import gzip
import struct
from .autograd import Tensor

from typing import Iterator, Optional, List, Sized, Union, Iterable, Any


class Transform:
    def __call__(self, x):
        raise NotImplementedError


class RandomFlipHorizontal(Transform):
    def __init__(self, p = 0.5):
        self.p = p

    def __call__(self, img):
        """
        Horizontally flip an image, specified as an H x W x C NDArray.
        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C ndarray corresponding to image flipped with probability self.p
        Note: use the provided code to provide randomness, for easier testing
        """
        flip_img = np.random.rand() < self.p
        if flip_img:
            img = img[:, ::-1, :]
        return img


class RandomCrop(Transform):
    def __init__(self, padding=3):
        self.padding = padding

    def __call__(self, img):
        """ Zero pad and then randomly crop an image.
        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C NDArray of the cropped image
        Note: generate the image shifted by shift_x, shift_y specified below
        """
        shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2)
        img_pad = np.pad(img, ((self.padding, self.padding), (self.padding, self.padding), (0, 0)), 'constant', constant_values=0)
        img_crop = img_pad[self.padding + shift_x : self.padding + shift_x + img.shape[0], self.padding + shift_y : self.padding + shift_y + img.shape[1], :]
        return img_crop


class Dataset:
    r"""An abstract class representing a `Dataset`.

    All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key.  Subclasses must also overwrite
    :meth:`__len__`, which is expected to return the size of the dataset.
    """

    def __init__(self, transforms: Optional[List] = None):
        self.transforms = transforms

    def __getitem__(self, index) -> object:
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def apply_transforms(self, x):
        if self.transforms is not None:
            # apply the transforms
            for tform in self.transforms:
                x = tform(x)
        return x


class DataLoader:
    r"""
    Data loader.  Combines a dataset and a sampler, and provides an iterable over
    the given dataset.
    Args:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load
            (default: ``1``).
        shuffle (bool, optional): set to ``True`` to have the data reshuffled
            at every epoch (default: ``False``).
    """
    dataset: Dataset
    batch_size: Optional[int]

    def __init__(
        self,
        dataset: Dataset,
        batch_size: Optional[int] = 1,
        shuffle: bool = False,
    ):

        self.dataset = dataset
        self.shuffle = shuffle
        self.batch_size = batch_size
        if not self.shuffle:
            self.ordering = np.array_split(np.arange(len(dataset)),
                                           range(batch_size, len(dataset), batch_size))
        self.batch_idx = 0

    def __iter__(self):
        if self.shuffle:
            self.ordering = np.array_split(np.random.permutation(len(self.dataset)),
                                           range(self.batch_size, len(self.dataset), self.batch_size))
        else:
            self.ordering = np.array_split(np.arange(len(self.dataset)),
                                           range(self.batch_size, len(self.dataset), self.batch_size))
        self.batch_idx = 0
        return self

    def __next__(self):
        if self.batch_idx >= len(self.ordering):
            raise StopIteration
        batch_indices = self.ordering[self.batch_idx]
        X_batch, y_batch = self.dataset[batch_indices]
        self.batch_idx += 1
        return Tensor(X_batch), Tensor(y_batch)


class MNISTDataset(Dataset):
    def __init__(
        self,
        image_filename: str,
        label_filename: str,
        transforms: Optional[List] = None,
    ):
        # load the data - copied from hw1/apps/simple_ml.py::parse_mnist()
        with gzip.open(image_filename, 'rb') as f:
            magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
            X = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols, 1)
            X = X.astype(np.float32) / 255.0

        with gzip.open(label_filename, 'rb') as f:
            magic, num = struct.unpack(">II", f.read(8))
            y = np.frombuffer(f.read(), dtype=np.uint8)

        self.images = X
        self.labels = y
        self.transforms = [] if transforms is None else transforms

    def __getitem__(self, index) -> object:
        image = self.images[index]
        label = self.labels[index]
        for func in self.transforms:
            image = func(image)
        return image, label

    def __len__(self) -> int:
        return len(self.labels)

class NDArrayDataset(Dataset):
    def __init__(self, *arrays):
        self.arrays = arrays

    def __len__(self) -> int:
        return self.arrays[0].shape[0]

    def __getitem__(self, i) -> object:
        return tuple([a[i] for a in self.arrays])
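Dataset and DataLoader in action — a sketch run from the hw2 directory with synthetic arrays:

import sys
sys.path.append('./python')
import numpy as np
import needle as ndl

X = np.random.rand(10, 5).astype(np.float32)
y = np.arange(10)
loader = ndl.data.DataLoader(ndl.data.NDArrayDataset(X, y), batch_size=4)
for Xb, yb in loader:          # yields needle Tensors; last batch is smaller
    print(Xb.shape, yb.shape)  # (4, 5) (4,) ... then (2, 5) (2,)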
--------------------------------------------------------------------------------
/hw2/python/needle/init.py:
--------------------------------------------------------------------------------
import math
import needle as ndl


def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate random numbers uniform between low and high """
    device = ndl.cpu() if device is None else device
    array = device.rand(*shape) * (high - low) + low
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate random normal with specified mean and std deviation """
    device = ndl.cpu() if device is None else device
    array = device.randn(*shape) * std + mean
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate constant Tensor """
    device = ndl.cpu() if device is None else device
    array = device.ones(*shape, dtype=dtype) * c  # note: can change dtype
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def ones(*shape, device=None, dtype="float32", requires_grad=False):
    """ Generate all-ones Tensor """
    return constant(*shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad)


def zeros(*shape, device=None, dtype="float32", requires_grad=False):
    """ Generate all-zeros Tensor """
    return constant(*shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad)


def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
    """ Generate binary random Tensor """
    device = ndl.cpu() if device is None else device
    array = device.rand(*shape) <= p
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
    """ Generate one-hot encoding Tensor """
    device = ndl.cpu() if device is None else device
    return ndl.Tensor(device.one_hot(n, i.numpy(), dtype=dtype), device=device, requires_grad=requires_grad)


def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
    a = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return rand(fan_in, fan_out, low=-a, high=a, **kwargs)


def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs)


def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    bound = gain * math.sqrt(3.0 / fan_in)
    return rand(fan_in, fan_out, low=-bound, high=bound, **kwargs)


def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    std = gain * math.sqrt(1.0 / fan_in)
    return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs)
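The Kaiming-uniform bound is gain * sqrt(3 / fan_in) with gain = sqrt(2); a quick check of the resulting range — a sketch run with hw2's python/ directory on the path:

import sys, math
sys.path.append('./python')
import needle.init as init

W = init.kaiming_uniform(784, 100)            # uniform in [-bound, bound]
bound = math.sqrt(2.0) * math.sqrt(3.0 / 784)
w = W.numpy()
assert w.min() >= -bound and w.max() <= bound
print(w.shape, bound)                          # (784, 100), bound ~0.0875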
--------------------------------------------------------------------------------
/hw2/python/needle/optim.py:
--------------------------------------------------------------------------------
"""Optimization module"""
import needle as ndl
import numpy as np


class Optimizer:
    def __init__(self, params):
        self.params = params

    def step(self):
        raise NotImplementedError()

    def reset_grad(self):
        for p in self.params:
            p.grad = None


class SGD(Optimizer):
    def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
        super().__init__(params)
        self.lr = lr
        self.momentum = momentum
        self.u = {}
        self.weight_decay = weight_decay

    def step(self):
        for param in self.params:
            # add the weight-decay penalty term to the gradient
            grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach()
            u = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad_with_penalty
            # convert the dtype from float64 back to float32
            u = ndl.Tensor(u, dtype=param.dtype)
            self.u[id(param)] = u
            param.data -= self.lr * u


class Adam(Optimizer):
    def __init__(
        self,
        params,
        lr=0.01,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        weight_decay=0.0,
    ):
        super().__init__(params)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.weight_decay = weight_decay
        self.t = 0

        self.m = {}
        self.v = {}

    def step(self):
        self.t += 1
        for param in self.params:
            # add the weight-decay penalty term to the gradient
            grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach()
            # convert the dtype from float64 back to float32
            grad_with_penalty = ndl.Tensor(grad_with_penalty, dtype=param.dtype)

            m = self.beta1 * self.m.get(id(param), 0) + (1 - self.beta1) * grad_with_penalty
            v = self.beta2 * self.v.get(id(param), 0) + (1 - self.beta2) * grad_with_penalty ** 2
            self.m[id(param)] = m.detach()
            self.v[id(param)] = v.detach()
            m_hat = m / (1 - self.beta1 ** self.t)
            v_hat = v / (1 - self.beta2 ** self.t)
            param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps)
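The bias correction above matters most on the first steps: with beta1 = 0.9, after step t = 1 the raw moment m is only 0.1 * grad, and dividing by (1 - 0.9^1) = 0.1 rescales it back to grad. A standalone NumPy illustration (my own sketch, independent of needle):

import numpy as np

beta1, grad = 0.9, np.array(2.0)
m = 0.0
for t in range(1, 4):
    m = beta1 * m + (1 - beta1) * grad
    m_hat = m / (1 - beta1 ** t)
    print(t, float(m), float(m_hat))  # m creeps toward 2.0; m_hat is 2.0 immediately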
--------------------------------------------------------------------------------
/hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw3/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "files.associations": {
        "vector": "cpp"
    }
}
--------------------------------------------------------------------------------
/hw3/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(needle C CXX)
cmake_policy(SET CMP0146 OLD)

# find correct version of Python
execute_process(COMMAND python3-config --prefix
  OUTPUT_VARIABLE Python_ROOT_DIR)
find_package(Python COMPONENTS Development Interpreter REQUIRED)
include_directories(${Python_INCLUDE_DIRS})

# find pybind
execute_process(COMMAND python3 -m pybind11 --cmakedir
  RESULT_VARIABLE __pybind_exit_code
  OUTPUT_VARIABLE __pybind_path
  OUTPUT_STRIP_TRAILING_WHITESPACE)
find_package(pybind11 PATHS ${__pybind_path})


if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
else()
  set(CMAKE_CXX_FLAGS "/std:c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
endif()

include_directories(SYSTEM ${pybind11_INCLUDE_DIRS})
list(APPEND LINKER_LIBS ${pybind11_LIBRARIES})

################### 32 | ### CPU BACKEND ### 33 | ################### 34 | add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc) 35 | target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS}) 36 | pybind11_extension(ndarray_backend_cpu) 37 | pybind11_strip(ndarray_backend_cpu) 38 | 39 | 40 | # directly output to ffi folder 41 | set_target_properties(ndarray_backend_cpu 42 | PROPERTIES 43 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 44 | CXX_VISIBILITY_PRESET "hidden" 45 | ) 46 | 47 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 48 | set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup) 49 | endif() 50 | 51 | 52 | 53 | #################### 54 | ### CUDA BACKEND ### 55 | #################### 56 | find_package(CUDA) 57 | if(CUDA_FOUND) 58 | message(STATUS "Found cuda, building cuda backend") 59 | 60 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 61 | list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY}) 62 | 63 | # invoke nvidia-smi to detect whether a GPU is actually present 64 | execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET) 65 | if(NV_RET EQUAL "0") 66 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto) 67 | else() 68 | # fall back to compute capability 3.7 (Tesla K80) 69 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7) 70 | endif() 71 | 72 | # set arch flags properly 73 | CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS}) 74 | 75 | target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS}) 76 | pybind11_extension(ndarray_backend_cuda) 77 | pybind11_strip(ndarray_backend_cuda) 78 | 79 | # directly output to ffi folder 80 | set_target_properties(ndarray_backend_cuda 81 | PROPERTIES 82 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 83 | CXX_VISIBILITY_PRESET "hidden" 84 | CUDA_VISIBILITY_PRESET "hidden" 85 | ) 86 | 87 | endif() 88 | 89 | -------------------------------------------------------------------------------- /hw3/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lib pybind clean format all 2 | 3 | all: lib 4 | 5 | 6 | lib: 7 | @mkdir -p build 8 | @cd build; cmake .. 9 | @cd build; $(MAKE) 10 | 11 | format: 12 | python3 -m black . 13 | clang-format -i src/*.cc src/*.cu 14 | 15 | clean: 16 | rm -rf build python/needle/backend_ndarray/ndarray_backend*.so 17 | -------------------------------------------------------------------------------- /hw3/README.md: -------------------------------------------------------------------------------- 1 | # Homework 3 2 | 3 | Public repository and stub/testing code for Homework 3 of 10-714.
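The Makefile above drives the CMake build: it produces the `ndarray_backend_cpu` (and, when CUDA is found, `ndarray_backend_cuda`) pybind11 modules and drops the resulting `.so` files directly into `python/needle/backend_ndarray/`. After `make`, which backend `needle` actually loads is controlled by the `NEEDLE_BACKEND` environment variable read in `python/needle/backend_selection.py`; a minimal sanity check, assuming the build succeeded:

import os, sys
os.environ["NEEDLE_BACKEND"] = "nd"   # "nd" = compiled ndarray backends, "np" = plain numpy
sys.path.append("./python")
import needle as ndl                  # prints "Using needle backend"
print(ndl.all_devices())              # lists the devices available on this machine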
4 | 5 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/cc") 2 | set(CMAKE_C_COMPILER_ARG1 "") 3 | set(CMAKE_C_COMPILER_ID "GNU") 4 | set(CMAKE_C_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_C_COMPILER_WRAPPER "") 7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23") 10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") 11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") 12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") 13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17") 14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23") 15 | 16 | set(CMAKE_C_PLATFORM_ID "Linux") 17 | set(CMAKE_C_SIMULATE_ID "") 18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU") 19 | set(CMAKE_C_SIMULATE_VERSION "") 20 | 21 | 22 | 23 | 24 | set(CMAKE_AR "/usr/bin/ar") 25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11") 26 | set(CMAKE_RANLIB "/usr/bin/ranlib") 27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 28 | set(CMAKE_LINKER "/usr/bin/ld") 29 | set(CMAKE_MT "") 30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 31 | set(CMAKE_COMPILER_IS_GNUCC 1) 32 | set(CMAKE_C_COMPILER_LOADED 1) 33 | set(CMAKE_C_COMPILER_WORKS TRUE) 34 | set(CMAKE_C_ABI_COMPILED TRUE) 35 | 36 | set(CMAKE_C_COMPILER_ENV_VAR "CC") 37 | 38 | set(CMAKE_C_COMPILER_ID_RUN 1) 39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) 40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) 41 | set(CMAKE_C_LINKER_PREFERENCE 10) 42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE) 43 | 44 | # Save compiler ABI information. 
45 | set(CMAKE_C_SIZEOF_DATA_PTR "8") 46 | set(CMAKE_C_COMPILER_ABI "ELF") 47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") 48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 49 | 50 | if(CMAKE_C_SIZEOF_DATA_PTR) 51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") 52 | endif() 53 | 54 | if(CMAKE_C_COMPILER_ABI) 55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") 56 | endif() 57 | 58 | if(CMAKE_C_LIBRARY_ARCHITECTURE) 59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | endif() 61 | 62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") 63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) 64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") 65 | endif() 66 | 67 | 68 | 69 | 70 | 71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") 73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 75 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++") 2 | set(CMAKE_CXX_COMPILER_ARG1 "") 3 | set(CMAKE_CXX_COMPILER_ID "GNU") 4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_CXX_COMPILER_WRAPPER "") 7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") 10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") 11 | set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") 12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") 13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") 14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") 15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") 16 | 17 | set(CMAKE_CXX_PLATFORM_ID "Linux") 18 | set(CMAKE_CXX_SIMULATE_ID "") 19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU") 20 | set(CMAKE_CXX_SIMULATE_VERSION "") 21 | 22 | 23 | 24 | 25 | set(CMAKE_AR "/usr/bin/ar") 26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11") 27 | set(CMAKE_RANLIB "/usr/bin/ranlib") 28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 29 | set(CMAKE_LINKER "/usr/bin/ld") 30 | set(CMAKE_MT "") 31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 32 | set(CMAKE_COMPILER_IS_GNUCXX 1) 33 | set(CMAKE_CXX_COMPILER_LOADED 1) 34 | set(CMAKE_CXX_COMPILER_WORKS TRUE) 35 | set(CMAKE_CXX_ABI_COMPILED TRUE) 36 | 37 | set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") 38 | 39 | set(CMAKE_CXX_COMPILER_ID_RUN 1) 40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m) 41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) 42 | 43 | foreach (lang C OBJC OBJCXX) 44 | if (CMAKE_${lang}_COMPILER_ID_RUN) 45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) 46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) 47 | endforeach() 48 | endif() 49 | endforeach() 50 | 51 | set(CMAKE_CXX_LINKER_PREFERENCE 30) 52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) 53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE) 54 | 55 | # Save compiler ABI information. 
56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8") 57 | set(CMAKE_CXX_COMPILER_ABI "ELF") 58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") 59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | 61 | if(CMAKE_CXX_SIZEOF_DATA_PTR) 62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") 63 | endif() 64 | 65 | if(CMAKE_CXX_COMPILER_ABI) 66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") 67 | endif() 68 | 69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE) 70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 71 | endif() 72 | 73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") 74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) 75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") 76 | endif() 77 | 78 | 79 | 80 | 81 | 82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") 84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 86 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-5.15.120+") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.15.120+") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-5.15.120+") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "5.15.120+") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out -------------------------------------------------------------------------------- 
/hw3/build/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw3") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw3/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/CMakeRuleHashes.txt: -------------------------------------------------------------------------------- 1 | # Hashes of file build rules. 2 | 9720afbab5807e3b7d272586be3395ba CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/Makefile.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # The generator used is: 5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") 6 | 7 | # The top level Makefile was generated from the following files: 8 | set(CMAKE_MAKEFILE_DEPENDS 9 | "CMakeCache.txt" 10 | "/content/drive/Othercomputers/My MacBook Pro/hw3/CMakeLists.txt" 11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake" 12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake" 13 | "CMakeFiles/3.27.9/CMakeSystem.cmake" 14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend" 16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake" 17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake" 18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake" 19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake" 20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake" 21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake" 22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake" 23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake" 24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake" 25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake" 26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake" 27 | 
"/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake" 28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake" 29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake" 30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake" 31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake" 32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake" 33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake" 34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake" 35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake" 36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake" 37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake" 38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake" 39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake" 40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake" 41 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake" 42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake" 43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake" 44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake" 45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake" 46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake" 47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake" 48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake" 49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake" 50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake" 51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake" 52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake" 53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake" 54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake" 55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake" 56 | ) 57 | 58 | # The corresponding makefile is: 59 | set(CMAKE_MAKEFILE_OUTPUTS 60 | "Makefile" 61 | "CMakeFiles/cmake.check_cache" 62 | ) 63 | 64 | # Byproducts of CMake generate step: 65 | set(CMAKE_MAKEFILE_PRODUCTS 66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 67 | 
"CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake" 68 | "CMakeFiles/CMakeDirectoryInformation.cmake" 69 | ) 70 | 71 | # Dependency information for all targets: 72 | set(CMAKE_DEPEND_INFO_FILES 73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake" 74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake" 75 | ) 76 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/Makefile2: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | #============================================================================= 9 | # Special targets provided by cmake. 10 | 11 | # Disable implicit rules so canonical targets will work. 12 | .SUFFIXES: 13 | 14 | # Disable VCS-based implicit rules. 15 | % : %,v 16 | 17 | # Disable VCS-based implicit rules. 18 | % : RCS/% 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/%,v 22 | 23 | # Disable VCS-based implicit rules. 24 | % : SCCS/s.% 25 | 26 | # Disable VCS-based implicit rules. 27 | % : s.% 28 | 29 | .SUFFIXES: .hpux_make_needs_suffix_list 30 | 31 | # Command-line flag to silence nested $(MAKE). 32 | $(VERBOSE)MAKESILENT = -s 33 | 34 | #Suppress display of executed commands. 35 | $(VERBOSE).SILENT: 36 | 37 | # A target that is always out of date. 38 | cmake_force: 39 | .PHONY : cmake_force 40 | 41 | #============================================================================= 42 | # Set environment variables for the build. 43 | 44 | # The shell in which to execute make rules. 45 | SHELL = /bin/sh 46 | 47 | # The CMake executable. 48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake 49 | 50 | # The command to remove a file. 51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f 52 | 53 | # Escaping for special characters. 54 | EQUALS = = 55 | 56 | # The top-level source directory on which CMake was run. 57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3" 58 | 59 | # The top-level build directory on which CMake was run. 60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3/build" 61 | 62 | #============================================================================= 63 | # Directory level rules for the build root directory 64 | 65 | # The main recursive "all" target. 66 | all: CMakeFiles/ndarray_backend_cpu.dir/all 67 | all: CMakeFiles/ndarray_backend_cuda.dir/all 68 | .PHONY : all 69 | 70 | # The main recursive "preinstall" target. 71 | preinstall: 72 | .PHONY : preinstall 73 | 74 | # The main recursive "clean" target. 75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean 76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean 77 | .PHONY : clean 78 | 79 | #============================================================================= 80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir 81 | 82 | # All Build rule for target. 
83 | CMakeFiles/ndarray_backend_cpu.dir/all: 84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend 85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build 86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu" 87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all 88 | 89 | # Build rule for subdir invocation for target. 90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system 91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2 92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all 93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0 94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule 95 | 96 | # Convenience name for target. 97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule 98 | .PHONY : ndarray_backend_cpu 99 | 100 | # clean rule for target. 101 | CMakeFiles/ndarray_backend_cpu.dir/clean: 102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean 103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean 104 | 105 | #============================================================================= 106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir 107 | 108 | # All Build rule for target. 109 | CMakeFiles/ndarray_backend_cuda.dir/all: 110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build 112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda" 113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all 114 | 115 | # Build rule for subdir invocation for target. 116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system 117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2 118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all 119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0 120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule 121 | 122 | # Convenience name for target. 123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule 124 | .PHONY : ndarray_backend_cuda 125 | 126 | # clean rule for target. 127 | CMakeFiles/ndarray_backend_cuda.dir/clean: 128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean 129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean 130 | 131 | #============================================================================= 132 | # Special targets to cleanup operation of make. 133 | 134 | # Special rule to run CMake to check the build system integrity. 135 | # No rule that depends on this can have commands that come from listfiles 136 | # because they might be regenerated. 
137 | cmake_check_build_system: 138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 139 | .PHONY : cmake_check_build_system 140 | 141 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cpu.dir 2 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cuda.dir 3 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/edit_cache.dir 4 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/rebuild_cache.dir 5 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/content/drive/Othercomputers/My MacBook Pro/hw3/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 12 | ) 13 | 14 | # Targets to which this target links which contain Fortran sources. 15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 16 | ) 17 | 18 | # Fortran module output directory. 19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 20 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.pdb" 4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" 5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cpu. 
2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS 6 | 7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include 8 | 9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden 10 | 11 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | ) 12 | 13 | # Targets to which this target links which contain Fortran sources. 14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 15 | ) 16 | 17 | # Fortran module output directory. 18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 19 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.pdb" 4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o" 5 | ) 6 | 7 | # Per-language clean rules from dependency scanning. 
8 | foreach(lang ) 9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL) 10 | endforeach() 11 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 3 2 | CMAKE_PROGRESS_2 = 4 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /hw3/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw3 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX 
"/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/usr/bin/objdump") 43 | endif() 44 | 45 | if(CMAKE_INSTALL_COMPONENT) 46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 47 | else() 48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 49 | endif() 50 | 51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 52 | "${CMAKE_INSTALL_MANIFEST_FILES}") 53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw3/build/${CMAKE_INSTALL_MANIFEST}" 54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 55 | -------------------------------------------------------------------------------- /hw3/build/detect_cuda_compute_capabilities.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main() 4 | { 5 | int count = 0; 6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; 7 | if (count == 0) return -1; 8 | for (int device = 0; device < count; ++device) 9 | { 10 | cudaDeviceProp prop; 11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) 12 | std::printf("%d.%d ", prop.major, prop.minor); 13 | } 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /hw3/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("./tests/hw3") 3 | sys.path.append("./python") 4 | 5 | from test_ndarray import * 6 | from needle import backend_ndarray as nd 7 | 8 | 9 | if __name__ == "__main__": 10 | test_getitem(device=nd.cpu(), params={"shape": (8, 8, 2, 2, 2, 2), "fn": lambda X: X[1:3, 5:8, 1:2, 0:1, 0:1, 1:2]}) -------------------------------------------------------------------------------- /hw3/hw3.ipynb - Colaboratory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/hw3.ipynb - Colaboratory.pdf -------------------------------------------------------------------------------- /hw3/python/needle/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ops 2 | from .ops import * 3 | from .autograd import Tensor, cpu, all_devices 4 | 5 | from . import init 6 | from .init import ones, zeros, zeros_like, ones_like 7 | 8 | from . import data 9 | from . import nn 10 | from . 
import optim 11 | from .backend_selection import * 12 | -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/autograd.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/autograd.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/optim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/optim.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__init__.py: -------------------------------------------------------------------------------- 1 | from .ndarray import * 2 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc -------------------------------------------------------------------------------- 
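With the wiring above, `needle.backend_ndarray` re-exports everything from `ndarray.py` (whose text is not included in this dump). A smoke-test sketch in the style of debug.py; `nd.array(...)` and `.numpy()` are assumed to follow the standard course ndarray.py API:

import sys
sys.path.append("./python")
from needle import backend_ndarray as nd

x = nd.array([[1.0, 2.0], [3.0, 4.0]], device=nd.cpu_numpy())  # pure-numpy reference backend
y = x + x                     # elementwise add, dispatching to ewise_add below
print(y.numpy())              # [[2. 4.] [6. 8.]]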
/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/ndarray_backend_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __device_name__ = "numpy" 5 | _datatype = np.float32 6 | _datetype_size = np.dtype(_datatype).itemsize 7 | 8 | 9 | class Array: 10 | def __init__(self, size): 11 | self.array = np.empty(size, dtype=np.float32) 12 | 13 | @property 14 | def size(self): 15 | return self.array.size 16 | 17 | 18 | def to_numpy(a, shape, strides, offset): 19 | return np.lib.stride_tricks.as_strided( 20 | a.array[offset:], shape, tuple([s * _datetype_size for s in strides]) 21 | ) 22 | 23 | 24 | def from_numpy(a, out): 25 | out.array[:] = a.flatten() 26 | 27 | 28 | def fill(out, val): 29 | out.array.fill(val) 30 | 31 | 32 | def compact(a, out, shape, strides, offset): 33 | out.array[:] = to_numpy(a, shape, strides, offset).flatten() 34 | 35 | 36 | def ewise_setitem(a, out, shape, strides, offset): 37 | to_numpy(out, shape, strides, offset)[:] = a.array.reshape(shape) 38 | 39 | 40 | def scalar_setitem(size, val, out, shape, strides, offset): 41 | to_numpy(out, shape, strides, offset)[:] = val 42 | 43 | 44 | def ewise_add(a, b, out): 45 | out.array[:] = a.array + b.array 46 | 47 | 48 | def scalar_add(a, val, out): 49 | out.array[:] = a.array + val 50 | 51 | 52 | def ewise_mul(a, b, out): 53 | out.array[:] = a.array * b.array 54 | 55 | 56 | def scalar_mul(a, val, out): 57 | out.array[:] = a.array * val 58 | 59 | 60 | def ewise_div(a, b, out): 61 | out.array[:] = a.array / b.array 62 | 63 | 64 | def scalar_div(a, val, out): 65 | out.array[:] = a.array / val 66 | 67 | 68 | def scalar_power(a, val, out): 69 | out.array[:] = a.array**val 70 | 71 | 72 | def ewise_maximum(a, b, out): 73 | out.array[:] = np.maximum(a.array, b.array) 74 | 75 | 76 | def scalar_maximum(a, val, out): 77 | out.array[:] = np.maximum(a.array, val) 78 | 79 | 80 | def ewise_eq(a, b, out): 81 | out.array[:] = (a.array == b.array).astype(np.float32) 82 | 83 | 84 | def scalar_eq(a, val, out): 85 | out.array[:] = (a.array == val).astype(np.float32) 86 | 87 | 88 | def ewise_ge(a, b, out): 89 | out.array[:] = (a.array >= b.array).astype(np.float32) 90 | 91 | 92 | def scalar_ge(a, val, out): 93 | out.array[:] = (a.array >= val).astype(np.float32) 94 | 95 | 96 | def ewise_log(a, out): 97 | out.array[:] = np.log(a.array) 98 | 99 | 100 | def ewise_exp(a, out): 101 | out.array[:] = np.exp(a.array) 102 | 103 | 104 | def ewise_tanh(a, out): 105 | out.array[:] = np.tanh(a.array) 106 | 107 | 108 | def matmul(a, b, out, m, n, p): 109 | out.array[:] = (a.array.reshape(m, n) @ b.array.reshape(n, p)).reshape(-1) 110 | 111 | 112 
| def reduce_max(a, out, reduce_size): 113 | out.array[:] = a.array[:].reshape(-1, reduce_size).max(axis=1) 114 | 115 | 116 | def reduce_sum(a, out, reduce_size): 117 | out.array[:] = a.array[:].reshape(-1, reduce_size).sum(axis=1) 118 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_numpy.py: -------------------------------------------------------------------------------- 1 | """This file defines specific implementations of devices when using numpy as the NDArray backend. 2 | """ 3 | import numpy 4 | 5 | 6 | class Device: 7 | """Base class of all devices""" 8 | 9 | 10 | class CPUDevice(Device): 11 | """Represents data that sits on the CPU""" 12 | 13 | def __repr__(self): 14 | return "needle.cpu()" 15 | 16 | def __hash__(self): 17 | return self.__repr__().__hash__() 18 | 19 | def __eq__(self, other): 20 | return isinstance(other, CPUDevice) 21 | 22 | def enabled(self): 23 | return True 24 | 25 | def zeros(self, *shape, dtype="float32"): 26 | return numpy.zeros(shape, dtype=dtype) 27 | 28 | def ones(self, *shape, dtype="float32"): 29 | return numpy.ones(shape, dtype=dtype) 30 | 31 | def randn(self, *shape): 32 | # note: numpy doesn't support types within standard random routines, and 33 | # .astype("float32") doesn't work if we're generating a singleton 34 | return numpy.random.randn(*shape) 35 | 36 | def rand(self, *shape): 37 | # note: numpy doesn't support types within standard random routines, and 38 | # .astype("float32") doesn't work if we're generating a singleton 39 | return numpy.random.rand(*shape) 40 | 41 | def one_hot(self, n, i, dtype="float32"): 42 | return numpy.eye(n, dtype=dtype)[i] 43 | 44 | def empty(self, shape, dtype="float32"): 45 | return numpy.empty(shape, dtype=dtype) 46 | 47 | def full(self, shape, fill_value, dtype="float32"): 48 | return numpy.full(shape, fill_value, dtype=dtype) 49 | 50 | 51 | def cpu(): 52 | """Return cpu device""" 53 | return CPUDevice() 54 | 55 | 56 | def default_device(): 57 | return cpu() 58 | 59 | 60 | def all_devices(): 61 | """Return a list of all available devices""" 62 | return [cpu()] 63 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_selection.py: -------------------------------------------------------------------------------- 1 | """Logic for backend selection""" 2 | import os 3 | 4 | 5 | BACKEND = os.environ.get("NEEDLE_BACKEND", "nd") 6 | 7 | 8 | if BACKEND == "nd": 9 | print("Using needle backend") 10 | from .
import backend_ndarray as array_api 11 | from .backend_ndarray import ( 12 | all_devices, 13 | cuda, 14 | cpu, 15 | cpu_numpy, 16 | default_device, 17 | BackendDevice as Device, 18 | ) 19 | 20 | NDArray = array_api.NDArray 21 | elif BACKEND == "np": 22 | print("Using numpy backend") 23 | import numpy as array_api 24 | from .backend_numpy import all_devices, cpu, default_device, Device 25 | 26 | NDArray = array_api.ndarray 27 | else: 28 | raise RuntimeError("Unknown needle array backend %s" % BACKEND) 29 | -------------------------------------------------------------------------------- /hw3/python/needle/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_basic import * 2 | from .data_transforms import * 3 | from .datasets import * 4 | -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/data_basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..autograd import Tensor 3 | 4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 5 | 6 | 7 | 8 | class Dataset: 9 | r"""An abstract class representing a `Dataset`. 10 | 11 | All subclasses should overwrite :meth:`__getitem__`, supporting fetching a 12 | data sample for a given key. Subclasses must also overwrite 13 | :meth:`__len__`, which is expected to return the size of the dataset. 14 | """ 15 | 16 | def __init__(self, transforms: Optional[List] = None): 17 | self.transforms = transforms 18 | 19 | def __getitem__(self, index) -> object: 20 | raise NotImplementedError 21 | 22 | def __len__(self) -> int: 23 | raise NotImplementedError 24 | 25 | def apply_transforms(self, x): 26 | if self.transforms is not None: 27 | # apply the transforms 28 | for tform in self.transforms: 29 | x = tform(x) 30 | return x 31 | 32 | 33 | class DataLoader: 34 | r""" 35 | Data loader. Combines a dataset and a sampler, and provides an iterable over 36 | the given dataset. 37 | Args: 38 | dataset (Dataset): dataset from which to load the data. 39 | batch_size (int, optional): how many samples per batch to load 40 | (default: ``1``). 41 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 42 | at every epoch (default: ``False``). 
43 | """ 44 | dataset: Dataset 45 | batch_size: Optional[int] 46 | 47 | def __init__( 48 | self, 49 | dataset: Dataset, 50 | batch_size: Optional[int] = 1, 51 | shuffle: bool = False, 52 | ): 53 | 54 | self.dataset = dataset 55 | self.shuffle = shuffle 56 | self.batch_size = batch_size 57 | if not self.shuffle: 58 | self.ordering = np.array_split(np.arange(len(dataset)), 59 | range(batch_size, len(dataset), batch_size)) 60 | 61 | def __iter__(self): 62 | ### BEGIN YOUR SOLUTION 63 | raise NotImplementedError() 64 | ### END YOUR SOLUTION 65 | return self 66 | 67 | def __next__(self): 68 | ### BEGIN YOUR SOLUTION 69 | raise NotImplementedError() 70 | ### END YOUR SOLUTION 71 | 72 | -------------------------------------------------------------------------------- /hw3/python/needle/data/data_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Transform: 4 | def __call__(self, x): 5 | raise NotImplementedError 6 | 7 | 8 | class RandomFlipHorizontal(Transform): 9 | def __init__(self, p = 0.5): 10 | self.p = p 11 | 12 | def __call__(self, img): 13 | """ 14 | Horizonally flip an image, specified as an H x W x C NDArray. 15 | Args: 16 | img: H x W x C NDArray of an image 17 | Returns: 18 | H x W x C ndarray corresponding to image flipped with probability self.p 19 | Note: use the provided code to provide randomness, for easier testing 20 | """ 21 | flip_img = np.random.rand() < self.p 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION 25 | 26 | 27 | class RandomCrop(Transform): 28 | def __init__(self, padding=3): 29 | self.padding = padding 30 | 31 | def __call__(self, img): 32 | """ Zero pad and then randomly crop an image. 33 | Args: 34 | img: H x W x C NDArray of an image 35 | Return 36 | H x W x C NAArray of cliped image 37 | Note: generate the image shifted by shift_x, shift_y specified below 38 | """ 39 | shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2) 40 | ### BEGIN YOUR SOLUTION 41 | raise NotImplementedError() 42 | ### END YOUR SOLUTION 43 | -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist_dataset import * 2 | from .ndarray_dataset import * 3 | -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/mnist_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from ..data_basic import Dataset 3 | import numpy as np 4 | 5 | class MNISTDataset(Dataset): 6 | def __init__( 7 | self, 8 | image_filename: str, 9 | label_filename: str, 10 | transforms: Optional[List] = None, 11 | ): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def __getitem__(self, index) -> object: 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | def __len__(self) -> int: 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/ndarray_dataset.py: -------------------------------------------------------------------------------- 1 | from ..data_basic import Dataset 2 | 3 | class NDArrayDataset(Dataset): 4 | def __init__(self, *arrays): 5 | self.arrays = arrays 6 | 7 | def __len__(self) -> int: 8 | return self.arrays[0].shape[0] 9 | 10 | def __getitem__(self, i) -> object: 11 | return tuple([a[i] for a in self.arrays]) -------------------------------------------------------------------------------- /hw3/python/needle/init/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_basic import * 2 | 3 | from .init_initializers import * 4 | -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/init_basic.py: -------------------------------------------------------------------------------- 1 | import math 2 | import needle as ndl 3 | 4 | 5 | def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False): 6 | """Generate random numbers uniform between low and high""" 7 | device = ndl.cpu() if device is None else device 8 | array = device.rand(*shape) * (high - low) + low 9 | return ndl.Tensor(array, device=device, dtype=dtype, 
requires_grad=requires_grad)
10 | 
11 | 
12 | def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
13 |     """Generate random normal with specified mean and std deviation"""
14 |     device = ndl.cpu() if device is None else device
15 |     array = device.randn(*shape) * std + mean
16 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
17 | 
18 | 
19 | def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
20 |     """Generate constant Tensor"""
21 |     device = ndl.cpu() if device is None else device
22 |     array = device.ones(*shape, dtype=dtype) * c  # note: can change dtype
23 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
24 | 
25 | 
26 | def ones(*shape, device=None, dtype="float32", requires_grad=False):
27 |     """Generate all-ones Tensor"""
28 |     return constant(
29 |         *shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad
30 |     )
31 | 
32 | 
33 | def zeros(*shape, device=None, dtype="float32", requires_grad=False):
34 |     """Generate all-zeros Tensor"""
35 |     return constant(
36 |         *shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad
37 |     )
38 | 
39 | 
40 | def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
41 |     """Generate binary random Tensor"""
42 |     device = ndl.cpu() if device is None else device
43 |     array = device.rand(*shape) <= p
44 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
45 | 
46 | 
47 | def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
48 |     """Generate one-hot encoding Tensor"""
49 |     device = ndl.cpu() if device is None else device
50 |     return ndl.Tensor(
51 |         device.one_hot(n, i.numpy(), dtype=dtype),
52 |         device=device,
53 |         requires_grad=requires_grad,
54 |     )
55 | 
56 | 
57 | def zeros_like(array, *, device=None, requires_grad=False):
58 |     device = device if device else array.device
59 |     return zeros(
60 |         *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad
61 |     )
62 | 
63 | 
64 | def ones_like(array, *, device=None, requires_grad=False):
65 |     device = device if device else array.device
66 |     return ones(
67 |         *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad
68 |     )
69 | 
-------------------------------------------------------------------------------- /hw3/python/needle/init/init_initializers.py: --------------------------------------------------------------------------------
1 | import math
2 | from .init_basic import *
3 | 
4 | 
5 | def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
6 |     ### BEGIN YOUR SOLUTION
7 |     raise NotImplementedError()
8 |     ### END YOUR SOLUTION
9 | 
10 | 
11 | def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
12 |     ### BEGIN YOUR SOLUTION
13 |     raise NotImplementedError()
14 |     ### END YOUR SOLUTION
15 | 
16 | 
17 | def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
18 |     assert nonlinearity == "relu", "Only relu supported currently"
19 |     ### BEGIN YOUR SOLUTION
20 |     raise NotImplementedError()
21 |     ### END YOUR SOLUTION
22 | 
23 | 
24 | def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
25 |     assert nonlinearity == "relu", "Only relu supported currently"
26 |     ### BEGIN YOUR SOLUTION
27 |     raise NotImplementedError()
28 |     ### END YOUR SOLUTION
29 | 
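# --- Editor's note ----------------------------------------------------------
# For the initializer stubs above: both families reduce to "draw from a
# distribution whose scale is set by fan_in/fan_out". A standalone NumPy
# sketch of the two uniform variants; the gain formulas follow the standard
# Glorot/He papers, and this is an illustration, not this repository's
# graded solution.

import math
import numpy as np

def xavier_uniform_sketch(fan_in, fan_out, gain=1.0):
    # U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out))
    a = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-a, a, size=(fan_in, fan_out))

def kaiming_uniform_sketch(fan_in, fan_out, nonlinearity="relu"):
    assert nonlinearity == "relu"
    # gain = sqrt(2) for relu; bound = gain * sqrt(3 / fan_in)
    bound = math.sqrt(2.0) * math.sqrt(3.0 / fan_in)
    return np.random.uniform(-bound, bound, size=(fan_in, fan_out))
# -----------------------------------------------------------------------------
-------------------------------------------------------------------------------- /hw3/python/needle/nn/__init__.py: --------------------------------------------------------------------------------
1 | from .nn_basic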
import * 2 | -------------------------------------------------------------------------------- /hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/nn/nn_basic.py: -------------------------------------------------------------------------------- 1 | """The module. 2 | """ 3 | from typing import List, Callable, Any 4 | from needle.autograd import Tensor 5 | from needle import ops 6 | import needle.init as init 7 | import numpy as np 8 | 9 | 10 | class Parameter(Tensor): 11 | """A special kind of tensor that represents parameters.""" 12 | 13 | 14 | def _unpack_params(value: object) -> List[Tensor]: 15 | if isinstance(value, Parameter): 16 | return [value] 17 | elif isinstance(value, Module): 18 | return value.parameters() 19 | elif isinstance(value, dict): 20 | params = [] 21 | for k, v in value.items(): 22 | params += _unpack_params(v) 23 | return params 24 | elif isinstance(value, (list, tuple)): 25 | params = [] 26 | for v in value: 27 | params += _unpack_params(v) 28 | return params 29 | else: 30 | return [] 31 | 32 | 33 | def _child_modules(value: object) -> List["Module"]: 34 | if isinstance(value, Module): 35 | modules = [value] 36 | modules.extend(_child_modules(value.__dict__)) 37 | return modules 38 | if isinstance(value, dict): 39 | modules = [] 40 | for k, v in value.items(): 41 | modules += _child_modules(v) 42 | return modules 43 | elif isinstance(value, (list, tuple)): 44 | modules = [] 45 | for v in value: 46 | modules += _child_modules(v) 47 | return modules 48 | else: 49 | return [] 50 | 51 | 52 | class Module: 53 | def __init__(self): 54 | self.training = True 55 | 56 | def parameters(self) -> List[Tensor]: 57 | """Return the list of parameters in the module.""" 58 | return _unpack_params(self.__dict__) 59 | 60 | def _children(self) -> List["Module"]: 61 | return _child_modules(self.__dict__) 62 | 63 | def eval(self): 64 | self.training = False 65 | for m in self._children(): 66 | m.training = False 67 | 68 | def train(self): 69 | self.training = True 70 | for m in self._children(): 71 | m.training = True 72 | 73 | def __call__(self, *args, **kwargs): 74 | return self.forward(*args, **kwargs) 75 | 76 | 77 | class Identity(Module): 78 | def forward(self, x): 79 | return x 80 | 81 | 82 | class Linear(Module): 83 | def __init__( 84 | self, in_features, out_features, bias=True, device=None, dtype="float32" 85 | ): 86 | super().__init__() 87 | self.in_features = in_features 88 | self.out_features = out_features 89 | 90 | ### BEGIN YOUR SOLUTION 91 | raise NotImplementedError() 92 | ### END YOUR SOLUTION 93 | 94 | def forward(self, X: Tensor) -> Tensor: 95 | ### BEGIN YOUR SOLUTION 96 | raise NotImplementedError() 97 | ### END YOUR SOLUTION 98 | 99 | 100 | class Flatten(Module): 101 | def forward(self, X): 102 | ### BEGIN YOUR SOLUTION 103 | raise 
NotImplementedError() 104 | ### END YOUR SOLUTION 105 | 106 | 107 | class ReLU(Module): 108 | def forward(self, x: Tensor) -> Tensor: 109 | ### BEGIN YOUR SOLUTION 110 | raise NotImplementedError() 111 | ### END YOUR SOLUTION 112 | 113 | 114 | class Sequential(Module): 115 | def __init__(self, *modules): 116 | super().__init__() 117 | self.modules = modules 118 | 119 | def forward(self, x: Tensor) -> Tensor: 120 | ### BEGIN YOUR SOLUTION 121 | raise NotImplementedError() 122 | ### END YOUR SOLUTION 123 | 124 | 125 | class SoftmaxLoss(Module): 126 | def forward(self, logits: Tensor, y: Tensor): 127 | ### BEGIN YOUR SOLUTION 128 | raise NotImplementedError() 129 | ### END YOUR SOLUTION 130 | 131 | 132 | class BatchNorm1d(Module): 133 | def __init__(self, dim, eps=1e-5, momentum=0.1, device=None, dtype="float32"): 134 | super().__init__() 135 | self.dim = dim 136 | self.eps = eps 137 | self.momentum = momentum 138 | ### BEGIN YOUR SOLUTION 139 | raise NotImplementedError() 140 | ### END YOUR SOLUTION 141 | 142 | def forward(self, x: Tensor) -> Tensor: 143 | ### BEGIN YOUR SOLUTION 144 | raise NotImplementedError() 145 | ### END YOUR SOLUTION 146 | 147 | 148 | class LayerNorm1d(Module): 149 | def __init__(self, dim, eps=1e-5, device=None, dtype="float32"): 150 | super().__init__() 151 | self.dim = dim 152 | self.eps = eps 153 | ### BEGIN YOUR SOLUTION 154 | raise NotImplementedError() 155 | ### END YOUR SOLUTION 156 | 157 | def forward(self, x: Tensor) -> Tensor: 158 | ### BEGIN YOUR SOLUTION 159 | raise NotImplementedError() 160 | ### END YOUR SOLUTION 161 | 162 | 163 | class Dropout(Module): 164 | def __init__(self, p=0.5): 165 | super().__init__() 166 | self.p = p 167 | 168 | def forward(self, x: Tensor) -> Tensor: 169 | ### BEGIN YOUR SOLUTION 170 | raise NotImplementedError() 171 | ### END YOUR SOLUTION 172 | 173 | 174 | class Residual(Module): 175 | def __init__(self, fn: Module): 176 | super().__init__() 177 | self.fn = fn 178 | 179 | def forward(self, x: Tensor) -> Tensor: 180 | ### BEGIN YOUR SOLUTION 181 | raise NotImplementedError() 182 | ### END YOUR SOLUTION 183 | -------------------------------------------------------------------------------- /hw3/python/needle/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .ops_mathematic import * 2 | 3 | from .ops_logarithmic import * 4 | from .ops_tuple import * 5 | -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc
-------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc
-------------------------------------------------------------------------------- /hw3/python/needle/ops/ops_logarithmic.py: --------------------------------------------------------------------------------
1 | from typing import Optional
2 | from ..autograd import NDArray
3 | from ..autograd import Op, Tensor, Value, TensorOp
4 | from ..autograd import TensorTuple, TensorTupleOp
5 | 
6 | from .ops_mathematic import *
7 | 
8 | import numpy as array_api
9 | 
10 | class LogSoftmax(TensorOp):
11 |     def compute(self, Z):
12 |         ### BEGIN YOUR SOLUTION
13 |         raise NotImplementedError()
14 |         ### END YOUR SOLUTION
15 | 
16 |     def gradient(self, out_grad, node):
17 |         ### BEGIN YOUR SOLUTION
18 |         raise NotImplementedError()
19 |         ### END YOUR SOLUTION
20 | 
21 | 
22 | def logsoftmax(a):
23 |     return LogSoftmax()(a)
24 | 
25 | 
26 | class LogSumExp(TensorOp):
27 |     def __init__(self, axes: Optional[tuple] = None):
28 |         self.axes = axes
29 | 
30 |     def compute(self, Z):
31 |         ### BEGIN YOUR SOLUTION
32 |         raise NotImplementedError()
33 |         ### END YOUR SOLUTION
34 | 
35 |     def gradient(self, out_grad, node):
36 |         ### BEGIN YOUR SOLUTION
37 |         raise NotImplementedError()
38 |         ### END YOUR SOLUTION
39 | 
40 | 
41 | def logsumexp(a, axes=None):
42 |     return LogSumExp(axes=axes)(a)
43 | 
44 | 
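# --- Editor's note ----------------------------------------------------------
# The LogSumExp stub above asks for the usual numerically stable reduction.
# A standalone NumPy sketch of the forward pass and its gradient (a softmax
# of the max-shifted inputs); an illustration, not this repository's
# graded solution.

import numpy as np

def logsumexp_sketch(Z, axes=None):
    # subtract the per-reduction max so exp() cannot overflow, add it back after
    Zmax = Z.max(axis=axes, keepdims=True)
    out = np.log(np.exp(Z - Zmax).sum(axis=axes, keepdims=True)) + Zmax
    return out.squeeze() if axes is None else out.squeeze(axis=axes)

def logsumexp_grad_sketch(Z, out_grad_keepdims, axes=None):
    # d/dZ logsumexp(Z) = softmax(Z); out_grad must broadcast over `axes`,
    # so it is passed here with the keepdims shape of the reduction
    Zmax = Z.max(axis=axes, keepdims=True)
    ez = np.exp(Z - Zmax)
    return out_grad_keepdims * ez / ez.sum(axis=axes, keepdims=True)
# -----------------------------------------------------------------------------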
-------------------------------------------------------------------------------- /hw3/python/needle/ops/ops_tuple.py: --------------------------------------------------------------------------------
1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp
2 | from .. import init  # added: TupleGetItem.gradient below uses init.zeros_like
3 | 
4 | class MakeTensorTuple(TensorTupleOp):
5 |     def compute(self, *args) -> tuple:
6 |         return tuple(args)
7 | 
8 |     def gradient(self, out_grad, node):
9 |         assert isinstance(out_grad, TensorTuple)
10 |         return tuple([out_grad[i] for i in range(len(out_grad))])  # was tuple(*[...]), a TypeError for len > 1
11 | 
12 | 
13 | def make_tuple(*args):
14 |     return MakeTensorTuple()(*args)
15 | 
16 | 
17 | class TupleGetItem(TensorOp):
18 |     def __init__(self, index):
19 |         self.index = index
20 | 
21 |     def __call__(self, a: TensorTuple, fold_const=True) -> Value:
22 |         assert isinstance(a, TensorTuple)
23 |         # constant folding
24 |         if fold_const and isinstance(a.op, MakeTensorTuple):
25 |             return a.inputs[self.index]
26 |         return Tensor.make_from_op(self, [a])
27 | 
28 |     def compute(self, a):
29 |         return a[self.index]
30 | 
31 |     def gradient(self, out_grad, node):
32 |         index = self.index
33 |         in_grad = []
34 |         for i, value in enumerate(node.inputs[0]):
35 |             if i != index:
36 |                 in_grad.append(init.zeros_like(value))
37 |             else:
38 |                 in_grad.append(out_grad)
39 |         return MakeTensorTuple()(*in_grad)
40 | 
41 | 
42 | def tuple_get_item(value, index):
43 |     return TupleGetItem(index)(value)
44 | 
45 | 
46 | class FusedAddScalars(TensorTupleOp):
47 |     def __init__(self, c0: float, c1: float):
48 |         self.c0 = c0
49 |         self.c1 = c1
50 | 
51 |     def compute(self, a):
52 |         return a + self.c0, a + self.c1
53 | 
54 |     def gradient(self, out_grad, node):
55 |         return out_grad[0] + out_grad[1]
56 | 
57 | 
58 | def fused_add_scalars(x, c0, c1):
59 |     return FusedAddScalars(c0, c1)(x)
60 | 
-------------------------------------------------------------------------------- /hw3/python/needle/optim.py: --------------------------------------------------------------------------------
1 | """Optimization module"""
2 | import needle as ndl
3 | import numpy as np
4 | 
5 | 
6 | class Optimizer:
7 |     def __init__(self, params):
8 |         self.params = params
9 | 
10 |     def step(self):
11 |         raise NotImplementedError()
12 | 
13 |     def reset_grad(self):
14 |         for p in self.params:
15 |             p.grad = None
16 | 
17 | 
18 | class SGD(Optimizer):
19 |     def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
20 |         super().__init__(params)
21 |         self.lr = lr
22 |         self.momentum = momentum
23 |         self.u = {}
24 |         self.weight_decay = weight_decay
25 | 
26 |     def step(self):
27 |         ### BEGIN YOUR SOLUTION
28 |         raise NotImplementedError()
29 |         ### END YOUR SOLUTION
30 | 
31 |     def clip_grad_norm(self, max_norm=0.25):
32 |         """
33 |         Clips gradient norm of parameters.
34 |         """
35 |         ### BEGIN YOUR SOLUTION
36 |         raise NotImplementedError()
37 |         ### END YOUR SOLUTION
38 | 
39 | 
40 | class Adam(Optimizer):
41 |     def __init__(
42 |         self,
43 |         params,
44 |         lr=0.01,
45 |         beta1=0.9,
46 |         beta2=0.999,
47 |         eps=1e-8,
48 |         weight_decay=0.0,
49 |     ):
50 |         super().__init__(params)
51 |         self.lr = lr
52 |         self.beta1 = beta1
53 |         self.beta2 = beta2
54 |         self.eps = eps
55 |         self.weight_decay = weight_decay
56 |         self.t = 0
57 | 
58 |         self.m = {}
59 |         self.v = {}
60 | 
61 |     def step(self):
62 |         ### BEGIN YOUR SOLUTION
63 |         raise NotImplementedError()
64 |         ### END YOUR SOLUTION
65 | 
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc
-------------------------------------------------------------------------------- /hw4/.idea/.gitignore: --------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 | 
-------------------------------------------------------------------------------- /hw4/.idea/hw4.iml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction, leaving only bare line numbers.)
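# --- Editor's note ----------------------------------------------------------
# For the SGD.step()/Adam.step() stubs in hw3/python/needle/optim.py above:
# a minimal, self-contained sketch of a momentum update with L2 weight decay,
# written against plain NumPy arrays. The damped form u = beta*u + (1-beta)*g
# is one common convention; conventions vary, so treat this as an
# illustration, not this repository's graded solution.

import numpy as np

def sgd_step(params, grads, u, lr=0.01, momentum=0.9, weight_decay=0.0):
    """One in-place SGD update; params and grads are lists of np.ndarray."""
    for i, (p, g) in enumerate(zip(params, grads)):
        g = g + weight_decay * p                       # fold the L2 penalty into the gradient
        u[i] = momentum * u.get(i, 0.0) + (1 - momentum) * g
        p -= lr * u[i]                                 # update the parameter in place

# usage: u = {}; call sgd_step(params, grads, u, ...) once per batch
# -----------------------------------------------------------------------------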
-------------------------------------------------------------------------------- /hw4/.idea/inspectionProfiles/profiles_settings.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/misc.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/modules.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/vcs.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.tmp.driveupload/7792: --------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("./tests/hw4")
3 | sys.path.append("./python")
4 | 
5 | from test_nd_backend import *
6 | from test_cifar_ptb_data import *
7 | from test_conv import *
8 | from test_sequence_models import *
9 | from needle import backend_ndarray as nd
10 | 
11 | 
12 | def train_cifar10():
13 |     import sys
14 |     sys.path.append('./python')
15 |     sys.path.append('./apps')
16 |     import needle as ndl
17 |     from models import ResNet9
18 |     from simple_ml import train_cifar10, evaluate_cifar10
19 | 
20 |     device = ndl.cpu()
21 |     dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
22 |     dataloader = ndl.data.DataLoader(
23 |         dataset=dataset,
24 |         batch_size=128,
25 |         shuffle=True, )
26 |     model = ResNet9(device=device, dtype="float32")
27 |     train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam,
28 |                   lr=0.001, weight_decay=0.001, device=device)
29 |     evaluate_cifar10(model, dataloader)
30 | 
31 | 
32 | def train_language_model():
33 |     import needle as ndl
34 |     sys.path.append('./apps')
35 |     from models import LanguageModel
36 |     from simple_ml import train_ptb, evaluate_ptb
37 | 
38 |     device = ndl.cpu_numpy()
39 |     corpus = ndl.data.Corpus("data/ptb")
40 |     train_data = ndl.data.batchify(corpus.train, batch_size=16, device=device, dtype="float32")
41 |     model = LanguageModel(30, len(corpus.dictionary), hidden_size=10, num_layers=2, seq_model='rnn', device=device)
42 |     train_ptb(model, train_data, seq_len=1, n_epochs=1, device=device)
43 |     evaluate_ptb(model, train_data, seq_len=40, device=device)
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     """
48 |     Part 1
49 |     """
50 |     # test_stack((5, 5), 0, 2, nd.cpu())
51 |     # test_stack_backward((5, 5), 0, 2, nd.cpu())
52 | 
53 |     # test_matmul(16, 16, 16, nd.cpu())
54 |     # test_relu((5, 5), nd.cpu())
55 |     # test_tanh_backward((5, 5), nd.cpu())
56 | 
57 | 
58 |     """
59 |     Part 2
60 |     """
61 |     # test_cifar10_dataset(True)
62 | 
63 | 
64 |     """
65 |     Part 3
66 |     """
67 |     # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu())
68 |     # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu())
69 |     # test_dilate_forward(nd.cpu())
70 |     # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu())
71 |     # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu())
72 | 
73 |     # test_init_kaiming_uniform(nd.cpu())
74 |     # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu())
75 |     # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu())
76 |     # test_resnet9(nd.cpu())
77 |     # test_train_cifar10(nd.cpu())
78 | 
79 |     train_cifar10()
80 | 
81 |     """
82 |     Part 4
83 | """ 84 | # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu()) 85 | # test_lstm_cell(1, 1, 1, False, False, nd.cpu()) 86 | # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu()) 87 | 88 | """ 89 | Part 6 90 | """ 91 | # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu()) 92 | 93 | """ 94 | Part 7 95 | """ 96 | # train_language_model() -------------------------------------------------------------------------------- /hw4/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(needle C CXX) 3 | cmake_policy(SET CMP0146 OLD) 4 | 5 | # find correct version of Python 6 | execute_process(COMMAND python3-config --prefix 7 | OUTPUT_VARIABLE Python_ROOT_DIR) 8 | find_package(Python COMPONENTS Development Interpreter REQUIRED) 9 | include_directories(${Python_INCLUDE_DIRS}) 10 | 11 | # find pybind 12 | execute_process(COMMAND python3 -m pybind11 --cmakedir 13 | RESULT_VARIABLE __pybind_exit_code 14 | OUTPUT_VARIABLE __pybind_path 15 | OUTPUT_STRIP_TRAILING_WHITESPACE) 16 | find_package(pybind11 PATHS ${__pybind_path}) 17 | 18 | 19 | if(NOT MSVC) 20 | set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}") 21 | set(CMAKE_CUDA_STANDARD 14) 22 | else() 23 | set(CMAKE_CXX_FLAGS "/std:c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}") 24 | set(CMAKE_CUDA_STANDARD 14) 25 | endif() 26 | 27 | include_directories(SYSTEM ${pybind11_INCLUDE_DIRS}) 28 | list(APPEND LINKER_LIBS ${pybind11_LIBRARIES}) 29 | 30 | 31 | ################### 32 | ### CPU BACKEND ### 33 | ################### 34 | add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc) 35 | target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS}) 36 | pybind11_extension(ndarray_backend_cpu) 37 | pybind11_strip(ndarray_backend_cpu) 38 | 39 | 40 | # directly output to ffi folder 41 | set_target_properties(ndarray_backend_cpu 42 | PROPERTIES 43 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 44 | CXX_VISIBILITY_PRESET "hidden" 45 | ) 46 | 47 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 48 | set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup) 49 | endif() 50 | 51 | 52 | 53 | #################### 54 | ### CUDA BACKEND ### 55 | #################### 56 | find_package(CUDA) 57 | if(CUDA_FOUND) 58 | message(STATUS "Found cuda, building cuda backend") 59 | 60 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 61 | list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY}) 62 | 63 | # invoke nvidia smi to detect if we really have a GPU 64 | execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET) 65 | if(NV_RET EQUAL "0") 66 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto) 67 | else() 68 | # set to 3.7 the flag of K80 69 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7) 70 | endif() 71 | 72 | # set arch flags properly 73 | CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS}) 74 | 75 | target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS}) 76 | pybind11_extension(ndarray_backend_cuda) 77 | pybind11_strip(ndarray_backend_cuda) 78 | 79 | # directly output to ffi folder 80 | set_target_properties(ndarray_backend_cuda 81 | PROPERTIES 82 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 83 | CXX_VISIBILITY_PRESET "hidden" 84 | CUDA_VISIBILITY_PRESET "hidden" 85 | ) 86 | 87 | endif() 88 | 89 | -------------------------------------------------------------------------------- 
/hw4/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lib, pybind, clean, format, all 2 | 3 | all: lib 4 | 5 | 6 | lib: 7 | @mkdir -p build 8 | @cd build; cmake .. 9 | @cd build; $(MAKE) 10 | 11 | format: 12 | python3 -m black . 13 | clang-format -i src/*.cc src/*.cu 14 | 15 | clean: 16 | rm -rf build python/needle/backend_ndarray/ndarray_backend*.so 17 | -------------------------------------------------------------------------------- /hw4/README.md: -------------------------------------------------------------------------------- 1 | # Homework 4 2 | Public repository and stub/testing code for Homework 4 of 10-714. 3 | -------------------------------------------------------------------------------- /hw4/ResNet9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/ResNet9.png -------------------------------------------------------------------------------- /hw4/apps/models.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./python') 3 | import needle as ndl 4 | import needle.nn as nn 5 | import math 6 | import numpy as np 7 | np.random.seed(0) 8 | 9 | 10 | def ConvBN(in_channels, out_channels, kernel_size, stride, device=None, dtype="float32"): 11 | return nn.Sequential( 12 | nn.Conv(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=True, device=device, dtype=dtype), 13 | nn.BatchNorm2d(out_channels, device=device, dtype=dtype), 14 | nn.ReLU() 15 | ) 16 | 17 | 18 | class ResidualBlock(ndl.nn.Module): 19 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, device=None, dtype="float32"): 20 | super().__init__() 21 | 22 | self.conv1 = ConvBN(in_channels, out_channels, kernel_size, stride, device=device, dtype=dtype) 23 | self.conv2 = ConvBN(out_channels, out_channels, kernel_size, stride, device=device, dtype=dtype) 24 | 25 | 26 | def forward(self, x): 27 | out = self.conv1(x) 28 | out = self.conv2(out) 29 | out = out + x 30 | return out 31 | 32 | 33 | class ResNet9(ndl.nn.Module): 34 | def __init__(self, device=None, dtype="float32"): 35 | super().__init__() 36 | self.conv1 = ConvBN(3, 16, kernel_size=7, stride=4, device=device, dtype=dtype) 37 | self.conv2 = ConvBN(16, 32, kernel_size=3, stride=2, device=device, dtype=dtype) 38 | self.resi1 = ResidualBlock(32, 32, 3, 1, device=device, dtype=dtype) 39 | self.conv3 = ConvBN(32, 64, kernel_size=3, stride=2, device=device, dtype=dtype) 40 | self.conv4 = ConvBN(64, 128, kernel_size=3, stride=2, device=device, dtype=dtype) 41 | self.resi2 = ResidualBlock(128, 128, 3, 1, device=device, dtype=dtype) 42 | self.linear1 = nn.Linear(128, 128, device=device, dtype=dtype) 43 | self.linear2 = nn.Linear(128, 10, device=device, dtype=dtype) 44 | 45 | def forward(self, x): 46 | out = self.conv1(x) 47 | out = self.conv2(out) 48 | out = self.resi1(out) 49 | out = self.conv3(out) 50 | out = self.conv4(out) 51 | out = self.resi2(out) 52 | out = nn.Flatten()(out) 53 | out = self.linear1(out) 54 | out = ndl.ops.relu(out) 55 | out = self.linear2(out) 56 | return out 57 | 58 | 59 | class LanguageModel(nn.Module): 60 | def __init__(self, embedding_size, output_size, hidden_size, num_layers=1, 61 | seq_model='rnn', device=None, dtype="float32"): 62 | """ 63 | Consists of an embedding layer, a sequence model (either RNN or LSTM), and a 64 | linear layer. 
65 |         Parameters:
66 |         output_size: Size of dictionary
67 |         embedding_size: Size of embeddings
68 |         hidden_size: The number of features in the hidden state of LSTM or RNN
69 |         seq_model: 'rnn' or 'lstm', whether to use RNN or LSTM
70 |         num_layers: Number of layers in RNN or LSTM
71 |         """
72 |         super(LanguageModel, self).__init__()
73 | 
74 |         self.embedding_size = embedding_size
75 |         self.output_size = output_size
76 |         self.hidden_size = hidden_size
77 |         self.embedding = nn.Embedding(output_size, embedding_size, device=device, dtype=dtype)
78 |         if seq_model == 'rnn':
79 |             self.seq_model = nn.RNN(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
80 |         elif seq_model == 'lstm':
81 |             self.seq_model = nn.LSTM(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
82 |         self.linear = nn.Linear(hidden_size, output_size, device=device, dtype=dtype)
83 | 
84 | 
85 |     def forward(self, x, h=None):
86 |         """
87 |         Given sequence (and the previous hidden state if given), returns probabilities of next word
88 |         (along with the last hidden state from the sequence model).
89 |         Inputs:
90 |         x of shape (seq_len, bs)
91 |         h of shape (num_layers, bs, hidden_size) if using RNN,
92 |         else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
93 |         Returns (out, h)
94 |         out of shape (seq_len*bs, output_size)
95 |         h of shape (num_layers, bs, hidden_size) if using RNN,
96 |         else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
97 |         """
98 |         seq_len, bs = x.shape
99 |         x_emb = self.embedding(x)
100 |         out, h = self.seq_model(x_emb, h)
101 |         out = out.reshape((seq_len * bs, self.hidden_size))
102 |         out = self.linear(out)
103 |         return out, h
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     model = ResNet9()
108 |     x = ndl.ops.randu((1, 32, 32, 3), requires_grad=True)
109 |     model(x)
110 |     cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
111 |     train_loader = ndl.data.DataLoader(cifar10_train_dataset, 128, ndl.cpu(), dtype="float32")
112 |     print(cifar10_train_dataset[1][0].shape)  # was `dataset`, an undefined name
-------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake: --------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/cc")
2 | set(CMAKE_C_COMPILER_ARG1 "")
3 | set(CMAKE_C_COMPILER_ID "GNU")
4 | set(CMAKE_C_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_C_COMPILER_WRAPPER "")
7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
15 | 
16 | set(CMAKE_C_PLATFORM_ID "Linux")
17 | set(CMAKE_C_SIMULATE_ID "")
18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
19 | set(CMAKE_C_SIMULATE_VERSION "")
20 | 
21 | 
22 | 
23 | 
24 | set(CMAKE_AR "/usr/bin/ar")
25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
26 | set(CMAKE_RANLIB "/usr/bin/ranlib")
27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
28 | set(CMAKE_LINKER "/usr/bin/ld")
29 | set(CMAKE_MT "")
30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
31 | 
set(CMAKE_COMPILER_IS_GNUCC 1) 32 | set(CMAKE_C_COMPILER_LOADED 1) 33 | set(CMAKE_C_COMPILER_WORKS TRUE) 34 | set(CMAKE_C_ABI_COMPILED TRUE) 35 | 36 | set(CMAKE_C_COMPILER_ENV_VAR "CC") 37 | 38 | set(CMAKE_C_COMPILER_ID_RUN 1) 39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) 40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) 41 | set(CMAKE_C_LINKER_PREFERENCE 10) 42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE) 43 | 44 | # Save compiler ABI information. 45 | set(CMAKE_C_SIZEOF_DATA_PTR "8") 46 | set(CMAKE_C_COMPILER_ABI "ELF") 47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") 48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 49 | 50 | if(CMAKE_C_SIZEOF_DATA_PTR) 51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") 52 | endif() 53 | 54 | if(CMAKE_C_COMPILER_ABI) 55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") 56 | endif() 57 | 58 | if(CMAKE_C_LIBRARY_ARCHITECTURE) 59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | endif() 61 | 62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") 63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) 64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") 65 | endif() 66 | 67 | 68 | 69 | 70 | 71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") 73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 75 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++") 2 | set(CMAKE_CXX_COMPILER_ARG1 "") 3 | set(CMAKE_CXX_COMPILER_ID "GNU") 4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_CXX_COMPILER_WRAPPER "") 7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_CXX_COMPILE_FEATURES 
"cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") 10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") 11 | set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") 12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") 13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") 14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") 15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") 16 | 17 | set(CMAKE_CXX_PLATFORM_ID "Linux") 18 | set(CMAKE_CXX_SIMULATE_ID "") 19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU") 20 | set(CMAKE_CXX_SIMULATE_VERSION "") 21 | 22 | 23 | 24 | 25 | set(CMAKE_AR "/usr/bin/ar") 26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11") 27 | set(CMAKE_RANLIB "/usr/bin/ranlib") 28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 29 | set(CMAKE_LINKER "/usr/bin/ld") 30 | set(CMAKE_MT "") 31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 32 | set(CMAKE_COMPILER_IS_GNUCXX 1) 33 | set(CMAKE_CXX_COMPILER_LOADED 1) 34 | set(CMAKE_CXX_COMPILER_WORKS TRUE) 35 | set(CMAKE_CXX_ABI_COMPILED TRUE) 36 | 37 | 
set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") 38 | 39 | set(CMAKE_CXX_COMPILER_ID_RUN 1) 40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m) 41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) 42 | 43 | foreach (lang C OBJC OBJCXX) 44 | if (CMAKE_${lang}_COMPILER_ID_RUN) 45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) 46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) 47 | endforeach() 48 | endif() 49 | endforeach() 50 | 51 | set(CMAKE_CXX_LINKER_PREFERENCE 30) 52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) 53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE) 54 | 55 | # Save compiler ABI information. 56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8") 57 | set(CMAKE_CXX_COMPILER_ABI "ELF") 58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") 59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | 61 | if(CMAKE_CXX_SIZEOF_DATA_PTR) 62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") 63 | endif() 64 | 65 | if(CMAKE_CXX_COMPILER_ABI) 66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") 67 | endif() 68 | 69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE) 70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 71 | endif() 72 | 73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") 74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) 75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") 76 | endif() 77 | 78 | 79 | 80 | 81 | 82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") 84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 86 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-6.1.58+") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "6.1.58+") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-6.1.58+") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "6.1.58+") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw4") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw4/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/CMakeRuleHashes.txt: -------------------------------------------------------------------------------- 1 | # Hashes of file build rules. 2 | 347d5addb0d9c9683a2b5d27952f36b2 CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/Makefile.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # The generator used is: 5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") 6 | 7 | # The top level Makefile was generated from the following files: 8 | set(CMAKE_MAKEFILE_DEPENDS 9 | "CMakeCache.txt" 10 | "/content/drive/Othercomputers/My MacBook Pro/hw4/CMakeLists.txt" 11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake" 12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake" 13 | "CMakeFiles/3.27.9/CMakeSystem.cmake" 14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend" 16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake" 17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake" 18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake" 19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake" 20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake" 21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake" 22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake" 23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake" 24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake" 25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake" 26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake" 27 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake" 28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake" 29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake" 30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake" 31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake" 32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake" 33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake" 34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake" 35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake" 36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake" 37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake" 38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake" 39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake" 40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake" 41 | 
"/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake" 42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake" 43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake" 44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake" 45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake" 46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake" 47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake" 48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake" 49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake" 50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake" 51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake" 52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake" 53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake" 54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake" 55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake" 56 | ) 57 | 58 | # The corresponding makefile is: 59 | set(CMAKE_MAKEFILE_OUTPUTS 60 | "Makefile" 61 | "CMakeFiles/cmake.check_cache" 62 | ) 63 | 64 | # Byproducts of CMake generate step: 65 | set(CMAKE_MAKEFILE_PRODUCTS 66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 67 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake" 68 | "CMakeFiles/CMakeDirectoryInformation.cmake" 69 | ) 70 | 71 | # Dependency information for all targets: 72 | set(CMAKE_DEPEND_INFO_FILES 73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake" 74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake" 75 | ) 76 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/Makefile2: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | #============================================================================= 9 | # Special targets provided by cmake. 10 | 11 | # Disable implicit rules so canonical targets will work. 12 | .SUFFIXES: 13 | 14 | # Disable VCS-based implicit rules. 15 | % : %,v 16 | 17 | # Disable VCS-based implicit rules. 18 | % : RCS/% 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/%,v 22 | 23 | # Disable VCS-based implicit rules. 24 | % : SCCS/s.% 25 | 26 | # Disable VCS-based implicit rules. 27 | % : s.% 28 | 29 | .SUFFIXES: .hpux_make_needs_suffix_list 30 | 31 | # Command-line flag to silence nested $(MAKE). 32 | $(VERBOSE)MAKESILENT = -s 33 | 34 | #Suppress display of executed commands. 35 | $(VERBOSE).SILENT: 36 | 37 | # A target that is always out of date. 
38 | cmake_force: 39 | .PHONY : cmake_force 40 | 41 | #============================================================================= 42 | # Set environment variables for the build. 43 | 44 | # The shell in which to execute make rules. 45 | SHELL = /bin/sh 46 | 47 | # The CMake executable. 48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake 49 | 50 | # The command to remove a file. 51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f 52 | 53 | # Escaping for special characters. 54 | EQUALS = = 55 | 56 | # The top-level source directory on which CMake was run. 57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4" 58 | 59 | # The top-level build directory on which CMake was run. 60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4/build" 61 | 62 | #============================================================================= 63 | # Directory level rules for the build root directory 64 | 65 | # The main recursive "all" target. 66 | all: CMakeFiles/ndarray_backend_cpu.dir/all 67 | all: CMakeFiles/ndarray_backend_cuda.dir/all 68 | .PHONY : all 69 | 70 | # The main recursive "preinstall" target. 71 | preinstall: 72 | .PHONY : preinstall 73 | 74 | # The main recursive "clean" target. 75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean 76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean 77 | .PHONY : clean 78 | 79 | #============================================================================= 80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir 81 | 82 | # All Build rule for target. 83 | CMakeFiles/ndarray_backend_cpu.dir/all: 84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend 85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build 86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu" 87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all 88 | 89 | # Build rule for subdir invocation for target. 90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system 91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2 92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all 93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0 94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule 95 | 96 | # Convenience name for target. 97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule 98 | .PHONY : ndarray_backend_cpu 99 | 100 | # clean rule for target. 101 | CMakeFiles/ndarray_backend_cpu.dir/clean: 102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean 103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean 104 | 105 | #============================================================================= 106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir 107 | 108 | # All Build rule for target. 
109 | CMakeFiles/ndarray_backend_cuda.dir/all: 110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build 112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda" 113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all 114 | 115 | # Build rule for subdir invocation for target. 116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system 117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2 118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all 119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0 120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule 121 | 122 | # Convenience name for target. 123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule 124 | .PHONY : ndarray_backend_cuda 125 | 126 | # clean rule for target. 127 | CMakeFiles/ndarray_backend_cuda.dir/clean: 128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean 129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean 130 | 131 | #============================================================================= 132 | # Special targets to cleanup operation of make. 133 | 134 | # Special rule to run CMake to check the build system integrity. 135 | # No rule that depends on this can have commands that come from listfiles 136 | # because they might be regenerated. 137 | cmake_check_build_system: 138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 139 | .PHONY : cmake_check_build_system 140 | 141 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cpu.dir 2 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cuda.dir 3 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/edit_cache.dir 4 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/rebuild_cache.dir 5 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/content/drive/Othercomputers/My MacBook Pro/hw4/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 12 | ) 13 | 14 | # Targets to which this target links which contain Fortran sources. 15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 16 | ) 17 | 18 | # Fortran module output directory. 19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 20 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.pdb" 4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" 5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cpu. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS 6 | 7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include 8 | 9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden 10 | 11 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | ) 12 | 13 | # Targets to which this target links which contain Fortran sources. 14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 15 | ) 16 | 17 | # Fortran module output directory. 18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 19 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.pdb" 4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o" 5 | ) 6 | 7 | # Per-language clean rules from dependency scanning. 8 | foreach(lang ) 9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL) 10 | endforeach() 11 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 
3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 3 2 | CMAKE_PROGRESS_2 = 4 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /hw4/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw4 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/usr/bin/objdump") 43 | endif() 44 | 45 | if(CMAKE_INSTALL_COMPONENT) 46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 47 | else() 48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 49 | endif() 50 | 51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 52 | "${CMAKE_INSTALL_MANIFEST_FILES}") 53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw4/build/${CMAKE_INSTALL_MANIFEST}" 54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 55 | -------------------------------------------------------------------------------- /hw4/build/detect_cuda_compute_capabilities.cpp: -------------------------------------------------------------------------------- 1 | #include <cuda_runtime.h> 2 | #include <cstdio> 3 | int main() 4 | { 5 | int count = 0; 6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; 7 | if (count == 0) return -1; 8 | for (int device = 0; device < count; ++device) 9 | { 10 | cudaDeviceProp prop; 11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) 12 | std::printf("%d.%d ", prop.major, prop.minor); 13 | } 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /hw4/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("./tests/hw4") 3 | sys.path.append("./python") 4 | 5 | from test_nd_backend import * 6 | from test_cifar_ptb_data import * 7 | from test_conv import * 8 | from test_sequence_models import * 9 | from needle import backend_ndarray as nd 10 | 11 | 12 | def train_cifar10(): 13 | import sys 14 | sys.path.append('./python') 15 | sys.path.append('./apps') 16 | import needle as ndl 17 | from models import ResNet9 18 | from simple_ml import train_cifar10, evaluate_cifar10 19 | 20 | device = ndl.cpu() 21 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True) 22 | dataloader = ndl.data.DataLoader( \ 23 | dataset=dataset, 24 | batch_size=128, 25 | shuffle=True, ) 26 | model = ResNet9(device=device, dtype="float32") 27 | train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam, 28 | lr=0.001, weight_decay=0.001, device=device) 29 | evaluate_cifar10(model, dataloader) 30 | 31 | 32 | def train_language_model(): 33 | import needle as ndl 34 | sys.path.append('./apps') 35 | from models import LanguageModel 36 | from simple_ml import train_ptb, evaluate_ptb 37 | 38 | device = ndl.cpu_numpy() 39 | corpus = ndl.data.Corpus("data/ptb") 40 | train_data = ndl.data.batchify(corpus.train, 
batch_size=16, device=device, dtype="float32") 41 | model = LanguageModel(30, len(corpus.dictionary), hidden_size=10, num_layers=2, seq_model='rnn', device=device) 42 | train_ptb(model, train_data, seq_len=1, n_epochs=1, device=device) 43 | evaluate_ptb(model, train_data, seq_len=40, device=device) 44 | 45 | 46 | if __name__ == "__main__": 47 | """ 48 | Part 1 49 | """ 50 | # test_stack((5, 5), 0, 2, nd.cpu()) 51 | # test_stack_backward((5, 5), 0, 2, nd.cpu()) 52 | 53 | # test_matmul(16, 16, 16, nd.cpu()) 54 | # test_relu((5, 5), nd.cpu()) 55 | # test_tanh_backward((5, 5), nd.cpu()) 56 | 57 | 58 | """ 59 | Part 2 60 | """ 61 | # test_cifar10_dataset(True) 62 | 63 | 64 | """ 65 | Part 3 66 | """ 67 | # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu()) 68 | # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu()) 69 | # test_dilate_forward(nd.cpu()) 70 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu()) 71 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu()) 72 | 73 | # test_init_kaiming_uniform(nd.cpu()) 74 | # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu()) 75 | # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu()) 76 | # test_resnet9(nd.cpu()) 77 | # test_train_cifar10(nd.cpu()) 78 | 79 | train_cifar10() 80 | 81 | """ 82 | Part 4 83 | """ 84 | # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu()) 85 | # test_lstm_cell(1, 1, 1, False, False, nd.cpu()) 86 | # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu()) 87 | 88 | """ 89 | Part 6 90 | """ 91 | # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu()) 92 | 93 | """ 94 | Part 7 95 | """ 96 | # train_language_model() -------------------------------------------------------------------------------- /hw4/hw4.ipynb - Colaboratory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/hw4.ipynb - Colaboratory.pdf -------------------------------------------------------------------------------- /hw4/python/needle/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ops 2 | from .ops import * 3 | from .autograd import Tensor, cpu, all_devices 4 | 5 | from . import init 6 | from .init import ones, zeros, zeros_like, ones_like 7 | 8 | from . import data 9 | from . import nn 10 | from . 
import optim 11 | from .backend_selection import * 12 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/__init__.py: -------------------------------------------------------------------------------- 1 | from .ndarray import * 2 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __device_name__ = "numpy" 5 | _datatype = np.float32 6 | _datatype_size = np.dtype(_datatype).itemsize 7 | 8 | 9 | class Array: 10 | def __init__(self, size): 11 | self.array = np.empty(size, dtype=np.float32) 12 | 13 | @property 14 | def size(self): 15 | return self.array.size 16 | 17 | 18 | def to_numpy(a, shape, strides, offset): 19 | return np.lib.stride_tricks.as_strided( 20 | a.array[offset:], shape, tuple([s * _datatype_size for s in strides]) 21 | ) 22 | 23 | 24 | def from_numpy(a, out): 25 | out.array[:] = a.flatten() 26 | 27 | 28 | def fill(out, val): 29 | out.array.fill(val) 30 | 31 | 32 | def compact(a, out, shape, strides, offset): 33 | out.array[:] = to_numpy(a, shape, strides, offset).flatten() 34 | 35 | 36 | def ewise_setitem(a, out, shape, strides, offset): 37 | to_numpy(out, shape, strides, offset)[:] = a.array.reshape(shape) 38 | 39 | 40 | def scalar_setitem(size, val, out, shape, strides, offset): 41 | to_numpy(out, shape, strides, offset)[:] = val 42 | 43 | 44 | def ewise_add(a, b, out): 45 | out.array[:] = a.array + b.array 46 | 47 | 48 | def scalar_add(a, val, out): 49 | out.array[:] = a.array + val 50 | 51 | 52 | def ewise_mul(a, b, out): 53 | out.array[:] = a.array * b.array 54 | 55 | 56 | def scalar_mul(a, val, out): 57 | out.array[:] = a.array * val 58 | 59 | 60 | def ewise_div(a, b, out): 61 | out.array[:] = a.array / b.array 62 | 63 | 64 | def scalar_div(a, val, out): 65 | out.array[:] = a.array / val 66 | 67 | 68 | def scalar_power(a, val, out): 69 | out.array[:] = a.array**val 70 | 71 | 72 | def ewise_maximum(a, b, out): 73 | out.array[:] = np.maximum(a.array, b.array) 74 | 75 | 76 | def scalar_maximum(a, val, out): 77 | out.array[:] = 
np.maximum(a.array, val) 78 | 79 | 80 | def ewise_eq(a, b, out): 81 | out.array[:] = (a.array == b.array).astype(np.float32) 82 | 83 | 84 | def scalar_eq(a, val, out): 85 | out.array[:] = (a.array == val).astype(np.float32) 86 | 87 | 88 | def ewise_ge(a, b, out): 89 | out.array[:] = (a.array >= b.array).astype(np.float32) 90 | 91 | 92 | def scalar_ge(a, val, out): 93 | out.array[:] = (a.array >= val).astype(np.float32) 94 | 95 | 96 | def ewise_log(a, out): 97 | out.array[:] = np.log(a.array) 98 | 99 | 100 | def ewise_exp(a, out): 101 | out.array[:] = np.exp(a.array) 102 | 103 | 104 | def ewise_tanh(a, out): 105 | out.array[:] = np.tanh(a.array) 106 | 107 | 108 | def matmul(a, b, out, m, n, p): 109 | out.array[:] = (a.array.reshape(m, n) @ b.array.reshape(n, p)).reshape(-1) 110 | 111 | 112 | def reduce_max(a, out, reduce_size): 113 | out.array[:] = a.array[:].reshape(-1, reduce_size).max(axis=1) 114 | 115 | 116 | def reduce_sum(a, out, reduce_size): 117 | out.array[:] = a.array[:].reshape(-1, reduce_size).sum(axis=1) 118 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_numpy.py: -------------------------------------------------------------------------------- 1 | """This file defines specific implementations of devices when using numpy as NDArray backend. 2 | """ 3 | import numpy 4 | 5 | 6 | class Device: 7 | """Base class of all devices""" 8 | 9 | 10 | class CPUDevice(Device): 11 | """Represents data that sits on the CPU""" 12 | 13 | def __repr__(self): 14 | return "needle.cpu()" 15 | 16 | def __hash__(self): 17 | return self.__repr__().__hash__() 18 | 19 | def __eq__(self, other): 20 | return isinstance(other, CPUDevice) 21 | 22 | def enabled(self): 23 | return True 24 | 25 | def zeros(self, *shape, dtype="float32"): 26 | return numpy.zeros(shape, dtype=dtype) 27 | 28 | def ones(self, *shape, dtype="float32"): 29 | return numpy.ones(shape, dtype=dtype) 30 | 31 | def randn(self, *shape): 32 | # note: numpy doesn't support types within standard random routines, and 33 | # .astype("float32") does not work if we're generating a singleton 34 | return numpy.random.randn(*shape) 35 | 36 | def rand(self, *shape): 37 | # note: numpy doesn't support types within standard random routines, and 38 | # .astype("float32") does not work if we're generating a singleton 39 | return numpy.random.rand(*shape) 40 | 41 | def one_hot(self, n, i, dtype="float32"): 42 | return numpy.eye(n, dtype=dtype)[i] 43 | 44 | def empty(self, shape, dtype="float32"): 45 | return numpy.empty(shape, dtype=dtype) 46 | 47 | def full(self, shape, fill_value, dtype="float32"): 48 | return numpy.full(shape, fill_value, dtype=dtype) 49 | 50 | 51 | def cpu(): 52 | """Return cpu device""" 53 | return CPUDevice() 54 | 55 | 56 | def default_device(): 57 | return cpu() 58 | 59 | 60 | def all_devices(): 61 | """return a list of all available devices""" 62 | return [cpu()] 63 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_selection.py: -------------------------------------------------------------------------------- 1 | """Logic for backend selection""" 2 | import os 3 | 4 | 5 | BACKEND = os.environ.get("NEEDLE_BACKEND", "nd") 6 | 7 | 8 | if BACKEND == "nd": 9 | print("Using needle backend") 10 | from . 
import backend_ndarray as array_api 11 | from .backend_ndarray import ( 12 | all_devices, 13 | cuda, 14 | cpu, 15 | cpu_numpy, 16 | default_device, 17 | BackendDevice as Device, 18 | ) 19 | 20 | NDArray = array_api.NDArray 21 | elif BACKEND == "np": 22 | print("Using numpy backend") 23 | import numpy as array_api 24 | from .backend_numpy import all_devices, cpu, default_device, Device 25 | 26 | NDArray = array_api.ndarray 27 | else: 28 | raise RuntimeError("Unknown needle array backend %s" % BACKEND) 29 | -------------------------------------------------------------------------------- /hw4/python/needle/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_basic import * 2 | from .data_transforms import * 3 | from .datasets import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/data/data_basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..autograd import Tensor 3 | 4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 5 | 6 | 7 | 8 | class Dataset: 9 | r"""An abstract class representing a `Dataset`. 10 | 11 | All subclasses should overwrite :meth:`__getitem__`, supporting fetching a 12 | data sample for a given key. Subclasses must also overwrite 13 | :meth:`__len__`, which is expected to return the size of the dataset. 14 | """ 15 | 16 | def __init__(self, transforms: Optional[List] = None): 17 | self.transforms = transforms 18 | 19 | def __getitem__(self, index) -> object: 20 | raise NotImplementedError 21 | 22 | def __len__(self) -> int: 23 | raise NotImplementedError 24 | 25 | def apply_transforms(self, x): 26 | if self.transforms is not None: 27 | # apply the transforms 28 | for tform in self.transforms: 29 | x = tform(x) 30 | return x 31 | 32 | 33 | class DataLoader: 34 | r""" 35 | Data loader. Combines a dataset and a sampler, and provides an iterable over 36 | the given dataset. 37 | Args: 38 | dataset (Dataset): dataset from which to load the data. 39 | batch_size (int, optional): how many samples per batch to load 40 | (default: ``1``). 41 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 42 | at every epoch (default: ``False``). 
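Example (an illustrative sketch, not part of the original file; assumes two in-memory numpy arrays ``X`` and ``y`` wrapped in the ``NDArrayDataset`` defined under data/datasets/):
    loader = DataLoader(NDArrayDataset(X, y), batch_size=32, shuffle=True)
    for X_batch, y_batch in loader:
        pass  # each batch arrives as a pair of needle Tensors (see __next__ below)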
43 | """ 44 | dataset: Dataset 45 | batch_size: Optional[int] 46 | 47 | def __init__( 48 | self, 49 | dataset: Dataset, 50 | batch_size: Optional[int] = 1, 51 | shuffle: bool = False, 52 | ): 53 | 54 | self.dataset = dataset 55 | self.shuffle = shuffle 56 | self.batch_size = batch_size 57 | if not self.shuffle: 58 | self.ordering = np.array_split(np.arange(len(dataset)), 59 | range(batch_size, len(dataset), batch_size)) 60 | 61 | def __iter__(self): 62 | if self.shuffle: 63 | self.ordering = np.array_split(np.random.permutation(len(self.dataset)), 64 | range(self.batch_size, len(self.dataset), self.batch_size)) 65 | else: 66 | self.ordering = np.array_split(np.arange(len(self.dataset)), 67 | range(self.batch_size, len(self.dataset), self.batch_size)) 68 | self.batch_idx = 0 69 | return self 70 | 71 | def __next__(self): 72 | if self.batch_idx >= len(self.ordering): 73 | raise StopIteration 74 | batch_indices = self.ordering[self.batch_idx] 75 | X_batch, y_batch = self.dataset[batch_indices] 76 | self.batch_idx += 1 77 | return Tensor(X_batch), Tensor(y_batch) 78 | 79 | -------------------------------------------------------------------------------- /hw4/python/needle/data/data_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Transform: 4 | def __call__(self, x): 5 | raise NotImplementedError 6 | 7 | 8 | class RandomFlipHorizontal(Transform): 9 | def __init__(self, p = 0.5): 10 | self.p = p 11 | 12 | def __call__(self, img): 13 | """ 14 | Horizonally flip an image, specified as an H x W x C NDArray. 15 | Args: 16 | img: H x W x C NDArray of an image 17 | Returns: 18 | H x W x C ndarray corresponding to image flipped with probability self.p 19 | Note: use the provided code to provide randomness, for easier testing 20 | """ 21 | flip_img = np.random.rand() < self.p 22 | if flip_img: 23 | img = img[:, ::-1, :] 24 | return img 25 | 26 | 27 | class RandomCrop(Transform): 28 | def __init__(self, padding=3): 29 | self.padding = padding 30 | 31 | def __call__(self, img): 32 | """ Zero pad and then randomly crop an image. 
33 | Args: 34 | img: H x W x C NDArray of an image 35 | Returns: 36 | H x W x C NDArray of the cropped image 37 | Note: generate the image shifted by shift_x, shift_y specified below 38 | """ 39 | shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2) 40 | img_pad = np.pad(img, ((self.padding, self.padding), (self.padding, self.padding), (0, 0)), 'constant', constant_values=0) 41 | img_crop = img_pad[self.padding + shift_x : self.padding + shift_x + img.shape[0], self.padding + shift_y : self.padding + shift_y + img.shape[1], :] 42 | return img_crop 43 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist_dataset import * 2 | from .ndarray_dataset import * 3 | from .cifar10_dataset import * 4 | from .ptb_dataset import * 5 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/cifar10_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 4 | import numpy as np 5 | from ..data_basic import Dataset 6 | 7 | class CIFAR10Dataset(Dataset): 8 | def __init__( 9 | self, 10 | base_folder: str, 11 | train: bool, 12 | p: Optional[float] = 0.5, 13 | transforms: Optional[List] = None 14 | ): 15 | """ 16 | Parameters: 17 | base_folder - cifar-10-batches-py folder filepath 18 | train - bool, if True load training dataset, else load test dataset 19 | Divide pixel values by 255 so that images are in the 0-1 range. 20 | Attributes: 21 | X - numpy array of images 22 | y - numpy array of labels 23 | """ 24 | if train: 25 | self.X = np.empty((0, 3, 32, 32)) 26 | self.y = np.empty((0,)) 27 | for i in range(1, 6): 28 | with open(os.path.join(base_folder, f"data_batch_{i}"), "rb") as f: 29 | data = pickle.load(f, encoding="bytes") 30 | self.X = np.concatenate((self.X, data[b"data"].reshape(-1, 3, 32, 32)), axis=0) 31 | self.y = np.concatenate((self.y, data[b"labels"]), axis=0) 32 | else: 33 | with open(os.path.join(base_folder, "test_batch"), "rb") as f: 34 | data = pickle.load(f, encoding="bytes") 35 | self.X = data[b"data"].reshape(-1, 3, 32, 32) 36 | self.y = np.array(data[b"labels"]) 37 | 38 | self.X = self.X.astype(np.float32) / 255.0 39 | self.transforms = [] if transforms is None else transforms 40 | 41 | 42 | def __getitem__(self, index) -> object: 43 | """ 44 | Returns the image, label at given index 45 | Image should be of shape (3, 32, 32) 46 | """ 47 | images = self.X[index] 48 | labels = self.y[index] 49 | for func in self.transforms: 50 | images = func(images) 51 | return images, labels 52 | 53 | def __len__(self) -> int: 54 | """ 55 | Returns the total number of examples in the dataset 56 | """ 57 | return len(self.y) 58 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/mnist_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from ..data_basic import Dataset 3 | import numpy as np 4 | 5 | class MNISTDataset(Dataset): 6 | def __init__( 7 | self, 8 | image_filename: str, 9 | label_filename: str, 10 | transforms: Optional[List] = None, 11 | ): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def __getitem__(self, 
index) -> object: 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | def __len__(self) -> int: 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/ndarray_dataset.py: -------------------------------------------------------------------------------- 1 | from ..data_basic import Dataset 2 | 3 | class NDArrayDataset(Dataset): 4 | def __init__(self, *arrays): 5 | self.arrays = arrays 6 | 7 | def __len__(self) -> int: 8 | return self.arrays[0].shape[0] 9 | 10 | def __getitem__(self, i) -> object: 11 | return tuple([a[i] for a in self.arrays]) -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/ptb_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from needle import backend_ndarray as nd 5 | from needle import Tensor 6 | 7 | class Dictionary(object): 8 | """ 9 | Creates a dictionary from a list of words, mapping each word to a 10 | unique integer. 11 | Attributes: 12 | word2idx: dictionary mapping from a word to its unique ID 13 | idx2word: list of words in the dictionary, in the order they were added 14 | to the dictionary (i.e. each word only appears once in this list) 15 | """ 16 | def __init__(self): 17 | self.word2idx = {} 18 | self.idx2word = [] 19 | 20 | def add_word(self, word): 21 | """ 22 | Input: word of type str 23 | If the word is not in the dictionary, adds the word to the dictionary 24 | and appends to the list of words. 25 | Returns the word's unique ID. 26 | """ 27 | if word not in self.word2idx: 28 | idx = len(self.idx2word) 29 | self.word2idx[word] = idx 30 | self.idx2word.append(word) 31 | return self.word2idx[word] 32 | 33 | def __len__(self): 34 | """ 35 | Returns the number of unique words in the dictionary. 36 | """ 37 | return len(self.idx2word) 38 | 39 | 40 | 41 | class Corpus(object): 42 | """ 43 | Creates a corpus from train and test txt files. 44 | """ 45 | def __init__(self, base_dir, max_lines=None): 46 | self.dictionary = Dictionary() 47 | self.train = self.tokenize(os.path.join(base_dir, 'train.txt'), max_lines) 48 | self.test = self.tokenize(os.path.join(base_dir, 'test.txt'), max_lines) 49 | 50 | def tokenize(self, path, max_lines=None): 51 | """ 52 | Input: 53 | path - path to text file 54 | max_lines - maximum number of lines to read in 55 | Tokenizes a text file, first adding each word in the file to the dictionary, 56 | and then tokenizing the text file to a list of IDs. When adding words to the 57 | dictionary (and tokenizing the file content) '<eos>' should be appended to 58 | the end of each line in order to properly account for the end of the sentence. 59 | Output: 60 | ids: List of ids 61 | """ 62 | ids = [] 63 | self.dictionary.add_word('<eos>') 64 | with open(path, 'r') as f: 65 | if max_lines is not None: 66 | lines = f.readlines()[:max_lines] 67 | else: 68 | lines = f.readlines() 69 | for line in lines: 70 | words = line.split() + ['<eos>'] 71 | for word in words: 72 | ids.append(self.dictionary.add_word(word)) 73 | return ids 74 | 75 | 76 | def batchify(data, batch_size, device, dtype): 77 | """ 78 | Starting from sequential data, batchify arranges the dataset into columns. 
79 | For instance, with the alphabet as the sequence and batch size 4, we'd get 80 | ┌ a g m s ┐ 81 | │ b h n t │ 82 | │ c i o u │ 83 | │ d j p v │ 84 | │ e k q w │ 85 | └ f l r x ┘. 86 | These columns are treated as independent by the model, which means that the 87 | dependence of e.g. 'g' on 'f' cannot be learned, but allows more efficient 88 | batch processing. 89 | If the data cannot be evenly divided by the batch size, trim off the remainder. 90 | Returns the data as a numpy array of shape (nbatch, batch_size). 91 | """ 92 | nbatch = len(data) // batch_size 93 | data = np.array(data[:nbatch * batch_size]).reshape(nbatch, batch_size) 94 | return data 95 | 96 | 97 | def get_batch(batches, i, bptt, device=None, dtype=None): 98 | """ 99 | get_batch subdivides the source data into chunks of length bptt. 100 | If source is equal to the example output of the batchify function, with 101 | a bptt-limit of 2, we'd get the following two Variables for i = 0: 102 | ┌ a g m s ┐ ┌ b h n t ┐ 103 | └ b h n t ┘ └ c i o u ┘ 104 | Note that despite the name of the function, the subdivision of data is not 105 | done along the batch dimension (i.e. dimension 1), since that was handled 106 | by the batchify function. The chunks are along dimension 0, corresponding 107 | to the seq_len dimension in the LSTM or RNN. 108 | Inputs: 109 | batches - numpy array returned from batchify function 110 | i - index 111 | bptt - Sequence length 112 | Returns: 113 | data - Tensor of shape (bptt, bs) with cached data as NDArray 114 | target - Tensor of shape (bptt*bs,) with cached data as NDArray 115 | """ 116 | # Since we have to fetch at least one row as data and one row as target, 117 | # we subtract 1 from bptt to get the maximum possible sequence length. 118 | seq_len = min(bptt, batches.shape[0] - 1 - i) 119 | 120 | data = batches[i : i + seq_len] 121 | target = batches[i + 1 : i + 1 + seq_len].reshape(-1) 122 | return Tensor(data, device=device, dtype=dtype), Tensor(target, device=device, dtype=dtype) -------------------------------------------------------------------------------- /hw4/python/needle/init/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_basic import * 2 | 3 | from .init_initializers import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/init/init_basic.py: -------------------------------------------------------------------------------- 1 | import math 2 | import needle as ndl 3 | 4 | 5 | def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False): 6 | """Generate random numbers uniform between low and high""" 7 | device = ndl.cpu() if device is None else device 8 | array = device.rand(*shape) * (high - low) + low 9 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 10 | 11 | 12 | def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False): 13 | """Generate random normal with specified mean and std deviation""" 14 | device = ndl.cpu() if device is None else device 15 | array = device.randn(*shape) * std + mean 16 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 17 | 18 | 19 | 20 | def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False): 21 | """Generate constant Tensor""" 22 | device = ndl.cpu() if device is None else device 23 | array = device.full(shape, c, dtype=dtype) 24 | return ndl.Tensor(array, device=device, dtype=dtype, 
requires_grad=requires_grad) 25 | 26 | def ones(*shape, device=None, dtype="float32", requires_grad=False): 27 | """Generate all-ones Tensor""" 28 | return constant( 29 | *shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad 30 | ) 31 | 32 | 33 | def zeros(*shape, device=None, dtype="float32", requires_grad=False): 34 | """Generate all-zeros Tensor""" 35 | return constant( 36 | *shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad 37 | ) 38 | 39 | 40 | def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False): 41 | """Generate binary random Tensor""" 42 | device = ndl.cpu() if device is None else device 43 | array = device.rand(*shape) <= p 44 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 45 | 46 | 47 | def one_hot(n, i, device=None, dtype="float32", requires_grad=False): 48 | """Generate one-hot encoding Tensor""" 49 | device = ndl.cpu() if device is None else device 50 | return ndl.Tensor( 51 | device.one_hot(n, i.numpy().astype("int32"), dtype=dtype), 52 | device=device, 53 | requires_grad=requires_grad, 54 | ) 55 | 56 | 57 | def zeros_like(array, *, device=None, requires_grad=False): 58 | device = device if device else array.device 59 | return zeros( 60 | *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad 61 | ) 62 | 63 | 64 | def ones_like(array, *, device=None, requires_grad=False): 65 | device = device if device else array.device 66 | return ones( 67 | *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad 68 | ) 69 | -------------------------------------------------------------------------------- /hw4/python/needle/init/init_initializers.py: -------------------------------------------------------------------------------- 1 | import math 2 | from .init_basic import * 3 | 4 | 5 | def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs): 6 | a = gain * math.sqrt(6.0 / (fan_in + fan_out)) 7 | return rand(fan_in, fan_out, low=-a, high=a, **kwargs) 8 | 9 | 10 | def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs): 11 | std = gain * math.sqrt(2.0 / (fan_in + fan_out)) 12 | return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs) 13 | 14 | 15 | def kaiming_uniform(fan_in, fan_out, shape=None, nonlinearity="relu", **kwargs): 16 | assert nonlinearity == "relu", "Only relu supported currently" 17 | if shape is not None: 18 | fan_in = math.prod(shape[:-1]) 19 | else: 20 | shape = (fan_in, fan_out) 21 | gain = math.sqrt(2.0) 22 | bound = gain * math.sqrt(3.0 / fan_in) 23 | return rand(*shape, low=-bound, high=bound, **kwargs) 24 | 25 | 26 | def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs): 27 | assert nonlinearity == "relu", "Only relu supported currently" 28 | gain = math.sqrt(2.0) 29 | std = gain * math.sqrt(1.0 / fan_in) 30 | return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs) -------------------------------------------------------------------------------- /hw4/python/needle/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .nn_basic import * 2 | from .nn_conv import * 3 | from .nn_sequence import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/nn/nn_conv.py: -------------------------------------------------------------------------------- 1 | """The module. 
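Holds the multi-channel 2D convolutional layer defined below: inputs and outputs are NCHW, with an internal NHWC transpose around ops.conv; only 'same' padding and square kernels are supported.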
2 | """ 3 | import math 4 | from typing import List, Callable, Any 5 | from needle.autograd import Tensor 6 | from needle import ops 7 | import needle.init as init 8 | import numpy as np 9 | from .nn_basic import Parameter, Module 10 | 11 | 12 | class Conv(Module): 13 | """ 14 | Multi-channel 2D convolutional layer 15 | IMPORTANT: Accepts inputs in NCHW format, outputs also in NCHW format 16 | Only supports padding=same 17 | No grouped convolution or dilation 18 | Only supports square kernels 19 | """ 20 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, device=None, dtype="float32"): 21 | super().__init__() 22 | if isinstance(kernel_size, tuple): 23 | kernel_size = kernel_size[0] 24 | if isinstance(stride, tuple): 25 | stride = stride[0] 26 | self.in_channels = in_channels 27 | self.out_channels = out_channels 28 | self.kernel_size = kernel_size 29 | self.stride = stride 30 | 31 | self.weight = Parameter(init.kaiming_uniform( 32 | in_channels * kernel_size * kernel_size, out_channels, shape=(kernel_size, kernel_size, in_channels, out_channels), dtype=dtype, device=device, requires_grad=True)) 33 | if bias: 34 | # bound = 1.0 / math.sqrt(in_channels * (kernel_size ** 2)) 35 | self.bias = Parameter(init.rand(out_channels, dtype=dtype, device=device, requires_grad=True)) 36 | else: 37 | self.bias = None 38 | 39 | self.padding = (kernel_size - 1) // 2 40 | 41 | def forward(self, x: Tensor) -> Tensor: 42 | """ 43 | x: (N, C, H, W) 44 | """ 45 | # Transform x from NCHW to NHWC 46 | x = x.transpose((1, 2)).transpose((2, 3)) 47 | 48 | out = ops.conv(x, self.weight, stride=self.stride, padding=self.padding) 49 | if self.bias is not None: 50 | bias_broadcast = ops.broadcast_to(self.bias, out.shape) 51 | out = out + bias_broadcast 52 | 53 | # Transform out from NHWC to NCHW 54 | out = out.transpose((3, 1)).transpose((3, 2)) 55 | 56 | return out -------------------------------------------------------------------------------- /hw4/python/needle/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .ops_mathematic import * 2 | 3 | from .ops_logarithmic import * 4 | from .ops_tuple import * 5 | -------------------------------------------------------------------------------- /hw4/python/needle/ops/ops_logarithmic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from ..autograd import NDArray 3 | from ..autograd import Op, Tensor, Value, TensorOp 4 | from ..autograd import TensorTuple, TensorTupleOp 5 | 6 | from .ops_mathematic import * 7 | 8 | from ..backend_selection import array_api, BACKEND 9 | 10 | class LogSoftmax(TensorOp): 11 | def compute(self, Z): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def gradient(self, out_grad, node): 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | 22 | def logsoftmax(a): 23 | return LogSoftmax()(a) 24 | 25 | 26 | class LogSumExp(TensorOp): 27 | def __init__(self, axes: Optional[tuple] = None): 28 | self.axes = axes 29 | 30 | def compute(self, Z): 31 | max_Z = array_api.max(Z, self.axes, keepdims=True) 32 | Z = Z - array_api.broadcast_to(max_Z, Z.shape) 33 | res = array_api.log(array_api.sum(array_api.exp(Z), self.axes)) 34 | res = res + array_api.reshape(max_Z, res.shape) 35 | return res 36 | 37 | def gradient(self, out_grad, node): 38 | input_data = node.inputs[0].realize_cached_data() 39 | max_input = 
array_api.max(input_data, self.axes, keepdims=True) 40 | input_data = input_data - array_api.broadcast_to(max_input, input_data.shape) 41 | sum_exp_z = array_api.sum(array_api.exp(input_data), self.axes, keepdims=True) 42 | cur_grad = array_api.exp(input_data) / array_api.broadcast_to(sum_exp_z, input_data.shape) 43 | 44 | if out_grad.shape != cur_grad.shape: 45 | if out_grad.cached_data.size == cur_grad.size: 46 | out_grad = reshape(out_grad, cur_grad.shape) 47 | else: 48 | # Reshape out_grad first (e.g. from (3,) to (3, 1); otherwise the broadcast result would not match expectations), then broadcast_to the shape of cur_grad 49 | new_shape = list(cur_grad.shape) 50 | if self.axes is not None: 51 | if isinstance(self.axes, Number): 52 | self.axes = (self.axes,) 53 | for axis in self.axes: 54 | new_shape[axis] = 1 55 | else: 56 | new_shape = [1] * len(new_shape) 57 | out_grad = reshape(out_grad, new_shape) 58 | out_grad = broadcast_to(out_grad, cur_grad.shape) 59 | return out_grad * cur_grad 60 | 61 | def logsumexp(a, axes=None): 62 | return LogSumExp(axes=axes)(a) 63 | 64 | -------------------------------------------------------------------------------- /hw4/python/needle/ops/ops_tuple.py: -------------------------------------------------------------------------------- 1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp 2 | import needle.init as init 3 | 4 | class MakeTensorTuple(TensorTupleOp): 5 | def compute(self, *args) -> tuple: 6 | return tuple(args) 7 | 8 | def gradient(self, out_grad, node): 9 | assert isinstance(out_grad, TensorTuple) 10 | return tuple([out_grad[i] for i in range(len(out_grad))]) 11 | 12 | 13 | def make_tuple(*args): 14 | return MakeTensorTuple()(*args) 15 | 16 | 17 | class TupleGetItem(TensorOp): 18 | def __init__(self, index): 19 | self.index = index 20 | 21 | def __call__(self, a: TensorTuple, fold_const=True) -> Value: 22 | assert isinstance(a, TensorTuple) 23 | # constant folding 24 | if fold_const and isinstance(a.op, MakeTensorTuple): 25 | return a.inputs[self.index] 26 | return Tensor.make_from_op(self, [a]) 27 | 28 | def compute(self, a): 29 | return a[self.index] 30 | 31 | def gradient(self, out_grad, node): 32 | index = self.index 33 | in_grad = [] 34 | for i, value in enumerate(node.inputs[0]): 35 | if i != index: 36 | in_grad.append(init.zeros_like(value)) 37 | else: 38 | in_grad.append(out_grad) 39 | return MakeTensorTuple()(*in_grad) 40 | 41 | 42 | def tuple_get_item(value, index): 43 | return TupleGetItem(index)(value) 44 | 45 | 46 | class FusedAddScalars(TensorTupleOp): 47 | def __init__(self, c0: float, c1: float): 48 | self.c0 = c0 49 | self.c1 = c1 50 | 51 | def compute(self, a): 52 | return a + self.c0, a + self.c1 53 | 54 | def gradient(self, out_grad, node): 55 | return out_grad[0] + out_grad[1] 56 | 57 | 58 | def fused_add_scalars(x, c0, c1): 59 | return FusedAddScalars(c0, c1)(x) 60 | -------------------------------------------------------------------------------- /hw4/python/needle/optim.py: -------------------------------------------------------------------------------- 1 | """Optimization module""" 2 | import needle as ndl 3 | import numpy as np 4 | 5 | 6 | class Optimizer: 7 | def __init__(self, params): 8 | self.params = params 9 | 10 | def step(self): 11 | raise NotImplementedError() 12 | 13 | def reset_grad(self): 14 | for p in self.params: 15 | p.grad = None 16 | 17 | 18 | class SGD(Optimizer): 19 | def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0): 20 | super().__init__(params) 21 | self.lr = lr 22 | self.momentum = momentum 23 | self.u = {} 24 | 
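# Note on step() below: per parameter it maintains a damped momentum buffer,
#   u <- momentum * u + (1 - momentum) * (grad + weight_decay * w)
#   w <- w - lr * u
# i.e. the weight-decay penalty is folded into the gradient before the
# running average is updated.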
self.weight_decay = weight_decay 25 | 26 | def step(self): 27 | for param in self.params: 28 | # Add the weight-decay (L2) penalty to the gradient 29 | grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach() 30 | u = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad_with_penalty 31 | # Convert the dtype from float64 back to float32 32 | u = ndl.Tensor(u, dtype=param.dtype) 33 | self.u[id(param)] = u 34 | param.data -= self.lr * u 35 | 36 | 37 | 38 | class Adam(Optimizer): 39 | def __init__( 40 | self, 41 | params, 42 | lr=0.01, 43 | beta1=0.9, 44 | beta2=0.999, 45 | eps=1e-8, 46 | weight_decay=0.0, 47 | ): 48 | super().__init__(params) 49 | self.lr = lr 50 | self.beta1 = beta1 51 | self.beta2 = beta2 52 | self.eps = eps 53 | self.weight_decay = weight_decay 54 | self.t = 0 55 | 56 | self.m = {} 57 | self.v = {} 58 | 59 | def step(self): 60 | self.t += 1 61 | for param in self.params: 62 | # Add the weight-decay (L2) penalty to the gradient 63 | grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach() 64 | # Convert the dtype from float64 back to float32 65 | grad_with_penalty = ndl.Tensor(grad_with_penalty, dtype=param.dtype) 66 | 67 | m = self.beta1 * self.m.get(id(param), 0) + (1 - self.beta1) * grad_with_penalty 68 | v = self.beta2 * self.v.get(id(param), 0) + (1 - self.beta2) * grad_with_penalty ** 2 69 | self.m[id(param)] = m.detach() 70 | self.v[id(param)] = v.detach() 71 | m_hat = m / (1 - self.beta1 ** self.t) 72 | v_hat = v / (1 - self.beta2 ** self.t) 73 | param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps) -------------------------------------------------------------------------------- /hw4/tests/hw4/test_cifar_ptb_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./python') 3 | import itertools 4 | import numpy as np 5 | import pytest 6 | import mugrade 7 | 8 | import needle as ndl 9 | from needle import backend_ndarray as nd 10 | 11 | 12 | np.random.seed(2) 13 | 14 | 15 | _DEVICES = [ndl.cpu(), pytest.param(ndl.cuda(), 16 | marks=pytest.mark.skipif(not ndl.cuda().enabled(), reason="No GPU"))] 17 | 18 | 19 | TRAIN = [True, False] 20 | @pytest.mark.parametrize("train", TRAIN) 21 | def test_cifar10_dataset(train): 22 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train) 23 | if train: 24 | assert len(dataset) == 50000 25 | else: 26 | assert len(dataset) == 10000 27 | example = dataset[np.random.randint(len(dataset))] 28 | assert(isinstance(example, tuple)) 29 | X, y = example 30 | assert isinstance(X, np.ndarray) 31 | assert X.shape == (3, 32, 32) 32 | 33 | 34 | BATCH_SIZES = [1, 15] 35 | @pytest.mark.parametrize("batch_size", BATCH_SIZES) 36 | @pytest.mark.parametrize("train", TRAIN) 37 | @pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"]) 38 | def test_cifar10_loader(batch_size, train, device): 39 | cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True) 40 | train_loader = ndl.data.DataLoader(cifar10_train_dataset, batch_size) 41 | for (X, y) in train_loader: 42 | break 43 | assert isinstance(X.cached_data, nd.NDArray) 44 | assert isinstance(X, ndl.Tensor) 45 | assert isinstance(y, ndl.Tensor) 46 | assert X.dtype == 'float32' 47 | 48 | 49 | BPTT = [3, 32] 50 | @pytest.mark.parametrize("batch_size", BATCH_SIZES) 51 | @pytest.mark.parametrize("bptt", BPTT) 52 | @pytest.mark.parametrize("train", TRAIN) 53 | @pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"]) 54 | def test_ptb_dataset(batch_size, bptt, train, device): 55 | # TODO update with more tests? 
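# Shape contract exercised below (see ptb_dataset.py earlier in this dump):
# batchify trims the remainder and returns an array of shape (nbatch, batch_size);
# get_batch(batches, i, bptt) returns X of shape (bptt, batch_size) and the
# flattened next-token targets y of shape (bptt * batch_size,).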
56 | corpus = ndl.data.Corpus("data/ptb") 57 | if train: 58 | data = ndl.data.batchify(corpus.train, batch_size, device=device, dtype="float32") 59 | else: 60 | data = ndl.data.batchify(corpus.test, batch_size, device=device, dtype="float32") 61 | X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt, device=device) 62 | assert X.shape == (bptt, batch_size) 63 | assert y.shape == (bptt * batch_size,) 64 | assert isinstance(X, ndl.Tensor) 65 | assert X.dtype == 'float32' 66 | assert X.device == device 67 | assert isinstance(X.cached_data, nd.NDArray) 68 | ntokens = len(corpus.dictionary) 69 | assert ntokens == 10000 70 | 71 | 72 | ### MUGRADE ### 73 | 74 | TEST_BATCH_SIZES = [3, 5] 75 | TEST_BPTT = [6, 10] 76 | 77 | def mugrade_submit(x): 78 | if isinstance(x, np.ndarray): 79 | x = x.flatten()[:128] 80 | #print(x) 81 | mugrade.submit(x) 82 | else: 83 | #print(x) 84 | mugrade.submit(x) 85 | 86 | 87 | def submit_cifar10(): 88 | if not ndl.cuda().enabled(): 89 | print('You need a GPU to run some of these tests.') 90 | devices = [ndl.cpu(), ndl.cuda()] 91 | for train in TRAIN: 92 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train) 93 | mugrade_submit(len(dataset)) 94 | for (device, batch_size) in itertools.product(devices, TEST_BATCH_SIZES): 95 | loader = ndl.data.DataLoader(dataset, batch_size) 96 | for (X, y) in loader: 97 | break 98 | mugrade_submit(X.numpy()[0, :, :, :]) 99 | mugrade_submit(y.numpy()[0]) 100 | 101 | 102 | def submit_ptb(): 103 | # devices = [ndl.cpu(), ndl.cuda()] if ndl.cuda().enabled() else [ndl.cpu()] 104 | devices = [ndl.cpu(), ndl.cuda()] 105 | 106 | corpus = ndl.data.Corpus("data/ptb") 107 | mugrade_submit(np.array(len(corpus.dictionary))) 108 | for train in TRAIN: 109 | for (device, batch_size, bptt) in itertools.product(devices, TEST_BATCH_SIZES, TEST_BPTT): 110 | if train: 111 | data = ndl.data.batchify(corpus.train, batch_size, device=device, dtype="float32") 112 | else: 113 | data = ndl.data.batchify(corpus.test, batch_size, device=device, dtype="float32") 114 | X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt) 115 | mugrade_submit(np.array(len(data))) 116 | mugrade_submit(X.numpy()[0, :]) 117 | mugrade_submit(y.numpy()[0]) 118 | 119 | 120 | if __name__ == "__main__": 121 | submit_cifar10() 122 | submit_ptb() --------------------------------------------------------------------------------
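A minimal usage sketch (an illustration appended to this dump, not a file from the repository; it assumes hw4/ as the working directory, the needle package importable from ./python, and exercises only APIs shown above):

import os, sys
os.environ["NEEDLE_BACKEND"] = "np"  # "nd" (the default) would select the compiled NDArray backends
sys.path.append("./python")
import needle as ndl  # backend_selection.py prints "Using numpy backend"

x = ndl.Tensor([[1.0, 2.0], [3.0, 4.0]], device=ndl.cpu())
y = ndl.summation(x * 2.0 + 1.0)  # scalar Tensor; the operator overloads live in autograd.py (not shown in this section)
y.backward()
print(x.grad)  # gradient of sum(2x + 1) is 2 everywhere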