├── .gitignore ├── README.md ├── hw0 ├── .idea │ ├── .gitignore │ ├── hw0.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .ipynb_checkpoints │ └── hw0-checkpoint.ipynb ├── Makefile ├── README.md ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── hw0.ipynb ├── hw0.pdf ├── src │ ├── __pycache__ │ │ ├── simple_ml.cpython-310.pyc │ │ └── simple_ml.cpython-39.pyc │ ├── simple_ml.py │ ├── simple_ml_ext.cpp │ └── simple_ml_ext.so └── tests │ ├── __pycache__ │ └── test_simple_ml.cpython-310-pytest-7.1.2.pyc │ └── test_simple_ml.py ├── hw1 ├── .gitignore ├── apps │ └── simple_ml.py ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── hw1.ipynb ├── hw1.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── autograd.py │ │ └── ops.py ├── tempCodeRunnerFile.ipynb ├── test.py └── tests │ └── test_autograd_hw.py ├── hw2 ├── .idea │ ├── .gitignore │ ├── hw2.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── README.md ├── apps │ ├── __pycache__ │ │ ├── mlp_resnet.cpython-310.pyc │ │ └── mlp_resnet.cpython-39.pyc │ └── mlp_resnet.py ├── data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── debug.py ├── figures │ ├── mlp_resnet.png │ └── residualblock.png ├── hw2.ipynb ├── hw2.ipynb - Colaboratory.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── autograd.cpython-310.pyc │ │ ├── autograd.cpython-39.pyc │ │ ├── data.cpython-310.pyc │ │ ├── data.cpython-39.pyc │ │ ├── init.cpython-310.pyc │ │ ├── init.cpython-39.pyc │ │ ├── nn.cpython-310.pyc │ │ ├── nn.cpython-39.pyc │ │ ├── ops.cpython-310.pyc │ │ ├── ops.cpython-39.pyc │ │ ├── optim.cpython-310.pyc │ │ └── optim.cpython-39.pyc │ │ ├── autograd.py │ │ ├── data.py │ │ ├── init.py │ │ ├── nn.py │ │ ├── ops.py │ │ └── optim.py └── tests │ ├── __pycache__ │ ├── test_data.cpython-310-pytest-7.1.2.pyc │ ├── test_data.cpython-39.pyc │ ├── test_nn_and_optim.cpython-310-pytest-7.1.2.pyc │ └── test_nn_and_optim.cpython-39.pyc │ ├── test_data.py │ └── test_nn_and_optim.py ├── hw3 ├── .idea │ ├── .gitignore │ ├── hw3.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .tmp.driveupload │ └── 6569 ├── .vscode │ └── settings.json ├── CMakeLists.txt ├── Makefile ├── README.md ├── build │ ├── CMakeCache.txt │ ├── CMakeFiles │ │ ├── 3.27.9 │ │ │ ├── CMakeCCompiler.cmake │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ ├── CMakeSystem.cmake │ │ │ ├── CompilerIdC │ │ │ │ ├── CMakeCCompilerId.c │ │ │ │ └── a.out │ │ │ └── CompilerIdCXX │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ └── a.out │ │ ├── CMakeConfigureLog.yaml │ │ ├── CMakeDirectoryInformation.cmake │ │ ├── CMakeRuleHashes.txt │ │ ├── Makefile.cmake │ │ ├── Makefile2 │ │ ├── TargetDirectories.txt │ │ ├── cmake.check_cache │ │ ├── ndarray_backend_cpu.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.internal │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── progress.make │ │ │ └── 
src │ │ │ │ ├── ndarray_backend_cpu.cc.o │ │ │ │ └── ndarray_backend_cpu.cc.o.d │ │ ├── ndarray_backend_cuda.dir │ │ │ ├── DependInfo.cmake │ │ │ ├── build.make │ │ │ ├── cmake_clean.cmake │ │ │ ├── compiler_depend.make │ │ │ ├── compiler_depend.ts │ │ │ ├── depend.make │ │ │ ├── flags.make │ │ │ ├── link.txt │ │ │ ├── progress.make │ │ │ └── src │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake │ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen │ │ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend │ │ └── progress.marks │ ├── Makefile │ ├── cmake_install.cmake │ └── detect_cuda_compute_capabilities.cpp ├── debug.py ├── hw3.ipynb ├── hw3.ipynb - Colaboratory.pdf ├── python │ └── needle │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── autograd.cpython-310.pyc │ │ ├── backend_numpy.cpython-310.pyc │ │ ├── backend_selection.cpython-310.pyc │ │ └── optim.cpython-310.pyc │ │ ├── autograd.py │ │ ├── backend_ndarray │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── ndarray.cpython-310.pyc │ │ │ └── ndarray_backend_numpy.cpython-310.pyc │ │ ├── ndarray.py │ │ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so │ │ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so │ │ └── ndarray_backend_numpy.py │ │ ├── backend_numpy.py │ │ ├── backend_selection.py │ │ ├── data │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── data_basic.cpython-310.pyc │ │ │ └── data_transforms.cpython-310.pyc │ │ ├── data_basic.py │ │ ├── data_transforms.py │ │ └── datasets │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── mnist_dataset.cpython-310.pyc │ │ │ └── ndarray_dataset.cpython-310.pyc │ │ │ ├── mnist_dataset.py │ │ │ └── ndarray_dataset.py │ │ ├── init │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── init_basic.cpython-310.pyc │ │ │ └── init_initializers.cpython-310.pyc │ │ ├── init_basic.py │ │ └── init_initializers.py │ │ ├── nn │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ └── nn_basic.cpython-310.pyc │ │ └── nn_basic.py │ │ ├── ops │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-310.pyc │ │ │ ├── ops_logarithmic.cpython-310.pyc │ │ │ ├── ops_mathematic.cpython-310.pyc │ │ │ └── ops_tuple.cpython-310.pyc │ │ ├── ops_logarithmic.py │ │ ├── ops_mathematic.py │ │ └── ops_tuple.py │ │ └── optim.py ├── src │ ├── ndarray_backend_cpu.cc │ └── ndarray_backend_cuda.cu └── tests │ └── hw3 │ ├── __pycache__ │ ├── test_ndarray.cpython-310-pytest-7.1.2.pyc │ ├── test_ndarray.cpython-310-pytest-7.4.3.pyc │ └── test_ndarray.cpython-310.pyc │ └── test_ndarray.py └── hw4 ├── .idea ├── .gitignore ├── hw4.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── .tmp.driveupload ├── 7538 ├── 7792 └── 7888 ├── CMakeLists.txt ├── Makefile ├── README.md ├── ResNet9.png ├── apps ├── models.py └── simple_ml.py ├── build ├── CMakeCache.txt ├── CMakeFiles │ ├── 3.27.9 │ │ ├── CMakeCCompiler.cmake │ │ ├── CMakeCXXCompiler.cmake │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ ├── CMakeSystem.cmake │ │ ├── CompilerIdC │ │ │ ├── CMakeCCompilerId.c │ │ │ └── a.out │ │ └── CompilerIdCXX │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ └── a.out │ ├── CMakeConfigureLog.yaml │ ├── 
CMakeDirectoryInformation.cmake │ ├── CMakeRuleHashes.txt │ ├── Makefile.cmake │ ├── Makefile2 │ ├── TargetDirectories.txt │ ├── cmake.check_cache │ ├── ndarray_backend_cpu.dir │ │ ├── DependInfo.cmake │ │ ├── build.make │ │ ├── cmake_clean.cmake │ │ ├── compiler_depend.internal │ │ ├── compiler_depend.make │ │ ├── compiler_depend.ts │ │ ├── depend.make │ │ ├── flags.make │ │ ├── link.txt │ │ ├── progress.make │ │ └── src │ │ │ ├── ndarray_backend_cpu.cc.o │ │ │ └── ndarray_backend_cpu.cc.o.d │ ├── ndarray_backend_cuda.dir │ │ ├── DependInfo.cmake │ │ ├── build.make │ │ ├── cmake_clean.cmake │ │ ├── compiler_depend.make │ │ ├── compiler_depend.ts │ │ ├── depend.make │ │ ├── flags.make │ │ ├── link.txt │ │ ├── progress.make │ │ └── src │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen │ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend │ └── progress.marks ├── Makefile ├── cmake_install.cmake └── detect_cuda_compute_capabilities.cpp ├── debug.py ├── hw4.ipynb ├── hw4.ipynb - Colaboratory.pdf ├── python └── needle │ ├── __init__.py │ ├── autograd.py │ ├── backend_ndarray │ ├── __init__.py │ ├── ndarray.py │ ├── ndarray_backend_cpu.cpython-310-darwin.so │ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so │ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so │ └── ndarray_backend_numpy.py │ ├── backend_numpy.py │ ├── backend_selection.py │ ├── data │ ├── __init__.py │ ├── data_basic.py │ ├── data_transforms.py │ └── datasets │ │ ├── __init__.py │ │ ├── cifar10_dataset.py │ │ ├── mnist_dataset.py │ │ ├── ndarray_dataset.py │ │ └── ptb_dataset.py │ ├── init │ ├── __init__.py │ ├── init_basic.py │ └── init_initializers.py │ ├── nn │ ├── __init__.py │ ├── nn_basic.py │ ├── nn_conv.py │ └── nn_sequence.py │ ├── ops │ ├── __init__.py │ ├── ops_logarithmic.py │ ├── ops_mathematic.py │ └── ops_tuple.py │ └── optim.py ├── src ├── ndarray_backend_cpu.cc └── ndarray_backend_cuda.cu └── tests └── hw4 ├── test_cifar_ptb_data.py ├── test_conv.py ├── test_nd_backend.py └── test_sequence_models.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.git/ 2 | **/__pycache__/ 3 | hw4/data/cifar-10-batches-py/ 4 | hw4/data/ptb/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLearningSystem 2 | 3 | The project builds a simple version of PyTorch from scratch. 
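By the end of the homeworks, the `needle` package supports tensors, reverse-mode autodiff, neural-network modules and optimizers. A minimal sketch of the core API (assuming the hw1 package layout is on `sys.path`; details vary per homework):

import sys
sys.path.append('hw1/python')
import numpy as np
import needle as ndl

x = ndl.Tensor(np.random.randn(3, 4).astype(np.float32))
w = ndl.Tensor(np.random.randn(4, 2).astype(np.float32))
loss = ndl.relu(x.matmul(w)).sum()  # builds a computation graph
loss.backward()                     # reverse-mode autodiff
print(w.grad.numpy())               # gradients, PyTorch-style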
It contains the homework assignments of CMU 10-414/714: Deep Learning Systems (https://dlsyscourse.org/).
--------------------------------------------------------------------------------
/hw0/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw0/Makefile:
--------------------------------------------------------------------------------
# NOTE: on macOS you need to add an additional flag: -undefined dynamic_lookup
default:
	c++ -O3 -Wall -shared -std=c++11 -fPIC -undefined dynamic_lookup $$(python3 -m pybind11 --includes) src/simple_ml_ext.cpp -o src/simple_ml_ext.so
--------------------------------------------------------------------------------
/hw0/README.md:
--------------------------------------------------------------------------------
# Homework 0
Public repository and stub/testing code for Homework 0 of 10-714.
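A quick way to smoke-test the extension built from src/simple_ml_ext.cpp below — a sketch, not part of the assignment; it assumes `make` has already produced src/simple_ml_ext.so and is run from the hw0 directory, and the synthetic shapes are made up:

import sys
sys.path.append('src')            # directory containing simple_ml_ext.so
import numpy as np
from simple_ml_ext import softmax_regression_epoch_cpp

m, n, k = 60, 5, 3                # examples, input dim, classes
X = np.random.rand(m, n).astype(np.float32)
y = np.random.randint(k, size=m).astype(np.uint8)
theta = np.zeros((n, k), dtype=np.float32)
softmax_regression_epoch_cpp(X, y, theta, lr=0.1, batch=10)
print(theta)                      # theta has been updated in place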
--------------------------------------------------------------------------------
/hw0/src/simple_ml_ext.cpp:
--------------------------------------------------------------------------------
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <cmath>
#include <iostream>
#include <vector>

namespace py = pybind11;


void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
                                  float *theta, size_t m, size_t n, size_t k,
                                  float lr, size_t batch)
{
    /**
     * A C++ version of the softmax regression epoch code.  This should run a
     * single epoch over the data defined by X and y (and sizes m,n,k), and
     * modify theta in place.  Your function will probably want to allocate
     * (and then delete) some helper arrays to store the logits and gradients.
     *
     * Args:
     *     X (const float *): pointer to X data, of size m*n, stored in row
     *          major (C) format
     *     y (const unsigned char *): pointer to y data, of size m
     *     theta (float *): pointer to theta data, of size n*k, stored in row
     *          major (C) format
     *     m (size_t): number of examples
     *     n (size_t): input dimension
     *     k (size_t): number of classes
     *     lr (float): learning rate / SGD step size
     *     batch (int): SGD minibatch size
     *
     * Returns:
     *     (None)
     */

    size_t sample_idx = 0;
    auto Z = std::vector<std::vector<float>>(batch, std::vector<float>(k, 0.0));
    // Run batch
    while (sample_idx < m) {
        if (sample_idx + batch > m) {
            batch = m - sample_idx;
        }

        // Compute Z = normalize(exp(X * theta))
        // The outer two loops iterate over Z, whose size is batch * k
        for (size_t idx = 0; idx < batch; idx++) {
            float row_sum = 0.0;
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] = 0.0;
                for (size_t inner_idx = 0; inner_idx < n; inner_idx++) {
                    Z[idx][j] += X[(sample_idx + idx) * n + inner_idx] * theta[inner_idx * k + j];
                }
                Z[idx][j] = std::exp(Z[idx][j]);
                row_sum += Z[idx][j];
            }
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] /= row_sum;
            }
        }

        // Let Z = Z - Iy
        for (size_t idx = 0; idx < batch; idx++) {
            Z[idx][y[sample_idx + idx]] -= 1.0;
        }

        // Compute gradient
        // The outer two loops iterate over theta, whose size is n * k
        for (size_t idx = 0; idx < n; idx++) {
            for (size_t j = 0; j < k; j++) {
                float diff = 0.0;
                for (size_t inner_idx = 0; inner_idx < batch; inner_idx++) {
                    // theta_diff = X^T * Z
                    // theta_diff(idx, j) = sum_{inner_idx=1}^{batch} X^T(idx, inner_idx) * Z(inner_idx, j)
                    //                    = sum_{inner_idx=1}^{batch} X(inner_idx, idx) * Z(inner_idx, j)
                    diff += X[(sample_idx + inner_idx) * n + idx] * Z[inner_idx][j];
                }
                theta[idx * k + j] -= lr * diff / batch;
            }
        }
        sample_idx += batch;
    }
}


/**
 * This is the pybind11 code that wraps the function above.
 * Its only role is to wrap the function above in a Python module; you do not
 * need to make any edits to this code.
 */
PYBIND11_MODULE(simple_ml_ext, m) {
    m.def("softmax_regression_epoch_cpp",
          [](py::array_t<float, py::array::c_style> X,
             py::array_t<unsigned char, py::array::c_style> y,
             py::array_t<float, py::array::c_style> theta,
             float lr,
             int batch) {
              softmax_regression_epoch_cpp(
                  static_cast<const float*>(X.request().ptr),
                  static_cast<const unsigned char*>(y.request().ptr),
                  static_cast<float*>(theta.request().ptr),
                  X.request().shape[0],
                  X.request().shape[1],
                  theta.request().shape[1],
                  lr,
                  batch
              );
          },
          py::arg("X"), py::arg("y"), py::arg("theta"),
          py::arg("lr"), py::arg("batch"));
}
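For cross-checking the extension, a NumPy sketch of the same epoch (my own reference, not part of the stub) mirrors the three steps above — logits, Z - Iy, and the X^T Z gradient step:

import numpy as np

def softmax_regression_epoch_np(X, y, theta, lr=0.1, batch=100):
    """NumPy mirror of softmax_regression_epoch_cpp; updates theta in place."""
    for i in range(0, X.shape[0], batch):
        Xb, yb = X[i:i + batch], y[i:i + batch]
        Z = np.exp(Xb @ theta)
        Z /= Z.sum(axis=1, keepdims=True)          # normalize(exp(X * theta))
        Z[np.arange(Xb.shape[0]), yb] -= 1.0       # Z = Z - Iy
        theta -= (lr / Xb.shape[0]) * (Xb.T @ Z)   # theta -= lr/B * X^T Z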
--------------------------------------------------------------------------------
/hw1/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
.ipynb_checkpoints/
env/
tests/__pycache__
.idea/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
*~

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------
/hw1/apps/simple_ml.py:
--------------------------------------------------------------------------------
import struct
import gzip
import numpy as np

import sys
sys.path.append('python/')
import needle as ndl


def parse_mnist(image_filename, label_filename):
    """ Read images and labels files in MNIST format.  See this page:
    http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Args:
        image_filename (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X,y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
                data.  The dimensionality of the data should be
                (num_examples x input_dim) where 'input_dim' is the full
                dimension of the data, e.g., since MNIST images are 28x28, it
                will be 784.  Values should be of type np.float32, and the data
                should be normalized to have a minimum value of 0.0 and a
                maximum value of 1.0.

            y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values should be of type np.uint8 and
                for MNIST will contain the values 0-9.
    """
    with gzip.open(image_filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        X = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows*cols)
        X = X.astype(np.float32) / 255.0

    with gzip.open(label_filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        y = np.frombuffer(f.read(), dtype=np.uint8)

    return X, y


def softmax_loss(Z, y_one_hot):
    """ Return softmax loss.  Note that for the purposes of this assignment,
    you don't need to worry about "nicely" scaling the numerical properties
    of the log-sum-exp computation, but can just compute this directly.

    Args:
        Z (ndl.Tensor[np.float32]): 2D Tensor of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y_one_hot (ndl.Tensor[np.int8]): 2D Tensor of shape (batch_size, num_classes)
            containing a 1 at the index of the true label of each example and
            zeros elsewhere.

    Returns:
        Average softmax loss over the sample. (ndl.Tensor[np.float32])
    """
    batch_size = Z.shape[0]
    lhs = ndl.log(ndl.exp(Z).sum(axes=(1,)))
    rhs = (Z * y_one_hot).sum(axes=(1,))
    loss = (lhs - rhs).sum()
    return loss / batch_size


def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size (and
    again, without randomizing the order of X).

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (ndl.Tensor[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (ndl.Tensor[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD mini-batch

    Returns:
        Tuple: (W1, W2)
            W1: ndl.Tensor[np.float32]
            W2: ndl.Tensor[np.float32]
    """
    idx = 0
    num_classes = W2.shape[1]
    while idx < X.shape[0]:
        X_batch = ndl.Tensor(X[idx:idx+batch])
        Z1 = X_batch.matmul(W1)
        network_output = ndl.relu(Z1).matmul(W2)

        y_batch = y[idx:idx+batch]
        y_one_hot = np.zeros((batch, num_classes))
        y_one_hot[np.arange(batch), y_batch] = 1
        y_one_hot = ndl.Tensor(y_one_hot)

        loss = softmax_loss(network_output, y_one_hot)
        loss.backward()

        W1 = ndl.Tensor(W1.numpy() - lr * W1.grad.numpy())
        W2 = ndl.Tensor(W2.numpy() - lr * W2.grad.numpy())
        idx += batch
    return W1, W2


### CODE BELOW IS FOR ILLUSTRATION, YOU DO NOT NEED TO EDIT

def loss_err(h,y):
    """ Helper function to compute both loss and error"""
    y_one_hot = np.zeros((y.shape[0], h.shape[-1]))
    y_one_hot[np.arange(y.size), y] = 1
    y_ = ndl.Tensor(y_one_hot)
    return softmax_loss(h,y_).numpy(), np.mean(h.numpy().argmax(axis=1) != y)
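A quick numeric sanity check for softmax_loss — a sketch that assumes it is run from the hw1 directory with softmax_loss from apps/simple_ml.py in scope; the expected value is recomputed with plain NumPy:

import sys
sys.path.append('python')
import numpy as np
import needle as ndl

Z = np.random.randn(4, 10).astype(np.float32)   # logits for 4 examples
y = np.random.randint(10, size=4)
y_one_hot = np.zeros((4, 10), dtype=np.float32)
y_one_hot[np.arange(4), y] = 1

loss = softmax_loss(ndl.Tensor(Z), ndl.Tensor(y_one_hot))
expected = np.mean(np.log(np.exp(Z).sum(axis=1)) - Z[np.arange(4), y])
assert np.isclose(loss.numpy(), expected, atol=1e-5)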
--------------------------------------------------------------------------------
/hw1/python/needle/__init__.py:
--------------------------------------------------------------------------------
from .autograd import Tensor, cpu, all_devices
from . import ops
from .ops import *
--------------------------------------------------------------------------------
/hw1/tempCodeRunnerFile.ipynb:
--------------------------------------------------------------------------------
!pip3 install --upgrade --no-deps git+https://github.com/dlsys10714/mugrade.git
!pip3 install numdifftools
--------------------------------------------------------------------------------
/hw1/test.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('./tests')
from test_autograd_hw import *
# gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5,4)), axes=(1,))
# test_nn_epoch_ndl()
test_matmul_simple_backward()
test_matmul_batched_backward()
--------------------------------------------------------------------------------
/hw2/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw2/README.md:
--------------------------------------------------------------------------------
# Homework 2

Public repository and stub/testing code for Homework 2 of 10-714.
--------------------------------------------------------------------------------
/hw2/apps/mlp_resnet.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('../python')
import needle as ndl
import needle.nn as nn
import numpy as np
import time
import os

np.random.seed(0)

def ResidualBlock(dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
    main = nn.Sequential(
        nn.Linear(dim, hidden_dim),
        norm(hidden_dim),
        nn.ReLU(),
        nn.Dropout(drop_prob),
        nn.Linear(hidden_dim, dim),
        norm(dim)
    )

    return nn.Sequential(nn.Residual(main), nn.ReLU())


def MLPResNet(dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
    layers = []
    layers.append(nn.Flatten())
    layers.append(nn.Linear(dim, hidden_dim))
    layers.append(nn.ReLU())
    for _ in range(num_blocks):
        layers.append(ResidualBlock(hidden_dim, hidden_dim // 2, norm, drop_prob))
    layers.append(nn.Linear(hidden_dim, num_classes))
    return nn.Sequential(*layers)


def epoch(dataloader, model, opt=None):
    np.random.seed(4)
    if opt is not None:
        model.train()
    else:
        model.eval()

    loss_func = nn.SoftmaxLoss()

    losses = []
    total_acc = 0
    for X, y in dataloader:
        out = model(X)
        loss = loss_func(out, y)
        if opt is not None:
            loss.backward()
            opt.step()

        losses.append(loss.numpy())
        total_acc += (out.numpy().argmax(axis=1) == y.numpy()).sum()

    return 1 - total_acc / len(dataloader.dataset), np.mean(losses)


def train_mnist(batch_size=100, epochs=10, optimizer=ndl.optim.Adam,
                lr=0.001, weight_decay=0.001, hidden_dim=100, data_dir="data"):
    np.random.seed(4)
    dataset = ndl.data.MNISTDataset(
        os.path.join(data_dir, "train-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "train-labels-idx1-ubyte.gz")
    )
    data_loader = ndl.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    test_dataset = ndl.data.MNISTDataset(
        os.path.join(data_dir, "t10k-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "t10k-labels-idx1-ubyte.gz")
    )
    test_data_loader = ndl.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = MLPResNet(28 * 28, hidden_dim=hidden_dim, num_blocks=3, num_classes=10)
    opt = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_err, train_loss = 0, 0
    test_err, test_loss = 0, 0
    for i in range(epochs):
        start = time.time()
        train_err, train_loss = epoch(data_loader, model, opt)
        test_err, test_loss = epoch(test_data_loader, model)
        end = time.time()
        print("Epoch %d: Train err: %f, Train loss: %f | Test err: %f, Test loss: %f, Time: %f" % (
            i, train_err, train_loss, test_err, test_loss, end - start
        ))
    return train_err, train_loss, test_err, test_loss


if __name__ == "__main__":
    train_mnist(data_dir="../data")
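A shape sanity check for the model — a sketch assuming hw2's needle (with nn.Flatten available) and MLPResNet from this file in scope, run from hw2/apps:

import sys
sys.path.append('../python')
import numpy as np
import needle as ndl

model = MLPResNet(28 * 28, hidden_dim=100, num_blocks=3, num_classes=10)
X = ndl.Tensor(np.random.rand(8, 28 * 28).astype(np.float32))
out = model(X)
print(out.shape)   # expect (8, 10): one logit row per example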
--------------------------------------------------------------------------------
/hw2/debug.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('./tests')
sys.path.append("./python")

from test_nn_and_optim import *
from test_data import *

# test_nn_layernorm_backward_1()


if __name__ == "__main__":
    pow(1000, 1/256)
    test_mlp_train_mnist_1()
--------------------------------------------------------------------------------
/hw2/python/needle/__init__.py:
--------------------------------------------------------------------------------
from .autograd import Tensor, cpu, all_devices
from . import ops
from .ops import *
from . import init
from . import data
from . import nn
from . import optim
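The hw2 package now bundles ops, init, data, nn and optim; a minimal end-to-end training step looks roughly like this — a sketch run from the hw2 directory, with invented shapes:

import sys
sys.path.append('./python')
import numpy as np
import needle as ndl
import needle.nn as nn

model = nn.Sequential(nn.Linear(784, 100), nn.ReLU(), nn.Linear(100, 10))
opt = ndl.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
X = ndl.Tensor(np.random.rand(32, 784).astype(np.float32))
y = ndl.Tensor(np.random.randint(10, size=32))

loss = nn.SoftmaxLoss()(model(X), y)
loss.backward()
opt.step()          # one SGD-with-momentum update on every parameter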
--------------------------------------------------------------------------------
/hw2/python/needle/data.py:
--------------------------------------------------------------------------------
import numpy as np
import gzip
import struct
from .autograd import Tensor

from typing import Iterator, Optional, List, Sized, Union, Iterable, Any


class Transform:
    def __call__(self, x):
        raise NotImplementedError


class RandomFlipHorizontal(Transform):
    def __init__(self, p = 0.5):
        self.p = p

    def __call__(self, img):
        """
        Horizontally flip an image, specified as an H x W x C NDArray.
        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C ndarray corresponding to image flipped with probability self.p
        Note: use the provided code to provide randomness, for easier testing
        """
        flip_img = np.random.rand() < self.p
        if flip_img:
            img = img[:, ::-1, :]
        return img


class RandomCrop(Transform):
    def __init__(self, padding=3):
        self.padding = padding

    def __call__(self, img):
        """ Zero pad and then randomly crop an image.
        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C NDArray of the cropped image
        Note: generate the image shifted by shift_x, shift_y specified below
        """
        shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2)
        img_pad = np.pad(img, ((self.padding, self.padding), (self.padding, self.padding), (0, 0)), 'constant', constant_values=0)
        img_crop = img_pad[self.padding + shift_x : self.padding + shift_x + img.shape[0], self.padding + shift_y : self.padding + shift_y + img.shape[1], :]
        return img_crop


class Dataset:
    r"""An abstract class representing a `Dataset`.

    All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key.  Subclasses must also overwrite
    :meth:`__len__`, which is expected to return the size of the dataset.
    """

    def __init__(self, transforms: Optional[List] = None):
        self.transforms = transforms

    def __getitem__(self, index) -> object:
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def apply_transforms(self, x):
        if self.transforms is not None:
            # apply the transforms
            for tform in self.transforms:
                x = tform(x)
        return x


class DataLoader:
    r"""
    Data loader.  Combines a dataset and a sampler, and provides an iterable over
    the given dataset.
    Args:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load
            (default: ``1``).
        shuffle (bool, optional): set to ``True`` to have the data reshuffled
            at every epoch (default: ``False``).
    """
    dataset: Dataset
    batch_size: Optional[int]

    def __init__(
        self,
        dataset: Dataset,
        batch_size: Optional[int] = 1,
        shuffle: bool = False,
    ):

        self.dataset = dataset
        self.shuffle = shuffle
        self.batch_size = batch_size
        if not self.shuffle:
            self.ordering = np.array_split(np.arange(len(dataset)),
                                           range(batch_size, len(dataset), batch_size))
        self.batch_idx = 0

    def __iter__(self):
        if self.shuffle:
            self.ordering = np.array_split(np.random.permutation(len(self.dataset)),
                                           range(self.batch_size, len(self.dataset), self.batch_size))
        else:
            self.ordering = np.array_split(np.arange(len(self.dataset)),
                                           range(self.batch_size, len(self.dataset), self.batch_size))
        self.batch_idx = 0
        return self

    def __next__(self):
        if self.batch_idx >= len(self.ordering):
            raise StopIteration
        batch_indices = self.ordering[self.batch_idx]
        X_batch, y_batch = self.dataset[batch_indices]
        self.batch_idx += 1
        return Tensor(X_batch), Tensor(y_batch)


class MNISTDataset(Dataset):
    def __init__(
        self,
        image_filename: str,
        label_filename: str,
        transforms: Optional[List] = None,
    ):
        # load the data - copied from hw1/apps/simple_ml.py::parse_mnist()
        with gzip.open(image_filename, 'rb') as f:
            magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
            X = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols, 1)
            X = X.astype(np.float32) / 255.0

        with gzip.open(label_filename, 'rb') as f:
            magic, num = struct.unpack(">II", f.read(8))
            y = np.frombuffer(f.read(), dtype=np.uint8)

        self.images = X
        self.labels = y
        self.transforms = [] if transforms is None else transforms

    def __getitem__(self, index) -> object:
        image = self.images[index]
        label = self.labels[index]
        for func in self.transforms:
            image = func(image)
        return image, label

    def __len__(self) -> int:
        return len(self.labels)

class NDArrayDataset(Dataset):
    def __init__(self, *arrays):
        self.arrays = arrays

    def __len__(self) -> int:
        return self.arrays[0].shape[0]

    def __getitem__(self, i) -> object:
        return tuple([a[i] for a in self.arrays])
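Dataset and DataLoader in action — a sketch run from the hw2 directory with synthetic arrays:

import sys
sys.path.append('./python')
import numpy as np
import needle as ndl

X = np.random.rand(10, 5).astype(np.float32)
y = np.arange(10)
loader = ndl.data.DataLoader(ndl.data.NDArrayDataset(X, y), batch_size=4)
for Xb, yb in loader:          # yields needle Tensors; last batch is smaller
    print(Xb.shape, yb.shape)  # (4, 5) (4,) ... then (2, 5) (2,)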
--------------------------------------------------------------------------------
/hw2/python/needle/init.py:
--------------------------------------------------------------------------------
import math
import needle as ndl


def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate random numbers uniform between low and high """
    device = ndl.cpu() if device is None else device
    array = device.rand(*shape) * (high - low) + low
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate random normal with specified mean and std deviation """
    device = ndl.cpu() if device is None else device
    array = device.randn(*shape) * std + mean
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
    """ Generate constant Tensor """
    device = ndl.cpu() if device is None else device
    array = device.ones(*shape, dtype=dtype) * c  # note: can change dtype
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def ones(*shape, device=None, dtype="float32", requires_grad=False):
    """ Generate all-ones Tensor """
    return constant(*shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad)


def zeros(*shape, device=None, dtype="float32", requires_grad=False):
    """ Generate all-zeros Tensor """
    return constant(*shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad)


def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
    """ Generate binary random Tensor """
    device = ndl.cpu() if device is None else device
    array = device.rand(*shape) <= p
    return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)


def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
    """ Generate one-hot encoding Tensor """
    device = ndl.cpu() if device is None else device
    return ndl.Tensor(device.one_hot(n, i.numpy(), dtype=dtype), device=device, requires_grad=requires_grad)


def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
    a = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return rand(fan_in, fan_out, low=-a, high=a, **kwargs)


def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
    std = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs)


def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    bound = gain * math.sqrt(3.0 / fan_in)
    return rand(fan_in, fan_out, low=-bound, high=bound, **kwargs)


def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    std = gain * math.sqrt(1.0 / fan_in)
    return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs)
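The Kaiming-uniform bound is gain * sqrt(3 / fan_in) with gain = sqrt(2); a quick check of the resulting range — a sketch run with hw2's python/ directory on the path:

import sys, math
sys.path.append('./python')
import needle.init as init

W = init.kaiming_uniform(784, 100)            # uniform in [-bound, bound]
bound = math.sqrt(2.0) * math.sqrt(3.0 / 784)
w = W.numpy()
assert w.min() >= -bound and w.max() <= bound
print(w.shape, bound)                          # (784, 100), bound ~0.0875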
--------------------------------------------------------------------------------
/hw2/python/needle/optim.py:
--------------------------------------------------------------------------------
"""Optimization module"""
import needle as ndl
import numpy as np


class Optimizer:
    def __init__(self, params):
        self.params = params

    def step(self):
        raise NotImplementedError()

    def reset_grad(self):
        for p in self.params:
            p.grad = None


class SGD(Optimizer):
    def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
        super().__init__(params)
        self.lr = lr
        self.momentum = momentum
        self.u = {}
        self.weight_decay = weight_decay

    def step(self):
        for param in self.params:
            # add the weight-decay penalty term to the gradient
            grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach()
            u = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad_with_penalty
            # convert the dtype from float64 back to float32
            u = ndl.Tensor(u, dtype=param.dtype)
            self.u[id(param)] = u
            param.data -= self.lr * u


class Adam(Optimizer):
    def __init__(
        self,
        params,
        lr=0.01,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        weight_decay=0.0,
    ):
        super().__init__(params)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.weight_decay = weight_decay
        self.t = 0

        self.m = {}
        self.v = {}

    def step(self):
        self.t += 1
        for param in self.params:
            # add the weight-decay penalty term to the gradient
            grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach()
            # convert the dtype from float64 back to float32
            grad_with_penalty = ndl.Tensor(grad_with_penalty, dtype=param.dtype)

            m = self.beta1 * self.m.get(id(param), 0) + (1 - self.beta1) * grad_with_penalty
            v = self.beta2 * self.v.get(id(param), 0) + (1 - self.beta2) * grad_with_penalty ** 2
            self.m[id(param)] = m.detach()
            self.v[id(param)] = v.detach()
            m_hat = m / (1 - self.beta1 ** self.t)
            v_hat = v / (1 - self.beta2 ** self.t)
            param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps)
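The bias correction above matters most on the first steps: with beta1 = 0.9, after step t = 1 the raw moment m is only 0.1 * grad, and dividing by (1 - 0.9^1) = 0.1 rescales it back to grad. A standalone NumPy illustration (my own sketch, independent of needle):

import numpy as np

beta1, grad = 0.9, np.array(2.0)
m = 0.0
for t in range(1, 4):
    m = beta1 * m + (1 - beta1) * grad
    m_hat = m / (1 - beta1 ** t)
    print(t, float(m), float(m_hat))  # m creeps toward 2.0; m_hat is 2.0 immediately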
--------------------------------------------------------------------------------
/hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
--------------------------------------------------------------------------------
/hw3/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "files.associations": {
        "vector": "cpp"
    }
}
--------------------------------------------------------------------------------
/hw3/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(needle C CXX)
cmake_policy(SET CMP0146 OLD)

# find correct version of Python
execute_process(COMMAND python3-config --prefix
  OUTPUT_VARIABLE Python_ROOT_DIR)
find_package(Python COMPONENTS Development Interpreter REQUIRED)
include_directories(${Python_INCLUDE_DIRS})

# find pybind
execute_process(COMMAND python3 -m pybind11 --cmakedir
  RESULT_VARIABLE __pybind_exit_code
  OUTPUT_VARIABLE __pybind_path
  OUTPUT_STRIP_TRAILING_WHITESPACE)
find_package(pybind11 PATHS ${__pybind_path})


if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
else()
  set(CMAKE_CXX_FLAGS "/std:c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
endif()

include_directories(SYSTEM ${pybind11_INCLUDE_DIRS})
list(APPEND LINKER_LIBS ${pybind11_LIBRARIES})

################### 32 | ### CPU BACKEND ### 33 | ################### 34 | add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc) 35 | target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS}) 36 | pybind11_extension(ndarray_backend_cpu) 37 | pybind11_strip(ndarray_backend_cpu) 38 | 39 | 40 | # directly output to ffi folder 41 | set_target_properties(ndarray_backend_cpu 42 | PROPERTIES 43 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 44 | CXX_VISIBILITY_PRESET "hidden" 45 | ) 46 | 47 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 48 | set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup) 49 | endif() 50 | 51 | 52 | 53 | #################### 54 | ### CUDA BACKEND ### 55 | #################### 56 | find_package(CUDA) 57 | if(CUDA_FOUND) 58 | message(STATUS "Found cuda, building cuda backend") 59 | 60 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 61 | list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY}) 62 | 63 | # invoke nvidia-smi to detect whether a GPU is actually present 64 | execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET) 65 | if(NV_RET EQUAL "0") 66 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto) 67 | else() 68 | # fall back to compute capability 3.7 (Tesla K80) 69 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7) 70 | endif() 71 | 72 | # set arch flags properly 73 | CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS}) 74 | 75 | target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS}) 76 | pybind11_extension(ndarray_backend_cuda) 77 | pybind11_strip(ndarray_backend_cuda) 78 | 79 | # directly output to ffi folder 80 | set_target_properties(ndarray_backend_cuda 81 | PROPERTIES 82 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 83 | CXX_VISIBILITY_PRESET "hidden" 84 | CUDA_VISIBILITY_PRESET "hidden" 85 | ) 86 | 87 | endif() 88 | 89 | -------------------------------------------------------------------------------- /hw3/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lib pybind clean format all 2 | 3 | all: lib 4 | 5 | 6 | lib: 7 | @mkdir -p build 8 | @cd build; cmake .. 9 | @cd build; $(MAKE) 10 | 11 | format: 12 | python3 -m black . 13 | clang-format -i src/*.cc src/*.cu 14 | 15 | clean: 16 | rm -rf build python/needle/backend_ndarray/ndarray_backend*.so 17 | -------------------------------------------------------------------------------- /hw3/README.md: -------------------------------------------------------------------------------- 1 | # Homework 3 2 | 3 | Public repository and stub/testing code for Homework 3 of 10-714.
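The Makefile above drives the CMake build: it produces the `ndarray_backend_cpu` (and, when CUDA is found, `ndarray_backend_cuda`) pybind11 modules and drops the resulting `.so` files directly into `python/needle/backend_ndarray/`. After `make`, which backend `needle` actually loads is controlled by the `NEEDLE_BACKEND` environment variable read in `python/needle/backend_selection.py`; a minimal sanity check, assuming the build succeeded:

import os, sys
os.environ["NEEDLE_BACKEND"] = "nd"   # "nd" = compiled ndarray backends, "np" = plain numpy
sys.path.append("./python")
import needle as ndl                  # prints "Using needle backend"
print(ndl.all_devices())              # lists the devices available on this machine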
4 | 5 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/cc") 2 | set(CMAKE_C_COMPILER_ARG1 "") 3 | set(CMAKE_C_COMPILER_ID "GNU") 4 | set(CMAKE_C_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_C_COMPILER_WRAPPER "") 7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23") 10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") 11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") 12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") 13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17") 14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23") 15 | 16 | set(CMAKE_C_PLATFORM_ID "Linux") 17 | set(CMAKE_C_SIMULATE_ID "") 18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU") 19 | set(CMAKE_C_SIMULATE_VERSION "") 20 | 21 | 22 | 23 | 24 | set(CMAKE_AR "/usr/bin/ar") 25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11") 26 | set(CMAKE_RANLIB "/usr/bin/ranlib") 27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 28 | set(CMAKE_LINKER "/usr/bin/ld") 29 | set(CMAKE_MT "") 30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 31 | set(CMAKE_COMPILER_IS_GNUCC 1) 32 | set(CMAKE_C_COMPILER_LOADED 1) 33 | set(CMAKE_C_COMPILER_WORKS TRUE) 34 | set(CMAKE_C_ABI_COMPILED TRUE) 35 | 36 | set(CMAKE_C_COMPILER_ENV_VAR "CC") 37 | 38 | set(CMAKE_C_COMPILER_ID_RUN 1) 39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) 40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) 41 | set(CMAKE_C_LINKER_PREFERENCE 10) 42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE) 43 | 44 | # Save compiler ABI information. 
45 | set(CMAKE_C_SIZEOF_DATA_PTR "8") 46 | set(CMAKE_C_COMPILER_ABI "ELF") 47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") 48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 49 | 50 | if(CMAKE_C_SIZEOF_DATA_PTR) 51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") 52 | endif() 53 | 54 | if(CMAKE_C_COMPILER_ABI) 55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") 56 | endif() 57 | 58 | if(CMAKE_C_LIBRARY_ARCHITECTURE) 59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | endif() 61 | 62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") 63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) 64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") 65 | endif() 66 | 67 | 68 | 69 | 70 | 71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") 73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 75 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++") 2 | set(CMAKE_CXX_COMPILER_ARG1 "") 3 | set(CMAKE_CXX_COMPILER_ID "GNU") 4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_CXX_COMPILER_WRAPPER "") 7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") 10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") 11 | set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") 12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") 13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") 14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") 15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") 16 | 17 | set(CMAKE_CXX_PLATFORM_ID "Linux") 18 | set(CMAKE_CXX_SIMULATE_ID "") 19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU") 20 | set(CMAKE_CXX_SIMULATE_VERSION "") 21 | 22 | 23 | 24 | 25 | set(CMAKE_AR "/usr/bin/ar") 26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11") 27 | set(CMAKE_RANLIB "/usr/bin/ranlib") 28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 29 | set(CMAKE_LINKER "/usr/bin/ld") 30 | set(CMAKE_MT "") 31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 32 | set(CMAKE_COMPILER_IS_GNUCXX 1) 33 | set(CMAKE_CXX_COMPILER_LOADED 1) 34 | set(CMAKE_CXX_COMPILER_WORKS TRUE) 35 | set(CMAKE_CXX_ABI_COMPILED TRUE) 36 | 37 | set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") 38 | 39 | set(CMAKE_CXX_COMPILER_ID_RUN 1) 40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m) 41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) 42 | 43 | foreach (lang C OBJC OBJCXX) 44 | if (CMAKE_${lang}_COMPILER_ID_RUN) 45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) 46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) 47 | endforeach() 48 | endif() 49 | endforeach() 50 | 51 | set(CMAKE_CXX_LINKER_PREFERENCE 30) 52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) 53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE) 54 | 55 | # Save compiler ABI information. 
56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8") 57 | set(CMAKE_CXX_COMPILER_ABI "ELF") 58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") 59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | 61 | if(CMAKE_CXX_SIZEOF_DATA_PTR) 62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") 63 | endif() 64 | 65 | if(CMAKE_CXX_COMPILER_ABI) 66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") 67 | endif() 68 | 69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE) 70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 71 | endif() 72 | 73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") 74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) 75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") 76 | endif() 77 | 78 | 79 | 80 | 81 | 82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") 84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 86 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-5.15.120+") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.15.120+") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-5.15.120+") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "5.15.120+") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out -------------------------------------------------------------------------------- 
/hw3/build/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw3") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw3/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/CMakeRuleHashes.txt: -------------------------------------------------------------------------------- 1 | # Hashes of file build rules. 2 | 9720afbab5807e3b7d272586be3395ba CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/Makefile.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # The generator used is: 5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") 6 | 7 | # The top level Makefile was generated from the following files: 8 | set(CMAKE_MAKEFILE_DEPENDS 9 | "CMakeCache.txt" 10 | "/content/drive/Othercomputers/My MacBook Pro/hw3/CMakeLists.txt" 11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake" 12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake" 13 | "CMakeFiles/3.27.9/CMakeSystem.cmake" 14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend" 16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake" 17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake" 18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake" 19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake" 20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake" 21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake" 22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake" 23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake" 24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake" 25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake" 26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake" 27 | 
"/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake" 28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake" 29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake" 30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake" 31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake" 32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake" 33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake" 34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake" 35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake" 36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake" 37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake" 38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake" 39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake" 40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake" 41 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake" 42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake" 43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake" 44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake" 45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake" 46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake" 47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake" 48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake" 49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake" 50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake" 51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake" 52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake" 53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake" 54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake" 55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake" 56 | ) 57 | 58 | # The corresponding makefile is: 59 | set(CMAKE_MAKEFILE_OUTPUTS 60 | "Makefile" 61 | "CMakeFiles/cmake.check_cache" 62 | ) 63 | 64 | # Byproducts of CMake generate step: 65 | set(CMAKE_MAKEFILE_PRODUCTS 66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 67 | 
"CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake" 68 | "CMakeFiles/CMakeDirectoryInformation.cmake" 69 | ) 70 | 71 | # Dependency information for all targets: 72 | set(CMAKE_DEPEND_INFO_FILES 73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake" 74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake" 75 | ) 76 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/Makefile2: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | #============================================================================= 9 | # Special targets provided by cmake. 10 | 11 | # Disable implicit rules so canonical targets will work. 12 | .SUFFIXES: 13 | 14 | # Disable VCS-based implicit rules. 15 | % : %,v 16 | 17 | # Disable VCS-based implicit rules. 18 | % : RCS/% 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/%,v 22 | 23 | # Disable VCS-based implicit rules. 24 | % : SCCS/s.% 25 | 26 | # Disable VCS-based implicit rules. 27 | % : s.% 28 | 29 | .SUFFIXES: .hpux_make_needs_suffix_list 30 | 31 | # Command-line flag to silence nested $(MAKE). 32 | $(VERBOSE)MAKESILENT = -s 33 | 34 | #Suppress display of executed commands. 35 | $(VERBOSE).SILENT: 36 | 37 | # A target that is always out of date. 38 | cmake_force: 39 | .PHONY : cmake_force 40 | 41 | #============================================================================= 42 | # Set environment variables for the build. 43 | 44 | # The shell in which to execute make rules. 45 | SHELL = /bin/sh 46 | 47 | # The CMake executable. 48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake 49 | 50 | # The command to remove a file. 51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f 52 | 53 | # Escaping for special characters. 54 | EQUALS = = 55 | 56 | # The top-level source directory on which CMake was run. 57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3" 58 | 59 | # The top-level build directory on which CMake was run. 60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3/build" 61 | 62 | #============================================================================= 63 | # Directory level rules for the build root directory 64 | 65 | # The main recursive "all" target. 66 | all: CMakeFiles/ndarray_backend_cpu.dir/all 67 | all: CMakeFiles/ndarray_backend_cuda.dir/all 68 | .PHONY : all 69 | 70 | # The main recursive "preinstall" target. 71 | preinstall: 72 | .PHONY : preinstall 73 | 74 | # The main recursive "clean" target. 75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean 76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean 77 | .PHONY : clean 78 | 79 | #============================================================================= 80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir 81 | 82 | # All Build rule for target. 
83 | CMakeFiles/ndarray_backend_cpu.dir/all: 84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend 85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build 86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu" 87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all 88 | 89 | # Build rule for subdir invocation for target. 90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system 91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2 92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all 93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0 94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule 95 | 96 | # Convenience name for target. 97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule 98 | .PHONY : ndarray_backend_cpu 99 | 100 | # clean rule for target. 101 | CMakeFiles/ndarray_backend_cpu.dir/clean: 102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean 103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean 104 | 105 | #============================================================================= 106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir 107 | 108 | # All Build rule for target. 109 | CMakeFiles/ndarray_backend_cuda.dir/all: 110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build 112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda" 113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all 114 | 115 | # Build rule for subdir invocation for target. 116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system 117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2 118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all 119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0 120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule 121 | 122 | # Convenience name for target. 123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule 124 | .PHONY : ndarray_backend_cuda 125 | 126 | # clean rule for target. 127 | CMakeFiles/ndarray_backend_cuda.dir/clean: 128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean 129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean 130 | 131 | #============================================================================= 132 | # Special targets to cleanup operation of make. 133 | 134 | # Special rule to run CMake to check the build system integrity. 135 | # No rule that depends on this can have commands that come from listfiles 136 | # because they might be regenerated. 
137 | cmake_check_build_system: 138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 139 | .PHONY : cmake_check_build_system 140 | 141 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cpu.dir 2 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cuda.dir 3 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/edit_cache.dir 4 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/rebuild_cache.dir 5 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/content/drive/Othercomputers/My MacBook Pro/hw3/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 12 | ) 13 | 14 | # Targets to which this target links which contain Fortran sources. 15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 16 | ) 17 | 18 | # Fortran module output directory. 19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 20 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.pdb" 4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" 5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cpu. 
2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS 6 | 7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include 8 | 9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden 10 | 11 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | ) 12 | 13 | # Targets to which this target links which contain Fortran sources. 14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 15 | ) 16 | 17 | # Fortran module output directory. 18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 19 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.pdb" 4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o" 5 | ) 6 | 7 | # Per-language clean rules from dependency scanning. 
8 | foreach(lang ) 9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL) 10 | endforeach() 11 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so 2 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 3 2 | CMAKE_PROGRESS_2 = 4 3 | 4 | -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -------------------------------------------------------------------------------- /hw3/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /hw3/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw3 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX 
"/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/usr/bin/objdump") 43 | endif() 44 | 45 | if(CMAKE_INSTALL_COMPONENT) 46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 47 | else() 48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 49 | endif() 50 | 51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 52 | "${CMAKE_INSTALL_MANIFEST_FILES}") 53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw3/build/${CMAKE_INSTALL_MANIFEST}" 54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 55 | -------------------------------------------------------------------------------- /hw3/build/detect_cuda_compute_capabilities.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main() 4 | { 5 | int count = 0; 6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; 7 | if (count == 0) return -1; 8 | for (int device = 0; device < count; ++device) 9 | { 10 | cudaDeviceProp prop; 11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) 12 | std::printf("%d.%d ", prop.major, prop.minor); 13 | } 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /hw3/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("./tests/hw3") 3 | sys.path.append("./python") 4 | 5 | from test_ndarray import * 6 | from needle import backend_ndarray as nd 7 | 8 | 9 | if __name__ == "__main__": 10 | test_getitem(device=nd.cpu(), params={"shape": (8, 8, 2, 2, 2, 2), "fn": lambda X: X[1:3, 5:8, 1:2, 0:1, 0:1, 1:2]}) -------------------------------------------------------------------------------- /hw3/hw3.ipynb - Colaboratory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/hw3.ipynb - Colaboratory.pdf -------------------------------------------------------------------------------- /hw3/python/needle/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ops 2 | from .ops import * 3 | from .autograd import Tensor, cpu, all_devices 4 | 5 | from . import init 6 | from .init import ones, zeros, zeros_like, ones_like 7 | 8 | from . import data 9 | from . import nn 10 | from . 
import optim 11 | from .backend_selection import * 12 | -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/autograd.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/autograd.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/__pycache__/optim.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/optim.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__init__.py: -------------------------------------------------------------------------------- 1 | from .ndarray import * 2 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc -------------------------------------------------------------------------------- 
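With the wiring above, `needle.backend_ndarray` re-exports everything from `ndarray.py` (whose text is not included in this dump). A smoke-test sketch in the style of debug.py; `nd.array(...)` and `.numpy()` are assumed to follow the standard course ndarray.py API:

import sys
sys.path.append("./python")
from needle import backend_ndarray as nd

x = nd.array([[1.0, 2.0], [3.0, 4.0]], device=nd.cpu_numpy())  # pure-numpy reference backend
y = x + x                     # elementwise add, dispatching to ewise_add below
print(y.numpy())              # [[2. 4.] [6. 8.]]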
/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw3/python/needle/backend_ndarray/ndarray_backend_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __device_name__ = "numpy" 5 | _datatype = np.float32 6 | _datetype_size = np.dtype(_datatype).itemsize 7 | 8 | 9 | class Array: 10 | def __init__(self, size): 11 | self.array = np.empty(size, dtype=np.float32) 12 | 13 | @property 14 | def size(self): 15 | return self.array.size 16 | 17 | 18 | def to_numpy(a, shape, strides, offset): 19 | return np.lib.stride_tricks.as_strided( 20 | a.array[offset:], shape, tuple([s * _datetype_size for s in strides]) 21 | ) 22 | 23 | 24 | def from_numpy(a, out): 25 | out.array[:] = a.flatten() 26 | 27 | 28 | def fill(out, val): 29 | out.array.fill(val) 30 | 31 | 32 | def compact(a, out, shape, strides, offset): 33 | out.array[:] = to_numpy(a, shape, strides, offset).flatten() 34 | 35 | 36 | def ewise_setitem(a, out, shape, strides, offset): 37 | to_numpy(out, shape, strides, offset)[:] = a.array.reshape(shape) 38 | 39 | 40 | def scalar_setitem(size, val, out, shape, strides, offset): 41 | to_numpy(out, shape, strides, offset)[:] = val 42 | 43 | 44 | def ewise_add(a, b, out): 45 | out.array[:] = a.array + b.array 46 | 47 | 48 | def scalar_add(a, val, out): 49 | out.array[:] = a.array + val 50 | 51 | 52 | def ewise_mul(a, b, out): 53 | out.array[:] = a.array * b.array 54 | 55 | 56 | def scalar_mul(a, val, out): 57 | out.array[:] = a.array * val 58 | 59 | 60 | def ewise_div(a, b, out): 61 | out.array[:] = a.array / b.array 62 | 63 | 64 | def scalar_div(a, val, out): 65 | out.array[:] = a.array / val 66 | 67 | 68 | def scalar_power(a, val, out): 69 | out.array[:] = a.array**val 70 | 71 | 72 | def ewise_maximum(a, b, out): 73 | out.array[:] = np.maximum(a.array, b.array) 74 | 75 | 76 | def scalar_maximum(a, val, out): 77 | out.array[:] = np.maximum(a.array, val) 78 | 79 | 80 | def ewise_eq(a, b, out): 81 | out.array[:] = (a.array == b.array).astype(np.float32) 82 | 83 | 84 | def scalar_eq(a, val, out): 85 | out.array[:] = (a.array == val).astype(np.float32) 86 | 87 | 88 | def ewise_ge(a, b, out): 89 | out.array[:] = (a.array >= b.array).astype(np.float32) 90 | 91 | 92 | def scalar_ge(a, val, out): 93 | out.array[:] = (a.array >= val).astype(np.float32) 94 | 95 | 96 | def ewise_log(a, out): 97 | out.array[:] = np.log(a.array) 98 | 99 | 100 | def ewise_exp(a, out): 101 | out.array[:] = np.exp(a.array) 102 | 103 | 104 | def ewise_tanh(a, out): 105 | out.array[:] = np.tanh(a.array) 106 | 107 | 108 | def matmul(a, b, out, m, n, p): 109 | out.array[:] = (a.array.reshape(m, n) @ b.array.reshape(n, p)).reshape(-1) 110 | 111 | 112 
| def reduce_max(a, out, reduce_size): 113 | out.array[:] = a.array[:].reshape(-1, reduce_size).max(axis=1) 114 | 115 | 116 | def reduce_sum(a, out, reduce_size): 117 | out.array[:] = a.array[:].reshape(-1, reduce_size).sum(axis=1) 118 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_numpy.py: -------------------------------------------------------------------------------- 1 | """This file defines specific implementations of devices when using numpy as the NDArray backend. 2 | """ 3 | import numpy 4 | 5 | 6 | class Device: 7 | """Base class of all devices""" 8 | 9 | 10 | class CPUDevice(Device): 11 | """Represents data that sits on the CPU""" 12 | 13 | def __repr__(self): 14 | return "needle.cpu()" 15 | 16 | def __hash__(self): 17 | return self.__repr__().__hash__() 18 | 19 | def __eq__(self, other): 20 | return isinstance(other, CPUDevice) 21 | 22 | def enabled(self): 23 | return True 24 | 25 | def zeros(self, *shape, dtype="float32"): 26 | return numpy.zeros(shape, dtype=dtype) 27 | 28 | def ones(self, *shape, dtype="float32"): 29 | return numpy.ones(shape, dtype=dtype) 30 | 31 | def randn(self, *shape): 32 | # note: numpy doesn't support types within standard random routines, and 33 | # .astype("float32") doesn't work if we're generating a singleton 34 | return numpy.random.randn(*shape) 35 | 36 | def rand(self, *shape): 37 | # note: numpy doesn't support types within standard random routines, and 38 | # .astype("float32") doesn't work if we're generating a singleton 39 | return numpy.random.rand(*shape) 40 | 41 | def one_hot(self, n, i, dtype="float32"): 42 | return numpy.eye(n, dtype=dtype)[i] 43 | 44 | def empty(self, shape, dtype="float32"): 45 | return numpy.empty(shape, dtype=dtype) 46 | 47 | def full(self, shape, fill_value, dtype="float32"): 48 | return numpy.full(shape, fill_value, dtype=dtype) 49 | 50 | 51 | def cpu(): 52 | """Return cpu device""" 53 | return CPUDevice() 54 | 55 | 56 | def default_device(): 57 | return cpu() 58 | 59 | 60 | def all_devices(): 61 | """Return a list of all available devices""" 62 | return [cpu()] 63 | -------------------------------------------------------------------------------- /hw3/python/needle/backend_selection.py: -------------------------------------------------------------------------------- 1 | """Logic for backend selection""" 2 | import os 3 | 4 | 5 | BACKEND = os.environ.get("NEEDLE_BACKEND", "nd") 6 | 7 | 8 | if BACKEND == "nd": 9 | print("Using needle backend") 10 | from .
import backend_ndarray as array_api 11 | from .backend_ndarray import ( 12 | all_devices, 13 | cuda, 14 | cpu, 15 | cpu_numpy, 16 | default_device, 17 | BackendDevice as Device, 18 | ) 19 | 20 | NDArray = array_api.NDArray 21 | elif BACKEND == "np": 22 | print("Using numpy backend") 23 | import numpy as array_api 24 | from .backend_numpy import all_devices, cpu, default_device, Device 25 | 26 | NDArray = array_api.ndarray 27 | else: 28 | raise RuntimeError("Unknown needle array backend %s" % BACKEND) 29 | -------------------------------------------------------------------------------- /hw3/python/needle/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_basic import * 2 | from .data_transforms import * 3 | from .datasets import * 4 | -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/data_basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..autograd import Tensor 3 | 4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 5 | 6 | 7 | 8 | class Dataset: 9 | r"""An abstract class representing a `Dataset`. 10 | 11 | All subclasses should overwrite :meth:`__getitem__`, supporting fetching a 12 | data sample for a given key. Subclasses must also overwrite 13 | :meth:`__len__`, which is expected to return the size of the dataset. 14 | """ 15 | 16 | def __init__(self, transforms: Optional[List] = None): 17 | self.transforms = transforms 18 | 19 | def __getitem__(self, index) -> object: 20 | raise NotImplementedError 21 | 22 | def __len__(self) -> int: 23 | raise NotImplementedError 24 | 25 | def apply_transforms(self, x): 26 | if self.transforms is not None: 27 | # apply the transforms 28 | for tform in self.transforms: 29 | x = tform(x) 30 | return x 31 | 32 | 33 | class DataLoader: 34 | r""" 35 | Data loader. Combines a dataset and a sampler, and provides an iterable over 36 | the given dataset. 37 | Args: 38 | dataset (Dataset): dataset from which to load the data. 39 | batch_size (int, optional): how many samples per batch to load 40 | (default: ``1``). 41 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 42 | at every epoch (default: ``False``). 
43 | """ 44 | dataset: Dataset 45 | batch_size: Optional[int] 46 | 47 | def __init__( 48 | self, 49 | dataset: Dataset, 50 | batch_size: Optional[int] = 1, 51 | shuffle: bool = False, 52 | ): 53 | 54 | self.dataset = dataset 55 | self.shuffle = shuffle 56 | self.batch_size = batch_size 57 | if not self.shuffle: 58 | self.ordering = np.array_split(np.arange(len(dataset)), 59 | range(batch_size, len(dataset), batch_size)) 60 | 61 | def __iter__(self): 62 | ### BEGIN YOUR SOLUTION 63 | raise NotImplementedError() 64 | ### END YOUR SOLUTION 65 | return self 66 | 67 | def __next__(self): 68 | ### BEGIN YOUR SOLUTION 69 | raise NotImplementedError() 70 | ### END YOUR SOLUTION 71 | 72 | -------------------------------------------------------------------------------- /hw3/python/needle/data/data_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Transform: 4 | def __call__(self, x): 5 | raise NotImplementedError 6 | 7 | 8 | class RandomFlipHorizontal(Transform): 9 | def __init__(self, p = 0.5): 10 | self.p = p 11 | 12 | def __call__(self, img): 13 | """ 14 | Horizonally flip an image, specified as an H x W x C NDArray. 15 | Args: 16 | img: H x W x C NDArray of an image 17 | Returns: 18 | H x W x C ndarray corresponding to image flipped with probability self.p 19 | Note: use the provided code to provide randomness, for easier testing 20 | """ 21 | flip_img = np.random.rand() < self.p 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION 25 | 26 | 27 | class RandomCrop(Transform): 28 | def __init__(self, padding=3): 29 | self.padding = padding 30 | 31 | def __call__(self, img): 32 | """ Zero pad and then randomly crop an image. 33 | Args: 34 | img: H x W x C NDArray of an image 35 | Return 36 | H x W x C NAArray of cliped image 37 | Note: generate the image shifted by shift_x, shift_y specified below 38 | """ 39 | shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2) 40 | ### BEGIN YOUR SOLUTION 41 | raise NotImplementedError() 42 | ### END YOUR SOLUTION 43 | -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist_dataset import * 2 | from .ndarray_dataset import * 3 | -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/mnist_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from ..data_basic import Dataset 3 | import numpy as np 4 | 5 | class MNISTDataset(Dataset): 6 | def __init__( 7 | self, 8 | image_filename: str, 9 | label_filename: str, 10 | transforms: Optional[List] = None, 11 | ): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def __getitem__(self, index) -> object: 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | def __len__(self) -> int: 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION -------------------------------------------------------------------------------- /hw3/python/needle/data/datasets/ndarray_dataset.py: -------------------------------------------------------------------------------- 1 | from ..data_basic import Dataset 2 | 3 | class NDArrayDataset(Dataset): 4 | def __init__(self, *arrays): 5 | self.arrays = arrays 6 | 7 | def __len__(self) -> int: 8 | return self.arrays[0].shape[0] 9 | 10 | def __getitem__(self, i) -> object: 11 | return tuple([a[i] for a in self.arrays]) -------------------------------------------------------------------------------- /hw3/python/needle/init/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_basic import * 2 | 3 | from .init_initializers import * 4 | -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/init/init_basic.py: -------------------------------------------------------------------------------- 1 | import math 2 | import needle as ndl 3 | 4 | 5 | def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False): 6 | """Generate random numbers uniform between low and high""" 7 | device = ndl.cpu() if device is None else device 8 | array = device.rand(*shape) * (high - low) + low 9 | return ndl.Tensor(array, device=device, dtype=dtype, 
requires_grad=requires_grad)
10 | 
11 | 
12 | def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
13 |     """Generate random normal with specified mean and std deviation"""
14 |     device = ndl.cpu() if device is None else device
15 |     array = device.randn(*shape) * std + mean
16 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
17 | 
18 | 
19 | def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
20 |     """Generate constant Tensor"""
21 |     device = ndl.cpu() if device is None else device
22 |     array = device.ones(*shape, dtype=dtype) * c  # note: can change dtype
23 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
24 | 
25 | 
26 | def ones(*shape, device=None, dtype="float32", requires_grad=False):
27 |     """Generate all-ones Tensor"""
28 |     return constant(
29 |         *shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad
30 |     )
31 | 
32 | 
33 | def zeros(*shape, device=None, dtype="float32", requires_grad=False):
34 |     """Generate all-zeros Tensor"""
35 |     return constant(
36 |         *shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad
37 |     )
38 | 
39 | 
40 | def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
41 |     """Generate binary random Tensor"""
42 |     device = ndl.cpu() if device is None else device
43 |     array = device.rand(*shape) <= p
44 |     return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad)
45 | 
46 | 
47 | def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
48 |     """Generate one-hot encoding Tensor"""
49 |     device = ndl.cpu() if device is None else device
50 |     return ndl.Tensor(
51 |         device.one_hot(n, i.numpy(), dtype=dtype),
52 |         device=device,
53 |         requires_grad=requires_grad,
54 |     )
55 | 
56 | 
57 | def zeros_like(array, *, device=None, requires_grad=False):
58 |     device = device if device else array.device
59 |     return zeros(
60 |         *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad
61 |     )
62 | 
63 | 
64 | def ones_like(array, *, device=None, requires_grad=False):
65 |     device = device if device else array.device
66 |     return ones(
67 |         *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad
68 |     )
69 | 
-------------------------------------------------------------------------------- /hw3/python/needle/init/init_initializers.py: --------------------------------------------------------------------------------
1 | import math
2 | from .init_basic import *
3 | 
4 | 
5 | def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
6 |     ### BEGIN YOUR SOLUTION
7 |     raise NotImplementedError()
8 |     ### END YOUR SOLUTION
9 | 
10 | 
11 | def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
12 |     ### BEGIN YOUR SOLUTION
13 |     raise NotImplementedError()
14 |     ### END YOUR SOLUTION
15 | 
16 | 
17 | def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
18 |     assert nonlinearity == "relu", "Only relu supported currently"
19 |     ### BEGIN YOUR SOLUTION
20 |     raise NotImplementedError()
21 |     ### END YOUR SOLUTION
22 | 
23 | 
24 | def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
25 |     assert nonlinearity == "relu", "Only relu supported currently"
26 |     ### BEGIN YOUR SOLUTION
27 |     raise NotImplementedError()
28 |     ### END YOUR SOLUTION
29 | 
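# --- Editor's note ----------------------------------------------------------
# For the initializer stubs above: both families reduce to "draw from a
# distribution whose scale is set by fan_in/fan_out". A standalone NumPy
# sketch of the two uniform variants; the gain formulas follow the standard
# Glorot/He papers, and this is an illustration, not this repository's
# graded solution.

import math
import numpy as np

def xavier_uniform_sketch(fan_in, fan_out, gain=1.0):
    # U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out))
    a = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-a, a, size=(fan_in, fan_out))

def kaiming_uniform_sketch(fan_in, fan_out, nonlinearity="relu"):
    assert nonlinearity == "relu"
    # gain = sqrt(2) for relu; bound = gain * sqrt(3 / fan_in)
    bound = math.sqrt(2.0) * math.sqrt(3.0 / fan_in)
    return np.random.uniform(-bound, bound, size=(fan_in, fan_out))
# -----------------------------------------------------------------------------
-------------------------------------------------------------------------------- /hw3/python/needle/nn/__init__.py: --------------------------------------------------------------------------------
1 | from .nn_basic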
import * 2 | -------------------------------------------------------------------------------- /hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/nn/nn_basic.py: -------------------------------------------------------------------------------- 1 | """The module. 2 | """ 3 | from typing import List, Callable, Any 4 | from needle.autograd import Tensor 5 | from needle import ops 6 | import needle.init as init 7 | import numpy as np 8 | 9 | 10 | class Parameter(Tensor): 11 | """A special kind of tensor that represents parameters.""" 12 | 13 | 14 | def _unpack_params(value: object) -> List[Tensor]: 15 | if isinstance(value, Parameter): 16 | return [value] 17 | elif isinstance(value, Module): 18 | return value.parameters() 19 | elif isinstance(value, dict): 20 | params = [] 21 | for k, v in value.items(): 22 | params += _unpack_params(v) 23 | return params 24 | elif isinstance(value, (list, tuple)): 25 | params = [] 26 | for v in value: 27 | params += _unpack_params(v) 28 | return params 29 | else: 30 | return [] 31 | 32 | 33 | def _child_modules(value: object) -> List["Module"]: 34 | if isinstance(value, Module): 35 | modules = [value] 36 | modules.extend(_child_modules(value.__dict__)) 37 | return modules 38 | if isinstance(value, dict): 39 | modules = [] 40 | for k, v in value.items(): 41 | modules += _child_modules(v) 42 | return modules 43 | elif isinstance(value, (list, tuple)): 44 | modules = [] 45 | for v in value: 46 | modules += _child_modules(v) 47 | return modules 48 | else: 49 | return [] 50 | 51 | 52 | class Module: 53 | def __init__(self): 54 | self.training = True 55 | 56 | def parameters(self) -> List[Tensor]: 57 | """Return the list of parameters in the module.""" 58 | return _unpack_params(self.__dict__) 59 | 60 | def _children(self) -> List["Module"]: 61 | return _child_modules(self.__dict__) 62 | 63 | def eval(self): 64 | self.training = False 65 | for m in self._children(): 66 | m.training = False 67 | 68 | def train(self): 69 | self.training = True 70 | for m in self._children(): 71 | m.training = True 72 | 73 | def __call__(self, *args, **kwargs): 74 | return self.forward(*args, **kwargs) 75 | 76 | 77 | class Identity(Module): 78 | def forward(self, x): 79 | return x 80 | 81 | 82 | class Linear(Module): 83 | def __init__( 84 | self, in_features, out_features, bias=True, device=None, dtype="float32" 85 | ): 86 | super().__init__() 87 | self.in_features = in_features 88 | self.out_features = out_features 89 | 90 | ### BEGIN YOUR SOLUTION 91 | raise NotImplementedError() 92 | ### END YOUR SOLUTION 93 | 94 | def forward(self, X: Tensor) -> Tensor: 95 | ### BEGIN YOUR SOLUTION 96 | raise NotImplementedError() 97 | ### END YOUR SOLUTION 98 | 99 | 100 | class Flatten(Module): 101 | def forward(self, X): 102 | ### BEGIN YOUR SOLUTION 103 | raise 
NotImplementedError() 104 | ### END YOUR SOLUTION 105 | 106 | 107 | class ReLU(Module): 108 | def forward(self, x: Tensor) -> Tensor: 109 | ### BEGIN YOUR SOLUTION 110 | raise NotImplementedError() 111 | ### END YOUR SOLUTION 112 | 113 | 114 | class Sequential(Module): 115 | def __init__(self, *modules): 116 | super().__init__() 117 | self.modules = modules 118 | 119 | def forward(self, x: Tensor) -> Tensor: 120 | ### BEGIN YOUR SOLUTION 121 | raise NotImplementedError() 122 | ### END YOUR SOLUTION 123 | 124 | 125 | class SoftmaxLoss(Module): 126 | def forward(self, logits: Tensor, y: Tensor): 127 | ### BEGIN YOUR SOLUTION 128 | raise NotImplementedError() 129 | ### END YOUR SOLUTION 130 | 131 | 132 | class BatchNorm1d(Module): 133 | def __init__(self, dim, eps=1e-5, momentum=0.1, device=None, dtype="float32"): 134 | super().__init__() 135 | self.dim = dim 136 | self.eps = eps 137 | self.momentum = momentum 138 | ### BEGIN YOUR SOLUTION 139 | raise NotImplementedError() 140 | ### END YOUR SOLUTION 141 | 142 | def forward(self, x: Tensor) -> Tensor: 143 | ### BEGIN YOUR SOLUTION 144 | raise NotImplementedError() 145 | ### END YOUR SOLUTION 146 | 147 | 148 | class LayerNorm1d(Module): 149 | def __init__(self, dim, eps=1e-5, device=None, dtype="float32"): 150 | super().__init__() 151 | self.dim = dim 152 | self.eps = eps 153 | ### BEGIN YOUR SOLUTION 154 | raise NotImplementedError() 155 | ### END YOUR SOLUTION 156 | 157 | def forward(self, x: Tensor) -> Tensor: 158 | ### BEGIN YOUR SOLUTION 159 | raise NotImplementedError() 160 | ### END YOUR SOLUTION 161 | 162 | 163 | class Dropout(Module): 164 | def __init__(self, p=0.5): 165 | super().__init__() 166 | self.p = p 167 | 168 | def forward(self, x: Tensor) -> Tensor: 169 | ### BEGIN YOUR SOLUTION 170 | raise NotImplementedError() 171 | ### END YOUR SOLUTION 172 | 173 | 174 | class Residual(Module): 175 | def __init__(self, fn: Module): 176 | super().__init__() 177 | self.fn = fn 178 | 179 | def forward(self, x: Tensor) -> Tensor: 180 | ### BEGIN YOUR SOLUTION 181 | raise NotImplementedError() 182 | ### END YOUR SOLUTION 183 | -------------------------------------------------------------------------------- /hw3/python/needle/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .ops_mathematic import * 2 | 3 | from .ops_logarithmic import * 4 | from .ops_tuple import * 5 | -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc -------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc
-------------------------------------------------------------------------------- /hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc
-------------------------------------------------------------------------------- /hw3/python/needle/ops/ops_logarithmic.py: --------------------------------------------------------------------------------
1 | from typing import Optional
2 | from ..autograd import NDArray
3 | from ..autograd import Op, Tensor, Value, TensorOp
4 | from ..autograd import TensorTuple, TensorTupleOp
5 | 
6 | from .ops_mathematic import *
7 | 
8 | import numpy as array_api
9 | 
10 | class LogSoftmax(TensorOp):
11 |     def compute(self, Z):
12 |         ### BEGIN YOUR SOLUTION
13 |         raise NotImplementedError()
14 |         ### END YOUR SOLUTION
15 | 
16 |     def gradient(self, out_grad, node):
17 |         ### BEGIN YOUR SOLUTION
18 |         raise NotImplementedError()
19 |         ### END YOUR SOLUTION
20 | 
21 | 
22 | def logsoftmax(a):
23 |     return LogSoftmax()(a)
24 | 
25 | 
26 | class LogSumExp(TensorOp):
27 |     def __init__(self, axes: Optional[tuple] = None):
28 |         self.axes = axes
29 | 
30 |     def compute(self, Z):
31 |         ### BEGIN YOUR SOLUTION
32 |         raise NotImplementedError()
33 |         ### END YOUR SOLUTION
34 | 
35 |     def gradient(self, out_grad, node):
36 |         ### BEGIN YOUR SOLUTION
37 |         raise NotImplementedError()
38 |         ### END YOUR SOLUTION
39 | 
40 | 
41 | def logsumexp(a, axes=None):
42 |     return LogSumExp(axes=axes)(a)
43 | 
44 | 
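# --- Editor's note ----------------------------------------------------------
# The LogSumExp stub above asks for the usual numerically stable reduction.
# A standalone NumPy sketch of the forward pass and its gradient (a softmax
# of the max-shifted inputs); an illustration, not this repository's
# graded solution.

import numpy as np

def logsumexp_sketch(Z, axes=None):
    # subtract the per-reduction max so exp() cannot overflow, add it back after
    Zmax = Z.max(axis=axes, keepdims=True)
    out = np.log(np.exp(Z - Zmax).sum(axis=axes, keepdims=True)) + Zmax
    return out.squeeze() if axes is None else out.squeeze(axis=axes)

def logsumexp_grad_sketch(Z, out_grad_keepdims, axes=None):
    # d/dZ logsumexp(Z) = softmax(Z); out_grad must broadcast over `axes`,
    # so it is passed here with the keepdims shape of the reduction
    Zmax = Z.max(axis=axes, keepdims=True)
    ez = np.exp(Z - Zmax)
    return out_grad_keepdims * ez / ez.sum(axis=axes, keepdims=True)
# -----------------------------------------------------------------------------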
-------------------------------------------------------------------------------- /hw3/python/needle/ops/ops_tuple.py: --------------------------------------------------------------------------------
1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp
2 | from .. import init  # added: TupleGetItem.gradient below uses init.zeros_like
3 | 
4 | class MakeTensorTuple(TensorTupleOp):
5 |     def compute(self, *args) -> tuple:
6 |         return tuple(args)
7 | 
8 |     def gradient(self, out_grad, node):
9 |         assert isinstance(out_grad, TensorTuple)
10 |         return tuple([out_grad[i] for i in range(len(out_grad))])  # was tuple(*[...]), a TypeError for len > 1
11 | 
12 | 
13 | def make_tuple(*args):
14 |     return MakeTensorTuple()(*args)
15 | 
16 | 
17 | class TupleGetItem(TensorOp):
18 |     def __init__(self, index):
19 |         self.index = index
20 | 
21 |     def __call__(self, a: TensorTuple, fold_const=True) -> Value:
22 |         assert isinstance(a, TensorTuple)
23 |         # constant folding
24 |         if fold_const and isinstance(a.op, MakeTensorTuple):
25 |             return a.inputs[self.index]
26 |         return Tensor.make_from_op(self, [a])
27 | 
28 |     def compute(self, a):
29 |         return a[self.index]
30 | 
31 |     def gradient(self, out_grad, node):
32 |         index = self.index
33 |         in_grad = []
34 |         for i, value in enumerate(node.inputs[0]):
35 |             if i != index:
36 |                 in_grad.append(init.zeros_like(value))
37 |             else:
38 |                 in_grad.append(out_grad)
39 |         return MakeTensorTuple()(*in_grad)
40 | 
41 | 
42 | def tuple_get_item(value, index):
43 |     return TupleGetItem(index)(value)
44 | 
45 | 
46 | class FusedAddScalars(TensorTupleOp):
47 |     def __init__(self, c0: float, c1: float):
48 |         self.c0 = c0
49 |         self.c1 = c1
50 | 
51 |     def compute(self, a):
52 |         return a + self.c0, a + self.c1
53 | 
54 |     def gradient(self, out_grad, node):
55 |         return out_grad[0] + out_grad[1]
56 | 
57 | 
58 | def fused_add_scalars(x, c0, c1):
59 |     return FusedAddScalars(c0, c1)(x)
60 | 
-------------------------------------------------------------------------------- /hw3/python/needle/optim.py: --------------------------------------------------------------------------------
1 | """Optimization module"""
2 | import needle as ndl
3 | import numpy as np
4 | 
5 | 
6 | class Optimizer:
7 |     def __init__(self, params):
8 |         self.params = params
9 | 
10 |     def step(self):
11 |         raise NotImplementedError()
12 | 
13 |     def reset_grad(self):
14 |         for p in self.params:
15 |             p.grad = None
16 | 
17 | 
18 | class SGD(Optimizer):
19 |     def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
20 |         super().__init__(params)
21 |         self.lr = lr
22 |         self.momentum = momentum
23 |         self.u = {}
24 |         self.weight_decay = weight_decay
25 | 
26 |     def step(self):
27 |         ### BEGIN YOUR SOLUTION
28 |         raise NotImplementedError()
29 |         ### END YOUR SOLUTION
30 | 
31 |     def clip_grad_norm(self, max_norm=0.25):
32 |         """
33 |         Clips gradient norm of parameters.
34 |         """
35 |         ### BEGIN YOUR SOLUTION
36 |         raise NotImplementedError()
37 |         ### END YOUR SOLUTION
38 | 
39 | 
40 | class Adam(Optimizer):
41 |     def __init__(
42 |         self,
43 |         params,
44 |         lr=0.01,
45 |         beta1=0.9,
46 |         beta2=0.999,
47 |         eps=1e-8,
48 |         weight_decay=0.0,
49 |     ):
50 |         super().__init__(params)
51 |         self.lr = lr
52 |         self.beta1 = beta1
53 |         self.beta2 = beta2
54 |         self.eps = eps
55 |         self.weight_decay = weight_decay
56 |         self.t = 0
57 | 
58 |         self.m = {}
59 |         self.v = {}
60 | 
61 |     def step(self):
62 |         ### BEGIN YOUR SOLUTION
63 |         raise NotImplementedError()
64 |         ### END YOUR SOLUTION
65 | 
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc
-------------------------------------------------------------------------------- /hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc
-------------------------------------------------------------------------------- /hw4/.idea/.gitignore: --------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 | 
-------------------------------------------------------------------------------- /hw4/.idea/hw4.iml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction, leaving only bare line numbers.)
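# --- Editor's note ----------------------------------------------------------
# For the SGD.step()/Adam.step() stubs in hw3/python/needle/optim.py above:
# a minimal, self-contained sketch of a momentum update with L2 weight decay,
# written against plain NumPy arrays. The damped form u = beta*u + (1-beta)*g
# is one common convention; conventions vary, so treat this as an
# illustration, not this repository's graded solution.

import numpy as np

def sgd_step(params, grads, u, lr=0.01, momentum=0.9, weight_decay=0.0):
    """One in-place SGD update; params and grads are lists of np.ndarray."""
    for i, (p, g) in enumerate(zip(params, grads)):
        g = g + weight_decay * p                       # fold the L2 penalty into the gradient
        u[i] = momentum * u.get(i, 0.0) + (1 - momentum) * g
        p -= lr * u[i]                                 # update the parameter in place

# usage: u = {}; call sgd_step(params, grads, u, ...) once per batch
# -----------------------------------------------------------------------------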
-------------------------------------------------------------------------------- /hw4/.idea/inspectionProfiles/profiles_settings.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/misc.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/modules.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.idea/vcs.xml: --------------------------------------------------------------------------------
(XML markup not captured in this dump; the tags were stripped during extraction.)
-------------------------------------------------------------------------------- /hw4/.tmp.driveupload/7792: --------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("./tests/hw4")
3 | sys.path.append("./python")
4 | 
5 | from test_nd_backend import *
6 | from test_cifar_ptb_data import *
7 | from test_conv import *
8 | from test_sequence_models import *
9 | from needle import backend_ndarray as nd
10 | 
11 | 
12 | def train_cifar10():
13 |     import sys
14 |     sys.path.append('./python')
15 |     sys.path.append('./apps')
16 |     import needle as ndl
17 |     from models import ResNet9
18 |     from simple_ml import train_cifar10, evaluate_cifar10
19 | 
20 |     device = ndl.cpu()
21 |     dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
22 |     dataloader = ndl.data.DataLoader(
23 |         dataset=dataset,
24 |         batch_size=128,
25 |         shuffle=True, )
26 |     model = ResNet9(device=device, dtype="float32")
27 |     train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam,
28 |                   lr=0.001, weight_decay=0.001, device=device)
29 |     evaluate_cifar10(model, dataloader)
30 | 
31 | 
32 | def train_language_model():
33 |     import needle as ndl
34 |     sys.path.append('./apps')
35 |     from models import LanguageModel
36 |     from simple_ml import train_ptb, evaluate_ptb
37 | 
38 |     device = ndl.cpu_numpy()
39 |     corpus = ndl.data.Corpus("data/ptb")
40 |     train_data = ndl.data.batchify(corpus.train, batch_size=16, device=device, dtype="float32")
41 |     model = LanguageModel(30, len(corpus.dictionary), hidden_size=10, num_layers=2, seq_model='rnn', device=device)
42 |     train_ptb(model, train_data, seq_len=1, n_epochs=1, device=device)
43 |     evaluate_ptb(model, train_data, seq_len=40, device=device)
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     """
48 |     Part 1
49 |     """
50 |     # test_stack((5, 5), 0, 2, nd.cpu())
51 |     # test_stack_backward((5, 5), 0, 2, nd.cpu())
52 | 
53 |     # test_matmul(16, 16, 16, nd.cpu())
54 |     # test_relu((5, 5), nd.cpu())
55 |     # test_tanh_backward((5, 5), nd.cpu())
56 | 
57 | 
58 |     """
59 |     Part 2
60 |     """
61 |     # test_cifar10_dataset(True)
62 | 
63 | 
64 |     """
65 |     Part 3
66 |     """
67 |     # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu())
68 |     # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu())
69 |     # test_dilate_forward(nd.cpu())
70 |     # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu())
71 |     # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu())
72 | 
73 |     # test_init_kaiming_uniform(nd.cpu())
74 |     # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu())
75 |     # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu())
76 |     # test_resnet9(nd.cpu())
77 |     # test_train_cifar10(nd.cpu())
78 | 
79 |     train_cifar10()
80 | 
81 |     """
82 |     Part 4
83 | """ 84 | # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu()) 85 | # test_lstm_cell(1, 1, 1, False, False, nd.cpu()) 86 | # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu()) 87 | 88 | """ 89 | Part 6 90 | """ 91 | # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu()) 92 | 93 | """ 94 | Part 7 95 | """ 96 | # train_language_model() -------------------------------------------------------------------------------- /hw4/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(needle C CXX) 3 | cmake_policy(SET CMP0146 OLD) 4 | 5 | # find correct version of Python 6 | execute_process(COMMAND python3-config --prefix 7 | OUTPUT_VARIABLE Python_ROOT_DIR) 8 | find_package(Python COMPONENTS Development Interpreter REQUIRED) 9 | include_directories(${Python_INCLUDE_DIRS}) 10 | 11 | # find pybind 12 | execute_process(COMMAND python3 -m pybind11 --cmakedir 13 | RESULT_VARIABLE __pybind_exit_code 14 | OUTPUT_VARIABLE __pybind_path 15 | OUTPUT_STRIP_TRAILING_WHITESPACE) 16 | find_package(pybind11 PATHS ${__pybind_path}) 17 | 18 | 19 | if(NOT MSVC) 20 | set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}") 21 | set(CMAKE_CUDA_STANDARD 14) 22 | else() 23 | set(CMAKE_CXX_FLAGS "/std:c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}") 24 | set(CMAKE_CUDA_STANDARD 14) 25 | endif() 26 | 27 | include_directories(SYSTEM ${pybind11_INCLUDE_DIRS}) 28 | list(APPEND LINKER_LIBS ${pybind11_LIBRARIES}) 29 | 30 | 31 | ################### 32 | ### CPU BACKEND ### 33 | ################### 34 | add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc) 35 | target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS}) 36 | pybind11_extension(ndarray_backend_cpu) 37 | pybind11_strip(ndarray_backend_cpu) 38 | 39 | 40 | # directly output to ffi folder 41 | set_target_properties(ndarray_backend_cpu 42 | PROPERTIES 43 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 44 | CXX_VISIBILITY_PRESET "hidden" 45 | ) 46 | 47 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 48 | set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup) 49 | endif() 50 | 51 | 52 | 53 | #################### 54 | ### CUDA BACKEND ### 55 | #################### 56 | find_package(CUDA) 57 | if(CUDA_FOUND) 58 | message(STATUS "Found cuda, building cuda backend") 59 | 60 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) 61 | list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY}) 62 | 63 | # invoke nvidia smi to detect if we really have a GPU 64 | execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET) 65 | if(NV_RET EQUAL "0") 66 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto) 67 | else() 68 | # set to 3.7 the flag of K80 69 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7) 70 | endif() 71 | 72 | # set arch flags properly 73 | CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS}) 74 | 75 | target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS}) 76 | pybind11_extension(ndarray_backend_cuda) 77 | pybind11_strip(ndarray_backend_cuda) 78 | 79 | # directly output to ffi folder 80 | set_target_properties(ndarray_backend_cuda 81 | PROPERTIES 82 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray 83 | CXX_VISIBILITY_PRESET "hidden" 84 | CUDA_VISIBILITY_PRESET "hidden" 85 | ) 86 | 87 | endif() 88 | 89 | -------------------------------------------------------------------------------- 
/hw4/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: lib, pybind, clean, format, all 2 | 3 | all: lib 4 | 5 | 6 | lib: 7 | @mkdir -p build 8 | @cd build; cmake .. 9 | @cd build; $(MAKE) 10 | 11 | format: 12 | python3 -m black . 13 | clang-format -i src/*.cc src/*.cu 14 | 15 | clean: 16 | rm -rf build python/needle/backend_ndarray/ndarray_backend*.so 17 | -------------------------------------------------------------------------------- /hw4/README.md: -------------------------------------------------------------------------------- 1 | # Homework 4 2 | Public repository and stub/testing code for Homework 4 of 10-714. 3 | -------------------------------------------------------------------------------- /hw4/ResNet9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/ResNet9.png -------------------------------------------------------------------------------- /hw4/apps/models.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./python') 3 | import needle as ndl 4 | import needle.nn as nn 5 | import math 6 | import numpy as np 7 | np.random.seed(0) 8 | 9 | 10 | def ConvBN(in_channels, out_channels, kernel_size, stride, device=None, dtype="float32"): 11 | return nn.Sequential( 12 | nn.Conv(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=True, device=device, dtype=dtype), 13 | nn.BatchNorm2d(out_channels, device=device, dtype=dtype), 14 | nn.ReLU() 15 | ) 16 | 17 | 18 | class ResidualBlock(ndl.nn.Module): 19 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, device=None, dtype="float32"): 20 | super().__init__() 21 | 22 | self.conv1 = ConvBN(in_channels, out_channels, kernel_size, stride, device=device, dtype=dtype) 23 | self.conv2 = ConvBN(out_channels, out_channels, kernel_size, stride, device=device, dtype=dtype) 24 | 25 | 26 | def forward(self, x): 27 | out = self.conv1(x) 28 | out = self.conv2(out) 29 | out = out + x 30 | return out 31 | 32 | 33 | class ResNet9(ndl.nn.Module): 34 | def __init__(self, device=None, dtype="float32"): 35 | super().__init__() 36 | self.conv1 = ConvBN(3, 16, kernel_size=7, stride=4, device=device, dtype=dtype) 37 | self.conv2 = ConvBN(16, 32, kernel_size=3, stride=2, device=device, dtype=dtype) 38 | self.resi1 = ResidualBlock(32, 32, 3, 1, device=device, dtype=dtype) 39 | self.conv3 = ConvBN(32, 64, kernel_size=3, stride=2, device=device, dtype=dtype) 40 | self.conv4 = ConvBN(64, 128, kernel_size=3, stride=2, device=device, dtype=dtype) 41 | self.resi2 = ResidualBlock(128, 128, 3, 1, device=device, dtype=dtype) 42 | self.linear1 = nn.Linear(128, 128, device=device, dtype=dtype) 43 | self.linear2 = nn.Linear(128, 10, device=device, dtype=dtype) 44 | 45 | def forward(self, x): 46 | out = self.conv1(x) 47 | out = self.conv2(out) 48 | out = self.resi1(out) 49 | out = self.conv3(out) 50 | out = self.conv4(out) 51 | out = self.resi2(out) 52 | out = nn.Flatten()(out) 53 | out = self.linear1(out) 54 | out = ndl.ops.relu(out) 55 | out = self.linear2(out) 56 | return out 57 | 58 | 59 | class LanguageModel(nn.Module): 60 | def __init__(self, embedding_size, output_size, hidden_size, num_layers=1, 61 | seq_model='rnn', device=None, dtype="float32"): 62 | """ 63 | Consists of an embedding layer, a sequence model (either RNN or LSTM), and a 64 | linear layer. 
65 |         Parameters:
66 |         output_size: Size of dictionary
67 |         embedding_size: Size of embeddings
68 |         hidden_size: The number of features in the hidden state of LSTM or RNN
69 |         seq_model: 'rnn' or 'lstm', whether to use RNN or LSTM
70 |         num_layers: Number of layers in RNN or LSTM
71 |         """
72 |         super(LanguageModel, self).__init__()
73 | 
74 |         self.embedding_size = embedding_size
75 |         self.output_size = output_size
76 |         self.hidden_size = hidden_size
77 |         self.embedding = nn.Embedding(output_size, embedding_size, device=device, dtype=dtype)
78 |         if seq_model == 'rnn':
79 |             self.seq_model = nn.RNN(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
80 |         elif seq_model == 'lstm':
81 |             self.seq_model = nn.LSTM(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
82 |         self.linear = nn.Linear(hidden_size, output_size, device=device, dtype=dtype)
83 | 
84 | 
85 |     def forward(self, x, h=None):
86 |         """
87 |         Given sequence (and the previous hidden state if given), returns probabilities of next word
88 |         (along with the last hidden state from the sequence model).
89 |         Inputs:
90 |         x of shape (seq_len, bs)
91 |         h of shape (num_layers, bs, hidden_size) if using RNN,
92 |         else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
93 |         Returns (out, h)
94 |         out of shape (seq_len*bs, output_size)
95 |         h of shape (num_layers, bs, hidden_size) if using RNN,
96 |         else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
97 |         """
98 |         seq_len, bs = x.shape
99 |         x_emb = self.embedding(x)
100 |         out, h = self.seq_model(x_emb, h)
101 |         out = out.reshape((seq_len * bs, self.hidden_size))
102 |         out = self.linear(out)
103 |         return out, h
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     model = ResNet9()
108 |     x = ndl.ops.randu((1, 32, 32, 3), requires_grad=True)
109 |     model(x)
110 |     cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
111 |     train_loader = ndl.data.DataLoader(cifar10_train_dataset, 128, ndl.cpu(), dtype="float32")
112 |     print(cifar10_train_dataset[1][0].shape)  # was `dataset`, an undefined name
-------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake: --------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/cc")
2 | set(CMAKE_C_COMPILER_ARG1 "")
3 | set(CMAKE_C_COMPILER_ID "GNU")
4 | set(CMAKE_C_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_C_COMPILER_WRAPPER "")
7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
15 | 
16 | set(CMAKE_C_PLATFORM_ID "Linux")
17 | set(CMAKE_C_SIMULATE_ID "")
18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
19 | set(CMAKE_C_SIMULATE_VERSION "")
20 | 
21 | 
22 | 
23 | 
24 | set(CMAKE_AR "/usr/bin/ar")
25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
26 | set(CMAKE_RANLIB "/usr/bin/ranlib")
27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
28 | set(CMAKE_LINKER "/usr/bin/ld")
29 | set(CMAKE_MT "")
30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
31 | 
set(CMAKE_COMPILER_IS_GNUCC 1) 32 | set(CMAKE_C_COMPILER_LOADED 1) 33 | set(CMAKE_C_COMPILER_WORKS TRUE) 34 | set(CMAKE_C_ABI_COMPILED TRUE) 35 | 36 | set(CMAKE_C_COMPILER_ENV_VAR "CC") 37 | 38 | set(CMAKE_C_COMPILER_ID_RUN 1) 39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) 40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) 41 | set(CMAKE_C_LINKER_PREFERENCE 10) 42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE) 43 | 44 | # Save compiler ABI information. 45 | set(CMAKE_C_SIZEOF_DATA_PTR "8") 46 | set(CMAKE_C_COMPILER_ABI "ELF") 47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN") 48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 49 | 50 | if(CMAKE_C_SIZEOF_DATA_PTR) 51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") 52 | endif() 53 | 54 | if(CMAKE_C_COMPILER_ABI) 55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") 56 | endif() 57 | 58 | if(CMAKE_C_LIBRARY_ARCHITECTURE) 59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | endif() 61 | 62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") 63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) 64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") 65 | endif() 66 | 67 | 68 | 69 | 70 | 71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") 73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 75 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++") 2 | set(CMAKE_CXX_COMPILER_ARG1 "") 3 | set(CMAKE_CXX_COMPILER_ID "GNU") 4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0") 5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_CXX_COMPILER_WRAPPER "") 7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17") 8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON") 9 | set(CMAKE_CXX_COMPILE_FEATURES 
"cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23") 10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") 11 | set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") 12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") 13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") 14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") 15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23") 16 | 17 | set(CMAKE_CXX_PLATFORM_ID "Linux") 18 | set(CMAKE_CXX_SIMULATE_ID "") 19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU") 20 | set(CMAKE_CXX_SIMULATE_VERSION "") 21 | 22 | 23 | 24 | 25 | set(CMAKE_AR "/usr/bin/ar") 26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11") 27 | set(CMAKE_RANLIB "/usr/bin/ranlib") 28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11") 29 | set(CMAKE_LINKER "/usr/bin/ld") 30 | set(CMAKE_MT "") 31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND") 32 | set(CMAKE_COMPILER_IS_GNUCXX 1) 33 | set(CMAKE_CXX_COMPILER_LOADED 1) 34 | set(CMAKE_CXX_COMPILER_WORKS TRUE) 35 | set(CMAKE_CXX_ABI_COMPILED TRUE) 36 | 37 | 
set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") 38 | 39 | set(CMAKE_CXX_COMPILER_ID_RUN 1) 40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m) 41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) 42 | 43 | foreach (lang C OBJC OBJCXX) 44 | if (CMAKE_${lang}_COMPILER_ID_RUN) 45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) 46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) 47 | endforeach() 48 | endif() 49 | endforeach() 50 | 51 | set(CMAKE_CXX_LINKER_PREFERENCE 30) 52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) 53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE) 54 | 55 | # Save compiler ABI information. 56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8") 57 | set(CMAKE_CXX_COMPILER_ABI "ELF") 58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN") 59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | 61 | if(CMAKE_CXX_SIZEOF_DATA_PTR) 62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") 63 | endif() 64 | 65 | if(CMAKE_CXX_COMPILER_ABI) 66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") 67 | endif() 68 | 69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE) 70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 71 | endif() 72 | 73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") 74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) 75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") 76 | endif() 77 | 78 | 79 | 80 | 81 | 82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") 83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") 84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs") 85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 86 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-6.1.58+") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "6.1.58+") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-6.1.58+") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "6.1.58+") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/CMakeDirectoryInformation.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw4") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw4/build") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/CMakeRuleHashes.txt: -------------------------------------------------------------------------------- 1 | # Hashes of file build rules. 2 | 347d5addb0d9c9683a2b5d27952f36b2 CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/Makefile.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # The generator used is: 5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") 6 | 7 | # The top level Makefile was generated from the following files: 8 | set(CMAKE_MAKEFILE_DEPENDS 9 | "CMakeCache.txt" 10 | "/content/drive/Othercomputers/My MacBook Pro/hw4/CMakeLists.txt" 11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake" 12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake" 13 | "CMakeFiles/3.27.9/CMakeSystem.cmake" 14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend" 16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake" 17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake" 18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake" 19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake" 20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake" 21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake" 22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake" 23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake" 24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake" 25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake" 26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake" 27 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake" 28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake" 29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake" 30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake" 31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake" 32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake" 33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake" 34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake" 35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake" 36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake" 37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake" 38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake" 39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake" 40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake" 41 | 
"/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake" 42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake" 43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake" 44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake" 45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake" 46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake" 47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake" 48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake" 49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake" 50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake" 51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake" 52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake" 53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake" 54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake" 55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake" 56 | ) 57 | 58 | # The corresponding makefile is: 59 | set(CMAKE_MAKEFILE_OUTPUTS 60 | "Makefile" 61 | "CMakeFiles/cmake.check_cache" 62 | ) 63 | 64 | # Byproducts of CMake generate step: 65 | set(CMAKE_MAKEFILE_PRODUCTS 66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen" 67 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake" 68 | "CMakeFiles/CMakeDirectoryInformation.cmake" 69 | ) 70 | 71 | # Dependency information for all targets: 72 | set(CMAKE_DEPEND_INFO_FILES 73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake" 74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake" 75 | ) 76 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/Makefile2: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | .PHONY : default_target 7 | 8 | #============================================================================= 9 | # Special targets provided by cmake. 10 | 11 | # Disable implicit rules so canonical targets will work. 12 | .SUFFIXES: 13 | 14 | # Disable VCS-based implicit rules. 15 | % : %,v 16 | 17 | # Disable VCS-based implicit rules. 18 | % : RCS/% 19 | 20 | # Disable VCS-based implicit rules. 21 | % : RCS/%,v 22 | 23 | # Disable VCS-based implicit rules. 24 | % : SCCS/s.% 25 | 26 | # Disable VCS-based implicit rules. 27 | % : s.% 28 | 29 | .SUFFIXES: .hpux_make_needs_suffix_list 30 | 31 | # Command-line flag to silence nested $(MAKE). 32 | $(VERBOSE)MAKESILENT = -s 33 | 34 | #Suppress display of executed commands. 35 | $(VERBOSE).SILENT: 36 | 37 | # A target that is always out of date. 
38 | cmake_force: 39 | .PHONY : cmake_force 40 | 41 | #============================================================================= 42 | # Set environment variables for the build. 43 | 44 | # The shell in which to execute make rules. 45 | SHELL = /bin/sh 46 | 47 | # The CMake executable. 48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake 49 | 50 | # The command to remove a file. 51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f 52 | 53 | # Escaping for special characters. 54 | EQUALS = = 55 | 56 | # The top-level source directory on which CMake was run. 57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4" 58 | 59 | # The top-level build directory on which CMake was run. 60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4/build" 61 | 62 | #============================================================================= 63 | # Directory level rules for the build root directory 64 | 65 | # The main recursive "all" target. 66 | all: CMakeFiles/ndarray_backend_cpu.dir/all 67 | all: CMakeFiles/ndarray_backend_cuda.dir/all 68 | .PHONY : all 69 | 70 | # The main recursive "preinstall" target. 71 | preinstall: 72 | .PHONY : preinstall 73 | 74 | # The main recursive "clean" target. 75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean 76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean 77 | .PHONY : clean 78 | 79 | #============================================================================= 80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir 81 | 82 | # All Build rule for target. 83 | CMakeFiles/ndarray_backend_cpu.dir/all: 84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend 85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build 86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu" 87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all 88 | 89 | # Build rule for subdir invocation for target. 90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system 91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2 92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all 93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0 94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule 95 | 96 | # Convenience name for target. 97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule 98 | .PHONY : ndarray_backend_cpu 99 | 100 | # clean rule for target. 101 | CMakeFiles/ndarray_backend_cpu.dir/clean: 102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean 103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean 104 | 105 | #============================================================================= 106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir 107 | 108 | # All Build rule for target. 
109 | CMakeFiles/ndarray_backend_cuda.dir/all: 110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend 111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build 112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda" 113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all 114 | 115 | # Build rule for subdir invocation for target. 116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system 117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2 118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all 119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0 120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule 121 | 122 | # Convenience name for target. 123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule 124 | .PHONY : ndarray_backend_cuda 125 | 126 | # clean rule for target. 127 | CMakeFiles/ndarray_backend_cuda.dir/clean: 128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean 129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean 130 | 131 | #============================================================================= 132 | # Special targets to cleanup operation of make. 133 | 134 | # Special rule to run CMake to check the build system integrity. 135 | # No rule that depends on this can have commands that come from listfiles 136 | # because they might be regenerated. 137 | cmake_check_build_system: 138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 139 | .PHONY : cmake_check_build_system 140 | 141 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cpu.dir 2 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cuda.dir 3 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/edit_cache.dir 4 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/rebuild_cache.dir 5 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | "/content/drive/Othercomputers/My MacBook Pro/hw4/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 12 | ) 13 | 14 | # Targets to which this target links which contain Fortran sources. 15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 16 | ) 17 | 18 | # Fortran module output directory. 19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 20 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.pdb" 4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" 5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 9 | foreach(lang CXX) 10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cpu. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS 6 | 7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include 8 | 9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden 10 | 11 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | 2 | # Consider dependencies only in project. 3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF) 4 | 5 | # The set of languages for which implicit dependencies are needed: 6 | set(CMAKE_DEPENDS_LANGUAGES 7 | ) 8 | 9 | # The set of dependency files which are needed: 10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES 11 | ) 12 | 13 | # Targets to which this target links which contain Fortran sources. 14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES 15 | ) 16 | 17 | # Fortran module output directory. 18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 19 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" 3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.pdb" 4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o" 5 | ) 6 | 7 | # Per-language clean rules from dependency scanning. 8 | foreach(lang ) 9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL) 10 | endforeach() 11 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make: -------------------------------------------------------------------------------- 1 | # Empty compiler generated dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 
3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make: -------------------------------------------------------------------------------- 1 | # Empty dependencies file for ndarray_backend_cuda. 2 | # This may be replaced when dependencies are built. 3 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so 2 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 3 2 | CMAKE_PROGRESS_2 = 4 3 | 4 | -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -------------------------------------------------------------------------------- /hw4/build/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 4 2 | -------------------------------------------------------------------------------- /hw4/build/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw4 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | # Set default install directory permissions. 41 | if(NOT DEFINED CMAKE_OBJDUMP) 42 | set(CMAKE_OBJDUMP "/usr/bin/objdump") 43 | endif() 44 | 45 | if(CMAKE_INSTALL_COMPONENT) 46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 47 | else() 48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 49 | endif() 50 | 51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 52 | "${CMAKE_INSTALL_MANIFEST_FILES}") 53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw4/build/${CMAKE_INSTALL_MANIFEST}" 54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 55 | -------------------------------------------------------------------------------- /hw4/build/detect_cuda_compute_capabilities.cpp: -------------------------------------------------------------------------------- 1 | #include <cuda_runtime.h> 2 | #include <cstdio> 3 | int main() 4 | { 5 | int count = 0; 6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; 7 | if (count == 0) return -1; 8 | for (int device = 0; device < count; ++device) 9 | { 10 | cudaDeviceProp prop; 11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) 12 | std::printf("%d.%d ", prop.major, prop.minor); 13 | } 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /hw4/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("./tests/hw4") 3 | sys.path.append("./python") 4 | 5 | from test_nd_backend import * 6 | from test_cifar_ptb_data import * 7 | from test_conv import * 8 | from test_sequence_models import * 9 | from needle import backend_ndarray as nd 10 | 11 | 12 | def train_cifar10(): 13 | import sys 14 | sys.path.append('./python') 15 | sys.path.append('./apps') 16 | import needle as ndl 17 | from models import ResNet9 18 | from simple_ml import train_cifar10, evaluate_cifar10 19 | 20 | device = ndl.cpu() 21 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True) 22 | dataloader = ndl.data.DataLoader( \ 23 | dataset=dataset, 24 | batch_size=128, 25 | shuffle=True, ) 26 | model = ResNet9(device=device, dtype="float32") 27 | train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam, 28 | lr=0.001, weight_decay=0.001, device=device) 29 | evaluate_cifar10(model, dataloader) 30 | 31 | 32 | def train_language_model(): 33 | import needle as ndl 34 | sys.path.append('./apps') 35 | from models import LanguageModel 36 | from simple_ml import train_ptb, evaluate_ptb 37 | 38 | device = ndl.cpu_numpy() 39 | corpus = ndl.data.Corpus("data/ptb") 40 | train_data = ndl.data.batchify(corpus.train, 
batch_size=16, device=device, dtype="float32") 41 | model = LanguageModel(30, len(corpus.dictionary), hidden_size=10, num_layers=2, seq_model='rnn', device=device) 42 | train_ptb(model, train_data, seq_len=1, n_epochs=1, device=device) 43 | evaluate_ptb(model, train_data, seq_len=40, device=device) 44 | 45 | 46 | if __name__ == "__main__": 47 | """ 48 | Part 1 49 | """ 50 | # test_stack((5, 5), 0, 2, nd.cpu()) 51 | # test_stack_backward((5, 5), 0, 2, nd.cpu()) 52 | 53 | # test_matmul(16, 16, 16, nd.cpu()) 54 | # test_relu((5, 5), nd.cpu()) 55 | # test_tanh_backward((5, 5), nd.cpu()) 56 | 57 | 58 | """ 59 | Part 2 60 | """ 61 | # test_cifar10_dataset(True) 62 | 63 | 64 | """ 65 | Part 3 66 | """ 67 | # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu()) 68 | # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu()) 69 | # test_dilate_forward(nd.cpu()) 70 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu()) 71 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu()) 72 | 73 | # test_init_kaiming_uniform(nd.cpu()) 74 | # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu()) 75 | # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu()) 76 | # test_resnet9(nd.cpu()) 77 | # test_train_cifar10(nd.cpu()) 78 | 79 | train_cifar10() 80 | 81 | """ 82 | Part 4 83 | """ 84 | # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu()) 85 | # test_lstm_cell(1, 1, 1, False, False, nd.cpu()) 86 | # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu()) 87 | 88 | """ 89 | Part 6 90 | """ 91 | # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu()) 92 | 93 | """ 94 | Part 7 95 | """ 96 | # train_language_model() -------------------------------------------------------------------------------- /hw4/hw4.ipynb - Colaboratory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/hw4.ipynb - Colaboratory.pdf -------------------------------------------------------------------------------- /hw4/python/needle/__init__.py: -------------------------------------------------------------------------------- 1 | from . import ops 2 | from .ops import * 3 | from .autograd import Tensor, cpu, all_devices 4 | 5 | from . import init 6 | from .init import ones, zeros, zeros_like, ones_like 7 | 8 | from . import data 9 | from . import nn 10 | from . 
import optim 11 | from .backend_selection import * 12 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/__init__.py: -------------------------------------------------------------------------------- 1 | from .ndarray import * 2 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hw4/python/needle/backend_ndarray/ndarray_backend_numpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __device_name__ = "numpy" 5 | _datatype = np.float32 6 | _datatype_size = np.dtype(_datatype).itemsize 7 | 8 | 9 | class Array: 10 | def __init__(self, size): 11 | self.array = np.empty(size, dtype=np.float32) 12 | 13 | @property 14 | def size(self): 15 | return self.array.size 16 | 17 | 18 | def to_numpy(a, shape, strides, offset): 19 | return np.lib.stride_tricks.as_strided( 20 | a.array[offset:], shape, tuple([s * _datatype_size for s in strides]) 21 | ) 22 | 23 | 24 | def from_numpy(a, out): 25 | out.array[:] = a.flatten() 26 | 27 | 28 | def fill(out, val): 29 | out.array.fill(val) 30 | 31 | 32 | def compact(a, out, shape, strides, offset): 33 | out.array[:] = to_numpy(a, shape, strides, offset).flatten() 34 | 35 | 36 | def ewise_setitem(a, out, shape, strides, offset): 37 | to_numpy(out, shape, strides, offset)[:] = a.array.reshape(shape) 38 | 39 | 40 | def scalar_setitem(size, val, out, shape, strides, offset): 41 | to_numpy(out, shape, strides, offset)[:] = val 42 | 43 | 44 | def ewise_add(a, b, out): 45 | out.array[:] = a.array + b.array 46 | 47 | 48 | def scalar_add(a, val, out): 49 | out.array[:] = a.array + val 50 | 51 | 52 | def ewise_mul(a, b, out): 53 | out.array[:] = a.array * b.array 54 | 55 | 56 | def scalar_mul(a, val, out): 57 | out.array[:] = a.array * val 58 | 59 | 60 | def ewise_div(a, b, out): 61 | out.array[:] = a.array / b.array 62 | 63 | 64 | def scalar_div(a, val, out): 65 | out.array[:] = a.array / val 66 | 67 | 68 | def scalar_power(a, val, out): 69 | out.array[:] = a.array**val 70 | 71 | 72 | def ewise_maximum(a, b, out): 73 | out.array[:] = np.maximum(a.array, b.array) 74 | 75 | 76 | def scalar_maximum(a, val, out): 77 | out.array[:] = 
np.maximum(a.array, val) 78 | 79 | 80 | def ewise_eq(a, b, out): 81 | out.array[:] = (a.array == b.array).astype(np.float32) 82 | 83 | 84 | def scalar_eq(a, val, out): 85 | out.array[:] = (a.array == val).astype(np.float32) 86 | 87 | 88 | def ewise_ge(a, b, out): 89 | out.array[:] = (a.array >= b.array).astype(np.float32) 90 | 91 | 92 | def scalar_ge(a, val, out): 93 | out.array[:] = (a.array >= val).astype(np.float32) 94 | 95 | 96 | def ewise_log(a, out): 97 | out.array[:] = np.log(a.array) 98 | 99 | 100 | def ewise_exp(a, out): 101 | out.array[:] = np.exp(a.array) 102 | 103 | 104 | def ewise_tanh(a, out): 105 | out.array[:] = np.tanh(a.array) 106 | 107 | 108 | def matmul(a, b, out, m, n, p): 109 | out.array[:] = (a.array.reshape(m, n) @ b.array.reshape(n, p)).reshape(-1) 110 | 111 | 112 | def reduce_max(a, out, reduce_size): 113 | out.array[:] = a.array[:].reshape(-1, reduce_size).max(axis=1) 114 | 115 | 116 | def reduce_sum(a, out, reduce_size): 117 | out.array[:] = a.array[:].reshape(-1, reduce_size).sum(axis=1) 118 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_numpy.py: -------------------------------------------------------------------------------- 1 | """This file defines specific implementations of devices when using numpy as NDArray backend. 2 | """ 3 | import numpy 4 | 5 | 6 | class Device: 7 | """Base class of all devices""" 8 | 9 | 10 | class CPUDevice(Device): 11 | """Represents data that sits on the CPU""" 12 | 13 | def __repr__(self): 14 | return "needle.cpu()" 15 | 16 | def __hash__(self): 17 | return self.__repr__().__hash__() 18 | 19 | def __eq__(self, other): 20 | return isinstance(other, CPUDevice) 21 | 22 | def enabled(self): 23 | return True 24 | 25 | def zeros(self, *shape, dtype="float32"): 26 | return numpy.zeros(shape, dtype=dtype) 27 | 28 | def ones(self, *shape, dtype="float32"): 29 | return numpy.ones(shape, dtype=dtype) 30 | 31 | def randn(self, *shape): 32 | # note: numpy doesn't support types within standard random routines, and 33 | # .astype("float32") does not work if we're generating a singleton 34 | return numpy.random.randn(*shape) 35 | 36 | def rand(self, *shape): 37 | # note: numpy doesn't support types within standard random routines, and 38 | # .astype("float32") does not work if we're generating a singleton 39 | return numpy.random.rand(*shape) 40 | 41 | def one_hot(self, n, i, dtype="float32"): 42 | return numpy.eye(n, dtype=dtype)[i] 43 | 44 | def empty(self, shape, dtype="float32"): 45 | return numpy.empty(shape, dtype=dtype) 46 | 47 | def full(self, shape, fill_value, dtype="float32"): 48 | return numpy.full(shape, fill_value, dtype=dtype) 49 | 50 | 51 | def cpu(): 52 | """Return cpu device""" 53 | return CPUDevice() 54 | 55 | 56 | def default_device(): 57 | return cpu() 58 | 59 | 60 | def all_devices(): 61 | """return a list of all available devices""" 62 | return [cpu()] 63 | -------------------------------------------------------------------------------- /hw4/python/needle/backend_selection.py: -------------------------------------------------------------------------------- 1 | """Logic for backend selection""" 2 | import os 3 | 4 | 5 | BACKEND = os.environ.get("NEEDLE_BACKEND", "nd") 6 | 7 | 8 | if BACKEND == "nd": 9 | print("Using needle backend") 10 | from . 
import backend_ndarray as array_api 11 | from .backend_ndarray import ( 12 | all_devices, 13 | cuda, 14 | cpu, 15 | cpu_numpy, 16 | default_device, 17 | BackendDevice as Device, 18 | ) 19 | 20 | NDArray = array_api.NDArray 21 | elif BACKEND == "np": 22 | print("Using numpy backend") 23 | import numpy as array_api 24 | from .backend_numpy import all_devices, cpu, default_device, Device 25 | 26 | NDArray = array_api.ndarray 27 | else: 28 | raise RuntimeError("Unknown needle array backend %s" % BACKEND) 29 | -------------------------------------------------------------------------------- /hw4/python/needle/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_basic import * 2 | from .data_transforms import * 3 | from .datasets import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/data/data_basic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..autograd import Tensor 3 | 4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 5 | 6 | 7 | 8 | class Dataset: 9 | r"""An abstract class representing a `Dataset`. 10 | 11 | All subclasses should overwrite :meth:`__getitem__`, supporting fetching a 12 | data sample for a given key. Subclasses must also overwrite 13 | :meth:`__len__`, which is expected to return the size of the dataset. 14 | """ 15 | 16 | def __init__(self, transforms: Optional[List] = None): 17 | self.transforms = transforms 18 | 19 | def __getitem__(self, index) -> object: 20 | raise NotImplementedError 21 | 22 | def __len__(self) -> int: 23 | raise NotImplementedError 24 | 25 | def apply_transforms(self, x): 26 | if self.transforms is not None: 27 | # apply the transforms 28 | for tform in self.transforms: 29 | x = tform(x) 30 | return x 31 | 32 | 33 | class DataLoader: 34 | r""" 35 | Data loader. Combines a dataset and a sampler, and provides an iterable over 36 | the given dataset. 37 | Args: 38 | dataset (Dataset): dataset from which to load the data. 39 | batch_size (int, optional): how many samples per batch to load 40 | (default: ``1``). 41 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 42 | at every epoch (default: ``False``). 
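Example (an illustrative sketch, not part of the original file; assumes two in-memory numpy arrays ``X`` and ``y`` wrapped in the ``NDArrayDataset`` defined under data/datasets/):
    loader = DataLoader(NDArrayDataset(X, y), batch_size=32, shuffle=True)
    for X_batch, y_batch in loader:
        pass  # each batch arrives as a pair of needle Tensors (see __next__ below)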
43 | """ 44 | dataset: Dataset 45 | batch_size: Optional[int] 46 | 47 | def __init__( 48 | self, 49 | dataset: Dataset, 50 | batch_size: Optional[int] = 1, 51 | shuffle: bool = False, 52 | ): 53 | 54 | self.dataset = dataset 55 | self.shuffle = shuffle 56 | self.batch_size = batch_size 57 | if not self.shuffle: 58 | self.ordering = np.array_split(np.arange(len(dataset)), 59 | range(batch_size, len(dataset), batch_size)) 60 | 61 | def __iter__(self): 62 | if self.shuffle: 63 | self.ordering = np.array_split(np.random.permutation(len(self.dataset)), 64 | range(self.batch_size, len(self.dataset), self.batch_size)) 65 | else: 66 | self.ordering = np.array_split(np.arange(len(self.dataset)), 67 | range(self.batch_size, len(self.dataset), self.batch_size)) 68 | self.batch_idx = 0 69 | return self 70 | 71 | def __next__(self): 72 | if self.batch_idx >= len(self.ordering): 73 | raise StopIteration 74 | batch_indices = self.ordering[self.batch_idx] 75 | X_batch, y_batch = self.dataset[batch_indices] 76 | self.batch_idx += 1 77 | return Tensor(X_batch), Tensor(y_batch) 78 | 79 | -------------------------------------------------------------------------------- /hw4/python/needle/data/data_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Transform: 4 | def __call__(self, x): 5 | raise NotImplementedError 6 | 7 | 8 | class RandomFlipHorizontal(Transform): 9 | def __init__(self, p = 0.5): 10 | self.p = p 11 | 12 | def __call__(self, img): 13 | """ 14 | Horizonally flip an image, specified as an H x W x C NDArray. 15 | Args: 16 | img: H x W x C NDArray of an image 17 | Returns: 18 | H x W x C ndarray corresponding to image flipped with probability self.p 19 | Note: use the provided code to provide randomness, for easier testing 20 | """ 21 | flip_img = np.random.rand() < self.p 22 | if flip_img: 23 | img = img[:, ::-1, :] 24 | return img 25 | 26 | 27 | class RandomCrop(Transform): 28 | def __init__(self, padding=3): 29 | self.padding = padding 30 | 31 | def __call__(self, img): 32 | """ Zero pad and then randomly crop an image. 
33 | Args: 34 | img: H x W x C NDArray of an image 35 | Returns: 36 | H x W x C NDArray of the cropped image 37 | Note: generate the image shifted by shift_x, shift_y specified below 38 | """ 39 | shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2) 40 | img_pad = np.pad(img, ((self.padding, self.padding), (self.padding, self.padding), (0, 0)), 'constant', constant_values=0) 41 | img_crop = img_pad[self.padding + shift_x : self.padding + shift_x + img.shape[0], self.padding + shift_y : self.padding + shift_y + img.shape[1], :] 42 | return img_crop 43 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist_dataset import * 2 | from .ndarray_dataset import * 3 | from .cifar10_dataset import * 4 | from .ptb_dataset import * 5 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/cifar10_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any 4 | import numpy as np 5 | from ..data_basic import Dataset 6 | 7 | class CIFAR10Dataset(Dataset): 8 | def __init__( 9 | self, 10 | base_folder: str, 11 | train: bool, 12 | p: Optional[float] = 0.5, 13 | transforms: Optional[List] = None 14 | ): 15 | """ 16 | Parameters: 17 | base_folder - cifar-10-batches-py folder filepath 18 | train - bool, if True load training dataset, else load test dataset 19 | Divide pixel values by 255 so that images are in the 0-1 range. 20 | Attributes: 21 | X - numpy array of images 22 | y - numpy array of labels 23 | """ 24 | if train: 25 | self.X = np.empty((0, 3, 32, 32)) 26 | self.y = np.empty((0,)) 27 | for i in range(1, 6): 28 | with open(os.path.join(base_folder, f"data_batch_{i}"), "rb") as f: 29 | data = pickle.load(f, encoding="bytes") 30 | self.X = np.concatenate((self.X, data[b"data"].reshape(-1, 3, 32, 32)), axis=0) 31 | self.y = np.concatenate((self.y, data[b"labels"]), axis=0) 32 | else: 33 | with open(os.path.join(base_folder, "test_batch"), "rb") as f: 34 | data = pickle.load(f, encoding="bytes") 35 | self.X = data[b"data"].reshape(-1, 3, 32, 32) 36 | self.y = np.array(data[b"labels"]) 37 | 38 | self.X = self.X.astype(np.float32) / 255.0 39 | self.transforms = [] if transforms is None else transforms 40 | 41 | 42 | def __getitem__(self, index) -> object: 43 | """ 44 | Returns the image, label at given index 45 | Image should be of shape (3, 32, 32) 46 | """ 47 | images = self.X[index] 48 | labels = self.y[index] 49 | for func in self.transforms: 50 | images = func(images) 51 | return images, labels 52 | 53 | def __len__(self) -> int: 54 | """ 55 | Returns the total number of examples in the dataset 56 | """ 57 | return len(self.y) 58 | -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/mnist_dataset.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | from ..data_basic import Dataset 3 | import numpy as np 4 | 5 | class MNISTDataset(Dataset): 6 | def __init__( 7 | self, 8 | image_filename: str, 9 | label_filename: str, 10 | transforms: Optional[List] = None, 11 | ): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def __getitem__(self, 
index) -> object: 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | def __len__(self) -> int: 22 | ### BEGIN YOUR SOLUTION 23 | raise NotImplementedError() 24 | ### END YOUR SOLUTION -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/ndarray_dataset.py: -------------------------------------------------------------------------------- 1 | from ..data_basic import Dataset 2 | 3 | class NDArrayDataset(Dataset): 4 | def __init__(self, *arrays): 5 | self.arrays = arrays 6 | 7 | def __len__(self) -> int: 8 | return self.arrays[0].shape[0] 9 | 10 | def __getitem__(self, i) -> object: 11 | return tuple([a[i] for a in self.arrays]) -------------------------------------------------------------------------------- /hw4/python/needle/data/datasets/ptb_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from needle import backend_ndarray as nd 5 | from needle import Tensor 6 | 7 | class Dictionary(object): 8 | """ 9 | Creates a dictionary from a list of words, mapping each word to a 10 | unique integer. 11 | Attributes: 12 | word2idx: dictionary mapping from a word to its unique ID 13 | idx2word: list of words in the dictionary, in the order they were added 14 | to the dictionary (i.e. each word only appears once in this list) 15 | """ 16 | def __init__(self): 17 | self.word2idx = {} 18 | self.idx2word = [] 19 | 20 | def add_word(self, word): 21 | """ 22 | Input: word of type str 23 | If the word is not in the dictionary, adds the word to the dictionary 24 | and appends to the list of words. 25 | Returns the word's unique ID. 26 | """ 27 | if word not in self.word2idx: 28 | idx = len(self.idx2word) 29 | self.word2idx[word] = idx 30 | self.idx2word.append(word) 31 | return self.word2idx[word] 32 | 33 | def __len__(self): 34 | """ 35 | Returns the number of unique words in the dictionary. 36 | """ 37 | return len(self.idx2word) 38 | 39 | 40 | 41 | class Corpus(object): 42 | """ 43 | Creates a corpus from train and test txt files. 44 | """ 45 | def __init__(self, base_dir, max_lines=None): 46 | self.dictionary = Dictionary() 47 | self.train = self.tokenize(os.path.join(base_dir, 'train.txt'), max_lines) 48 | self.test = self.tokenize(os.path.join(base_dir, 'test.txt'), max_lines) 49 | 50 | def tokenize(self, path, max_lines=None): 51 | """ 52 | Input: 53 | path - path to text file 54 | max_lines - maximum number of lines to read in 55 | Tokenizes a text file, first adding each word in the file to the dictionary, 56 | and then tokenizing the text file to a list of IDs. When adding words to the 57 | dictionary (and tokenizing the file content) '<eos>' should be appended to 58 | the end of each line in order to properly account for the end of the sentence. 59 | Output: 60 | ids: List of ids 61 | """ 62 | ids = [] 63 | self.dictionary.add_word('<eos>') 64 | with open(path, 'r') as f: 65 | if max_lines is not None: 66 | lines = f.readlines()[:max_lines] 67 | else: 68 | lines = f.readlines() 69 | for line in lines: 70 | words = line.split() + ['<eos>'] 71 | for word in words: 72 | ids.append(self.dictionary.add_word(word)) 73 | return ids 74 | 75 | 76 | def batchify(data, batch_size, device, dtype): 77 | """ 78 | Starting from sequential data, batchify arranges the dataset into columns. 
79 | For instance, with the alphabet as the sequence and batch size 4, we'd get 80 | ┌ a g m s ┐ 81 | │ b h n t │ 82 | │ c i o u │ 83 | │ d j p v │ 84 | │ e k q w │ 85 | └ f l r x ┘. 86 | These columns are treated as independent by the model, which means that the 87 | dependence of e.g. 'g' on 'f' cannot be learned, but allows more efficient 88 | batch processing. 89 | If the data cannot be evenly divided by the batch size, trim off the remainder. 90 | Returns the data as a numpy array of shape (nbatch, batch_size). 91 | """ 92 | nbatch = len(data) // batch_size 93 | data = np.array(data[:nbatch * batch_size]).reshape(nbatch, batch_size) 94 | return data 95 | 96 | 97 | def get_batch(batches, i, bptt, device=None, dtype=None): 98 | """ 99 | get_batch subdivides the source data into chunks of length bptt. 100 | If source is equal to the example output of the batchify function, with 101 | a bptt-limit of 2, we'd get the following two Variables for i = 0: 102 | ┌ a g m s ┐ ┌ b h n t ┐ 103 | └ b h n t ┘ └ c i o u ┘ 104 | Note that despite the name of the function, the subdivision of data is not 105 | done along the batch dimension (i.e. dimension 1), since that was handled 106 | by the batchify function. The chunks are along dimension 0, corresponding 107 | to the seq_len dimension in the LSTM or RNN. 108 | Inputs: 109 | batches - numpy array returned from batchify function 110 | i - index 111 | bptt - Sequence length 112 | Returns: 113 | data - Tensor of shape (bptt, bs) with cached data as NDArray 114 | target - Tensor of shape (bptt*bs,) with cached data as NDArray 115 | """ 116 | # Since we have to fetch at least one row as data and one row as target, 117 | # we subtract 1 from bptt to get the maximum possible sequence length. 118 | seq_len = min(bptt, batches.shape[0] - 1 - i) 119 | 120 | data = batches[i : i + seq_len] 121 | target = batches[i + 1 : i + 1 + seq_len].reshape(-1) 122 | return Tensor(data, device=device, dtype=dtype), Tensor(target, device=device, dtype=dtype) -------------------------------------------------------------------------------- /hw4/python/needle/init/__init__.py: -------------------------------------------------------------------------------- 1 | from .init_basic import * 2 | 3 | from .init_initializers import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/init/init_basic.py: -------------------------------------------------------------------------------- 1 | import math 2 | import needle as ndl 3 | 4 | 5 | def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False): 6 | """Generate random numbers uniform between low and high""" 7 | device = ndl.cpu() if device is None else device 8 | array = device.rand(*shape) * (high - low) + low 9 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 10 | 11 | 12 | def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False): 13 | """Generate random normal with specified mean and std deviation""" 14 | device = ndl.cpu() if device is None else device 15 | array = device.randn(*shape) * std + mean 16 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 17 | 18 | 19 | 20 | def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False): 21 | """Generate constant Tensor""" 22 | device = ndl.cpu() if device is None else device 23 | array = device.full(shape, c, dtype=dtype) 24 | return ndl.Tensor(array, device=device, dtype=dtype, 
requires_grad=requires_grad) 25 | 26 | def ones(*shape, device=None, dtype="float32", requires_grad=False): 27 | """Generate all-ones Tensor""" 28 | return constant( 29 | *shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad 30 | ) 31 | 32 | 33 | def zeros(*shape, device=None, dtype="float32", requires_grad=False): 34 | """Generate all-zeros Tensor""" 35 | return constant( 36 | *shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad 37 | ) 38 | 39 | 40 | def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False): 41 | """Generate binary random Tensor""" 42 | device = ndl.cpu() if device is None else device 43 | array = device.rand(*shape) <= p 44 | return ndl.Tensor(array, device=device, dtype=dtype, requires_grad=requires_grad) 45 | 46 | 47 | def one_hot(n, i, device=None, dtype="float32", requires_grad=False): 48 | """Generate one-hot encoding Tensor""" 49 | device = ndl.cpu() if device is None else device 50 | return ndl.Tensor( 51 | device.one_hot(n, i.numpy().astype("int32"), dtype=dtype), 52 | device=device, 53 | requires_grad=requires_grad, 54 | ) 55 | 56 | 57 | def zeros_like(array, *, device=None, requires_grad=False): 58 | device = device if device else array.device 59 | return zeros( 60 | *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad 61 | ) 62 | 63 | 64 | def ones_like(array, *, device=None, requires_grad=False): 65 | device = device if device else array.device 66 | return ones( 67 | *array.shape, dtype=array.dtype, device=device, requires_grad=requires_grad 68 | ) 69 | -------------------------------------------------------------------------------- /hw4/python/needle/init/init_initializers.py: -------------------------------------------------------------------------------- 1 | import math 2 | from .init_basic import * 3 | 4 | 5 | def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs): 6 | a = gain * math.sqrt(6.0 / (fan_in + fan_out)) 7 | return rand(fan_in, fan_out, low=-a, high=a, **kwargs) 8 | 9 | 10 | def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs): 11 | std = gain * math.sqrt(2.0 / (fan_in + fan_out)) 12 | return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs) 13 | 14 | 15 | def kaiming_uniform(fan_in, fan_out, shape=None, nonlinearity="relu", **kwargs): 16 | assert nonlinearity == "relu", "Only relu supported currently" 17 | if shape is not None: 18 | fan_in = math.prod(shape[:-1]) 19 | else: 20 | shape = (fan_in, fan_out) 21 | gain = math.sqrt(2.0) 22 | bound = gain * math.sqrt(3.0 / fan_in) 23 | return rand(*shape, low=-bound, high=bound, **kwargs) 24 | 25 | 26 | def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs): 27 | assert nonlinearity == "relu", "Only relu supported currently" 28 | gain = math.sqrt(2.0) 29 | std = gain * math.sqrt(1.0 / fan_in) 30 | return randn(fan_in, fan_out, mean=0.0, std=std, **kwargs) -------------------------------------------------------------------------------- /hw4/python/needle/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .nn_basic import * 2 | from .nn_conv import * 3 | from .nn_sequence import * 4 | -------------------------------------------------------------------------------- /hw4/python/needle/nn/nn_conv.py: -------------------------------------------------------------------------------- 1 | """The module. 
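Holds the multi-channel 2D convolutional layer defined below: inputs and outputs are NCHW, with an internal NHWC transpose around ops.conv; only 'same' padding and square kernels are supported.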
2 | """ 3 | import math 4 | from typing import List, Callable, Any 5 | from needle.autograd import Tensor 6 | from needle import ops 7 | import needle.init as init 8 | import numpy as np 9 | from .nn_basic import Parameter, Module 10 | 11 | 12 | class Conv(Module): 13 | """ 14 | Multi-channel 2D convolutional layer 15 | IMPORTANT: Accepts inputs in NCHW format, outputs also in NCHW format 16 | Only supports padding=same 17 | No grouped convolution or dilation 18 | Only supports square kernels 19 | """ 20 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, device=None, dtype="float32"): 21 | super().__init__() 22 | if isinstance(kernel_size, tuple): 23 | kernel_size = kernel_size[0] 24 | if isinstance(stride, tuple): 25 | stride = stride[0] 26 | self.in_channels = in_channels 27 | self.out_channels = out_channels 28 | self.kernel_size = kernel_size 29 | self.stride = stride 30 | 31 | self.weight = Parameter(init.kaiming_uniform( 32 | in_channels * kernel_size * kernel_size, out_channels, shape=(kernel_size, kernel_size, in_channels, out_channels), dtype=dtype, device=device, requires_grad=True)) 33 | if bias: 34 | # bound = 1.0 / math.sqrt(in_channels * (kernel_size ** 2)) 35 | self.bias = Parameter(init.rand(out_channels, dtype=dtype, device=device, requires_grad=True)) 36 | else: 37 | self.bias = None 38 | 39 | self.padding = (kernel_size - 1) // 2 40 | 41 | def forward(self, x: Tensor) -> Tensor: 42 | """ 43 | x: (N, C, H, W) 44 | """ 45 | # Transform x from NCHW to NHWC 46 | x = x.transpose((1, 2)).transpose((2, 3)) 47 | 48 | out = ops.conv(x, self.weight, stride=self.stride, padding=self.padding) 49 | if self.bias is not None: 50 | bias_broadcast = ops.broadcast_to(self.bias, out.shape) 51 | out = out + bias_broadcast 52 | 53 | # Transform out from NHWC to NCHW 54 | out = out.transpose((3, 1)).transpose((3, 2)) 55 | 56 | return out -------------------------------------------------------------------------------- /hw4/python/needle/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .ops_mathematic import * 2 | 3 | from .ops_logarithmic import * 4 | from .ops_tuple import * 5 | -------------------------------------------------------------------------------- /hw4/python/needle/ops/ops_logarithmic.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from ..autograd import NDArray 3 | from ..autograd import Op, Tensor, Value, TensorOp 4 | from ..autograd import TensorTuple, TensorTupleOp 5 | 6 | from .ops_mathematic import * 7 | 8 | from ..backend_selection import array_api, BACKEND 9 | 10 | class LogSoftmax(TensorOp): 11 | def compute(self, Z): 12 | ### BEGIN YOUR SOLUTION 13 | raise NotImplementedError() 14 | ### END YOUR SOLUTION 15 | 16 | def gradient(self, out_grad, node): 17 | ### BEGIN YOUR SOLUTION 18 | raise NotImplementedError() 19 | ### END YOUR SOLUTION 20 | 21 | 22 | def logsoftmax(a): 23 | return LogSoftmax()(a) 24 | 25 | 26 | class LogSumExp(TensorOp): 27 | def __init__(self, axes: Optional[tuple] = None): 28 | self.axes = axes 29 | 30 | def compute(self, Z): 31 | max_Z = array_api.max(Z, self.axes, keepdims=True) 32 | Z = Z - array_api.broadcast_to(max_Z, Z.shape) 33 | res = array_api.log(array_api.sum(array_api.exp(Z), self.axes)) 34 | res = res + array_api.reshape(max_Z, res.shape) 35 | return res 36 | 37 | def gradient(self, out_grad, node): 38 | input_data = node.inputs[0].realize_cached_data() 39 | max_input = 
array_api.max(input_data, self.axes, keepdims=True) 40 | input_data = input_data - array_api.broadcast_to(max_input, input_data.shape) 41 | sum_exp_z = array_api.sum(array_api.exp(input_data), self.axes, keepdims=True) 42 | cur_grad = array_api.exp(input_data) / array_api.broadcast_to(sum_exp_z, input_data.shape) 43 | 44 | if out_grad.shape != cur_grad.shape: 45 | if out_grad.cached_data.size == cur_grad.size: 46 | out_grad = reshape(out_grad, cur_grad.shape) 47 | else: 48 | # Reshape out_grad first (e.g. from (3,) to (3, 1); otherwise the broadcast result would not match expectations), then broadcast_to the shape of cur_grad 49 | new_shape = list(cur_grad.shape) 50 | if self.axes is not None: 51 | if isinstance(self.axes, Number): 52 | self.axes = (self.axes,) 53 | for axis in self.axes: 54 | new_shape[axis] = 1 55 | else: 56 | new_shape = [1] * len(new_shape) 57 | out_grad = reshape(out_grad, new_shape) 58 | out_grad = broadcast_to(out_grad, cur_grad.shape) 59 | return out_grad * cur_grad 60 | 61 | def logsumexp(a, axes=None): 62 | return LogSumExp(axes=axes)(a) 63 | 64 | -------------------------------------------------------------------------------- /hw4/python/needle/ops/ops_tuple.py: -------------------------------------------------------------------------------- 1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp 2 | import needle.init as init 3 | 4 | class MakeTensorTuple(TensorTupleOp): 5 | def compute(self, *args) -> tuple: 6 | return tuple(args) 7 | 8 | def gradient(self, out_grad, node): 9 | assert isinstance(out_grad, TensorTuple) 10 | return tuple([out_grad[i] for i in range(len(out_grad))]) 11 | 12 | 13 | def make_tuple(*args): 14 | return MakeTensorTuple()(*args) 15 | 16 | 17 | class TupleGetItem(TensorOp): 18 | def __init__(self, index): 19 | self.index = index 20 | 21 | def __call__(self, a: TensorTuple, fold_const=True) -> Value: 22 | assert isinstance(a, TensorTuple) 23 | # constant folding 24 | if fold_const and isinstance(a.op, MakeTensorTuple): 25 | return a.inputs[self.index] 26 | return Tensor.make_from_op(self, [a]) 27 | 28 | def compute(self, a): 29 | return a[self.index] 30 | 31 | def gradient(self, out_grad, node): 32 | index = self.index 33 | in_grad = [] 34 | for i, value in enumerate(node.inputs[0]): 35 | if i != index: 36 | in_grad.append(init.zeros_like(value)) 37 | else: 38 | in_grad.append(out_grad) 39 | return MakeTensorTuple()(*in_grad) 40 | 41 | 42 | def tuple_get_item(value, index): 43 | return TupleGetItem(index)(value) 44 | 45 | 46 | class FusedAddScalars(TensorTupleOp): 47 | def __init__(self, c0: float, c1: float): 48 | self.c0 = c0 49 | self.c1 = c1 50 | 51 | def compute(self, a): 52 | return a + self.c0, a + self.c1 53 | 54 | def gradient(self, out_grad, node): 55 | return out_grad[0] + out_grad[1] 56 | 57 | 58 | def fused_add_scalars(x, c0, c1): 59 | return FusedAddScalars(c0, c1)(x) 60 | -------------------------------------------------------------------------------- /hw4/python/needle/optim.py: -------------------------------------------------------------------------------- 1 | """Optimization module""" 2 | import needle as ndl 3 | import numpy as np 4 | 5 | 6 | class Optimizer: 7 | def __init__(self, params): 8 | self.params = params 9 | 10 | def step(self): 11 | raise NotImplementedError() 12 | 13 | def reset_grad(self): 14 | for p in self.params: 15 | p.grad = None 16 | 17 | 18 | class SGD(Optimizer): 19 | def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0): 20 | super().__init__(params) 21 | self.lr = lr 22 | self.momentum = momentum 23 | self.u = {} 24 | 
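# Note on step() below: per parameter it maintains a damped momentum buffer,
#   u <- momentum * u + (1 - momentum) * (grad + weight_decay * w)
#   w <- w - lr * u
# i.e. the weight-decay penalty is folded into the gradient before the
# running average is updated.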
self.weight_decay = weight_decay 25 | 26 | def step(self): 27 | for param in self.params: 28 | # Add the weight-decay (L2) penalty to the gradient 29 | grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach() 30 | u = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad_with_penalty 31 | # Convert the dtype from float64 back to float32 32 | u = ndl.Tensor(u, dtype=param.dtype) 33 | self.u[id(param)] = u 34 | param.data -= self.lr * u 35 | 36 | 37 | 38 | class Adam(Optimizer): 39 | def __init__( 40 | self, 41 | params, 42 | lr=0.01, 43 | beta1=0.9, 44 | beta2=0.999, 45 | eps=1e-8, 46 | weight_decay=0.0, 47 | ): 48 | super().__init__(params) 49 | self.lr = lr 50 | self.beta1 = beta1 51 | self.beta2 = beta2 52 | self.eps = eps 53 | self.weight_decay = weight_decay 54 | self.t = 0 55 | 56 | self.m = {} 57 | self.v = {} 58 | 59 | def step(self): 60 | self.t += 1 61 | for param in self.params: 62 | # Add the weight-decay (L2) penalty to the gradient 63 | grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach() 64 | # Convert the dtype from float64 back to float32 65 | grad_with_penalty = ndl.Tensor(grad_with_penalty, dtype=param.dtype) 66 | 67 | m = self.beta1 * self.m.get(id(param), 0) + (1 - self.beta1) * grad_with_penalty 68 | v = self.beta2 * self.v.get(id(param), 0) + (1 - self.beta2) * grad_with_penalty ** 2 69 | self.m[id(param)] = m.detach() 70 | self.v[id(param)] = v.detach() 71 | m_hat = m / (1 - self.beta1 ** self.t) 72 | v_hat = v / (1 - self.beta2 ** self.t) 73 | param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps) -------------------------------------------------------------------------------- /hw4/tests/hw4/test_cifar_ptb_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('./python') 3 | import itertools 4 | import numpy as np 5 | import pytest 6 | import mugrade 7 | 8 | import needle as ndl 9 | from needle import backend_ndarray as nd 10 | 11 | 12 | np.random.seed(2) 13 | 14 | 15 | _DEVICES = [ndl.cpu(), pytest.param(ndl.cuda(), 16 | marks=pytest.mark.skipif(not ndl.cuda().enabled(), reason="No GPU"))] 17 | 18 | 19 | TRAIN = [True, False] 20 | @pytest.mark.parametrize("train", TRAIN) 21 | def test_cifar10_dataset(train): 22 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train) 23 | if train: 24 | assert len(dataset) == 50000 25 | else: 26 | assert len(dataset) == 10000 27 | example = dataset[np.random.randint(len(dataset))] 28 | assert(isinstance(example, tuple)) 29 | X, y = example 30 | assert isinstance(X, np.ndarray) 31 | assert X.shape == (3, 32, 32) 32 | 33 | 34 | BATCH_SIZES = [1, 15] 35 | @pytest.mark.parametrize("batch_size", BATCH_SIZES) 36 | @pytest.mark.parametrize("train", TRAIN) 37 | @pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"]) 38 | def test_cifar10_loader(batch_size, train, device): 39 | cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True) 40 | train_loader = ndl.data.DataLoader(cifar10_train_dataset, batch_size) 41 | for (X, y) in train_loader: 42 | break 43 | assert isinstance(X.cached_data, nd.NDArray) 44 | assert isinstance(X, ndl.Tensor) 45 | assert isinstance(y, ndl.Tensor) 46 | assert X.dtype == 'float32' 47 | 48 | 49 | BPTT = [3, 32] 50 | @pytest.mark.parametrize("batch_size", BATCH_SIZES) 51 | @pytest.mark.parametrize("bptt", BPTT) 52 | @pytest.mark.parametrize("train", TRAIN) 53 | @pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"]) 54 | def test_ptb_dataset(batch_size, bptt, train, device): 55 | # TODO update with more tests? 
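# Shape contract exercised below (see ptb_dataset.py earlier in this dump):
# batchify trims the remainder and returns an array of shape (nbatch, batch_size);
# get_batch(batches, i, bptt) returns X of shape (bptt, batch_size) and the
# flattened next-token targets y of shape (bptt * batch_size,).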
56 | corpus = ndl.data.Corpus("data/ptb") 57 | if train: 58 | data = ndl.data.batchify(corpus.train, batch_size, device=device, dtype="float32") 59 | else: 60 | data = ndl.data.batchify(corpus.test, batch_size, device=device, dtype="float32") 61 | X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt, device=device) 62 | assert X.shape == (bptt, batch_size) 63 | assert y.shape == (bptt * batch_size,) 64 | assert isinstance(X, ndl.Tensor) 65 | assert X.dtype == 'float32' 66 | assert X.device == device 67 | assert isinstance(X.cached_data, nd.NDArray) 68 | ntokens = len(corpus.dictionary) 69 | assert ntokens == 10000 70 | 71 | 72 | ### MUGRADE ### 73 | 74 | TEST_BATCH_SIZES = [3, 5] 75 | TEST_BPTT = [6, 10] 76 | 77 | def mugrade_submit(x): 78 | if isinstance(x, np.ndarray): 79 | x = x.flatten()[:128] 80 | #print(x) 81 | mugrade.submit(x) 82 | else: 83 | #print(x) 84 | mugrade.submit(x) 85 | 86 | 87 | def submit_cifar10(): 88 | if not ndl.cuda().enabled(): 89 | print('You need a GPU to run some of these tests.') 90 | devices = [ndl.cpu(), ndl.cuda()] 91 | for train in TRAIN: 92 | dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train) 93 | mugrade_submit(len(dataset)) 94 | for (device, batch_size) in itertools.product(devices, TEST_BATCH_SIZES): 95 | loader = ndl.data.DataLoader(dataset, batch_size) 96 | for (X, y) in loader: 97 | break 98 | mugrade_submit(X.numpy()[0, :, :, :]) 99 | mugrade_submit(y.numpy()[0]) 100 | 101 | 102 | def submit_ptb(): 103 | # devices = [ndl.cpu(), ndl.cuda()] if ndl.cuda().enabled() else [ndl.cpu()] 104 | devices = [ndl.cpu(), ndl.cuda()] 105 | 106 | corpus = ndl.data.Corpus("data/ptb") 107 | mugrade_submit(np.array(len(corpus.dictionary))) 108 | for train in TRAIN: 109 | for (device, batch_size, bptt) in itertools.product(devices, TEST_BATCH_SIZES, TEST_BPTT): 110 | if train: 111 | data = ndl.data.batchify(corpus.train, batch_size, device=device, dtype="float32") 112 | else: 113 | data = ndl.data.batchify(corpus.test, batch_size, device=device, dtype="float32") 114 | X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt) 115 | mugrade_submit(np.array(len(data))) 116 | mugrade_submit(X.numpy()[0, :]) 117 | mugrade_submit(y.numpy()[0]) 118 | 119 | 120 | if __name__ == "__main__": 121 | submit_cifar10() 122 | submit_ptb() --------------------------------------------------------------------------------
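A minimal usage sketch (an illustration appended to this dump, not a file from the repository; it assumes hw4/ as the working directory, the needle package importable from ./python, and exercises only APIs shown above):

import os, sys
os.environ["NEEDLE_BACKEND"] = "np"  # "nd" (the default) would select the compiled NDArray backends
sys.path.append("./python")
import needle as ndl  # backend_selection.py prints "Using numpy backend"

x = ndl.Tensor([[1.0, 2.0], [3.0, 4.0]], device=ndl.cpu())
y = ndl.summation(x * 2.0 + 1.0)  # scalar Tensor; the operator overloads live in autograd.py (not shown in this section)
y.backward()
print(x.grad)  # gradient of sum(2x + 1) is 2 everywhere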