├── .gitignore
├── README.md
├── hw0
├── .idea
│ ├── .gitignore
│ ├── hw0.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── .ipynb_checkpoints
│ └── hw0-checkpoint.ipynb
├── Makefile
├── README.md
├── data
│ ├── t10k-images-idx3-ubyte.gz
│ ├── t10k-labels-idx1-ubyte.gz
│ ├── train-images-idx3-ubyte.gz
│ └── train-labels-idx1-ubyte.gz
├── hw0.ipynb
├── hw0.pdf
├── src
│ ├── __pycache__
│ │ ├── simple_ml.cpython-310.pyc
│ │ └── simple_ml.cpython-39.pyc
│ ├── simple_ml.py
│ ├── simple_ml_ext.cpp
│ └── simple_ml_ext.so
└── tests
│ ├── __pycache__
│ └── test_simple_ml.cpython-310-pytest-7.1.2.pyc
│ └── test_simple_ml.py
├── hw1
├── .gitignore
├── apps
│ └── simple_ml.py
├── data
│ ├── t10k-images-idx3-ubyte.gz
│ ├── t10k-labels-idx1-ubyte.gz
│ ├── train-images-idx3-ubyte.gz
│ └── train-labels-idx1-ubyte.gz
├── hw1.ipynb
├── hw1.pdf
├── python
│ └── needle
│ │ ├── __init__.py
│ │ ├── autograd.py
│ │ └── ops.py
├── tempCodeRunnerFile.ipynb
├── test.py
└── tests
│ └── test_autograd_hw.py
├── hw2
├── .idea
│ ├── .gitignore
│ ├── hw2.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── README.md
├── apps
│ ├── __pycache__
│ │ ├── mlp_resnet.cpython-310.pyc
│ │ └── mlp_resnet.cpython-39.pyc
│ └── mlp_resnet.py
├── data
│ ├── t10k-images-idx3-ubyte.gz
│ ├── t10k-labels-idx1-ubyte.gz
│ ├── train-images-idx3-ubyte.gz
│ └── train-labels-idx1-ubyte.gz
├── debug.py
├── figures
│ ├── mlp_resnet.png
│ └── residualblock.png
├── hw2.ipynb
├── hw2.ipynb - Colaboratory.pdf
├── python
│ └── needle
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ ├── __init__.cpython-39.pyc
│ │ ├── autograd.cpython-310.pyc
│ │ ├── autograd.cpython-39.pyc
│ │ ├── data.cpython-310.pyc
│ │ ├── data.cpython-39.pyc
│ │ ├── init.cpython-310.pyc
│ │ ├── init.cpython-39.pyc
│ │ ├── nn.cpython-310.pyc
│ │ ├── nn.cpython-39.pyc
│ │ ├── ops.cpython-310.pyc
│ │ ├── ops.cpython-39.pyc
│ │ ├── optim.cpython-310.pyc
│ │ └── optim.cpython-39.pyc
│ │ ├── autograd.py
│ │ ├── data.py
│ │ ├── init.py
│ │ ├── nn.py
│ │ ├── ops.py
│ │ └── optim.py
└── tests
│ ├── __pycache__
│ ├── test_data.cpython-310-pytest-7.1.2.pyc
│ ├── test_data.cpython-39.pyc
│ ├── test_nn_and_optim.cpython-310-pytest-7.1.2.pyc
│ └── test_nn_and_optim.cpython-39.pyc
│ ├── test_data.py
│ └── test_nn_and_optim.py
├── hw3
├── .idea
│ ├── .gitignore
│ ├── hw3.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── .tmp.driveupload
│ └── 6569
├── .vscode
│ └── settings.json
├── CMakeLists.txt
├── Makefile
├── README.md
├── build
│ ├── CMakeCache.txt
│ ├── CMakeFiles
│ │ ├── 3.27.9
│ │ │ ├── CMakeCCompiler.cmake
│ │ │ ├── CMakeCXXCompiler.cmake
│ │ │ ├── CMakeDetermineCompilerABI_C.bin
│ │ │ ├── CMakeDetermineCompilerABI_CXX.bin
│ │ │ ├── CMakeSystem.cmake
│ │ │ ├── CompilerIdC
│ │ │ │ ├── CMakeCCompilerId.c
│ │ │ │ └── a.out
│ │ │ └── CompilerIdCXX
│ │ │ │ ├── CMakeCXXCompilerId.cpp
│ │ │ │ └── a.out
│ │ ├── CMakeConfigureLog.yaml
│ │ ├── CMakeDirectoryInformation.cmake
│ │ ├── CMakeRuleHashes.txt
│ │ ├── Makefile.cmake
│ │ ├── Makefile2
│ │ ├── TargetDirectories.txt
│ │ ├── cmake.check_cache
│ │ ├── ndarray_backend_cpu.dir
│ │ │ ├── DependInfo.cmake
│ │ │ ├── build.make
│ │ │ ├── cmake_clean.cmake
│ │ │ ├── compiler_depend.internal
│ │ │ ├── compiler_depend.make
│ │ │ ├── compiler_depend.ts
│ │ │ ├── depend.make
│ │ │ ├── flags.make
│ │ │ ├── link.txt
│ │ │ ├── progress.make
│ │ │ └── src
│ │ │ │ ├── ndarray_backend_cpu.cc.o
│ │ │ │ └── ndarray_backend_cpu.cc.o.d
│ │ ├── ndarray_backend_cuda.dir
│ │ │ ├── DependInfo.cmake
│ │ │ ├── build.make
│ │ │ ├── cmake_clean.cmake
│ │ │ ├── compiler_depend.make
│ │ │ ├── compiler_depend.ts
│ │ │ ├── depend.make
│ │ │ ├── flags.make
│ │ │ ├── link.txt
│ │ │ ├── progress.make
│ │ │ └── src
│ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
│ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake
│ │ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen
│ │ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend
│ │ └── progress.marks
│ ├── Makefile
│ ├── cmake_install.cmake
│ └── detect_cuda_compute_capabilities.cpp
├── debug.py
├── hw3.ipynb
├── hw3.ipynb - Colaboratory.pdf
├── python
│ └── needle
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ ├── __init__.cpython-310.pyc
│ │ ├── autograd.cpython-310.pyc
│ │ ├── backend_numpy.cpython-310.pyc
│ │ ├── backend_selection.cpython-310.pyc
│ │ └── optim.cpython-310.pyc
│ │ ├── autograd.py
│ │ ├── backend_ndarray
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── ndarray.cpython-310.pyc
│ │ │ └── ndarray_backend_numpy.cpython-310.pyc
│ │ ├── ndarray.py
│ │ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so
│ │ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so
│ │ └── ndarray_backend_numpy.py
│ │ ├── backend_numpy.py
│ │ ├── backend_selection.py
│ │ ├── data
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── data_basic.cpython-310.pyc
│ │ │ └── data_transforms.cpython-310.pyc
│ │ ├── data_basic.py
│ │ ├── data_transforms.py
│ │ └── datasets
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── mnist_dataset.cpython-310.pyc
│ │ │ └── ndarray_dataset.cpython-310.pyc
│ │ │ ├── mnist_dataset.py
│ │ │ └── ndarray_dataset.py
│ │ ├── init
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── init_basic.cpython-310.pyc
│ │ │ └── init_initializers.cpython-310.pyc
│ │ ├── init_basic.py
│ │ └── init_initializers.py
│ │ ├── nn
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ └── nn_basic.cpython-310.pyc
│ │ └── nn_basic.py
│ │ ├── ops
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-310.pyc
│ │ │ ├── ops_logarithmic.cpython-310.pyc
│ │ │ ├── ops_mathematic.cpython-310.pyc
│ │ │ └── ops_tuple.cpython-310.pyc
│ │ ├── ops_logarithmic.py
│ │ ├── ops_mathematic.py
│ │ └── ops_tuple.py
│ │ └── optim.py
├── src
│ ├── ndarray_backend_cpu.cc
│ └── ndarray_backend_cuda.cu
└── tests
│ └── hw3
│ ├── __pycache__
│ ├── test_ndarray.cpython-310-pytest-7.1.2.pyc
│ ├── test_ndarray.cpython-310-pytest-7.4.3.pyc
│ └── test_ndarray.cpython-310.pyc
│ └── test_ndarray.py
└── hw4
├── .idea
├── .gitignore
├── hw4.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── .tmp.driveupload
├── 7538
├── 7792
└── 7888
├── CMakeLists.txt
├── Makefile
├── README.md
├── ResNet9.png
├── apps
├── models.py
└── simple_ml.py
├── build
├── CMakeCache.txt
├── CMakeFiles
│ ├── 3.27.9
│ │ ├── CMakeCCompiler.cmake
│ │ ├── CMakeCXXCompiler.cmake
│ │ ├── CMakeDetermineCompilerABI_C.bin
│ │ ├── CMakeDetermineCompilerABI_CXX.bin
│ │ ├── CMakeSystem.cmake
│ │ ├── CompilerIdC
│ │ │ ├── CMakeCCompilerId.c
│ │ │ └── a.out
│ │ └── CompilerIdCXX
│ │ │ ├── CMakeCXXCompilerId.cpp
│ │ │ └── a.out
│ ├── CMakeConfigureLog.yaml
│ ├── CMakeDirectoryInformation.cmake
│ ├── CMakeRuleHashes.txt
│ ├── Makefile.cmake
│ ├── Makefile2
│ ├── TargetDirectories.txt
│ ├── cmake.check_cache
│ ├── ndarray_backend_cpu.dir
│ │ ├── DependInfo.cmake
│ │ ├── build.make
│ │ ├── cmake_clean.cmake
│ │ ├── compiler_depend.internal
│ │ ├── compiler_depend.make
│ │ ├── compiler_depend.ts
│ │ ├── depend.make
│ │ ├── flags.make
│ │ ├── link.txt
│ │ ├── progress.make
│ │ └── src
│ │ │ ├── ndarray_backend_cpu.cc.o
│ │ │ └── ndarray_backend_cpu.cc.o.d
│ ├── ndarray_backend_cuda.dir
│ │ ├── DependInfo.cmake
│ │ ├── build.make
│ │ ├── cmake_clean.cmake
│ │ ├── compiler_depend.make
│ │ ├── compiler_depend.ts
│ │ ├── depend.make
│ │ ├── flags.make
│ │ ├── link.txt
│ │ ├── progress.make
│ │ └── src
│ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
│ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake
│ │ │ ├── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen
│ │ │ └── ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend
│ └── progress.marks
├── Makefile
├── cmake_install.cmake
└── detect_cuda_compute_capabilities.cpp
├── debug.py
├── hw4.ipynb
├── hw4.ipynb - Colaboratory.pdf
├── python
└── needle
│ ├── __init__.py
│ ├── autograd.py
│ ├── backend_ndarray
│ ├── __init__.py
│ ├── ndarray.py
│ ├── ndarray_backend_cpu.cpython-310-darwin.so
│ ├── ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so
│ ├── ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so
│ └── ndarray_backend_numpy.py
│ ├── backend_numpy.py
│ ├── backend_selection.py
│ ├── data
│ ├── __init__.py
│ ├── data_basic.py
│ ├── data_transforms.py
│ └── datasets
│ │ ├── __init__.py
│ │ ├── cifar10_dataset.py
│ │ ├── mnist_dataset.py
│ │ ├── ndarray_dataset.py
│ │ └── ptb_dataset.py
│ ├── init
│ ├── __init__.py
│ ├── init_basic.py
│ └── init_initializers.py
│ ├── nn
│ ├── __init__.py
│ ├── nn_basic.py
│ ├── nn_conv.py
│ └── nn_sequence.py
│ ├── ops
│ ├── __init__.py
│ ├── ops_logarithmic.py
│ ├── ops_mathematic.py
│ └── ops_tuple.py
│ └── optim.py
├── src
├── ndarray_backend_cpu.cc
└── ndarray_backend_cuda.cu
└── tests
└── hw4
├── test_cifar_ptb_data.py
├── test_conv.py
├── test_nd_backend.py
└── test_sequence_models.py
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.git/
2 | **/__pycache__/
3 | hw4/data/cifar-10-batches-py/
4 | hw4/data/ptb/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepLearningSystem
2 |
3 | The project builds a simple version of PyTorch from scratch.
4 |
5 | It is the homework of CMU 10-414/714: Deep Learning Systems ( https://dlsyscourse.org/ )
6 |
--------------------------------------------------------------------------------
/hw0/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/hw0/.idea/hw0.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/hw0/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw0/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/hw0/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw0/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw0/Makefile:
--------------------------------------------------------------------------------
1 | # NOTE: on MacOS you need to add an addition flag: -undefined dynamic_lookup
2 | default:
3 | c++ -O3 -Wall -shared -std=c++11 -fPIC -undefined dynamic_lookup $$(python3 -m pybind11 --includes) src/simple_ml_ext.cpp -o src/simple_ml_ext.so
4 |
--------------------------------------------------------------------------------
/hw0/README.md:
--------------------------------------------------------------------------------
1 | # Homework 0
2 | Public repository and stub/testing code for Homework 0 of 10-714.
3 |
--------------------------------------------------------------------------------
/hw0/data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw0/data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw0/data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw0/data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw0/hw0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/hw0.pdf
--------------------------------------------------------------------------------
/hw0/src/__pycache__/simple_ml.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/src/__pycache__/simple_ml.cpython-310.pyc
--------------------------------------------------------------------------------
/hw0/src/__pycache__/simple_ml.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/src/__pycache__/simple_ml.cpython-39.pyc
--------------------------------------------------------------------------------
/hw0/src/simple_ml_ext.cpp:
--------------------------------------------------------------------------------
 1 | #include <pybind11/pybind11.h>
 2 | #include <pybind11/numpy.h>
 3 | #include <vector>
 4 | #include <cmath>
 5 | #include <iostream>
6 |
7 | namespace py = pybind11;
8 |
9 |
void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
                                  float *theta, size_t m, size_t n, size_t k,
                                  float lr, size_t batch)
{
    /**
     * A C++ version of the softmax regression epoch code. Runs a single
     * epoch over the data defined by X and y (and sizes m, n, k), and
     * modifies theta in place.
     *
     * Args:
     *     X (const float *): pointer to X data, of size m*n, stored in row
     *         major (C) format
     *     y (const unsigned char *): pointer to y data, of size m
     *     theta (float *): pointer to theta data, of size n*k, stored in row
     *         major (C) format
     *     m (size_t): number of examples
     *     n (size_t): input dimension
     *     k (size_t): number of classes
     *     lr (float): learning rate / SGD step size
     *     batch (size_t): SGD minibatch size
     *
     * Returns:
     *     (None)
     */
    size_t sample_idx = 0;
    // Z holds the (batch x k) matrix of normalized class probabilities.
    // NOTE: extraction had stripped the template arguments here; restored
    // as vector-of-vector<float> to match the float math below.
    auto Z = std::vector<std::vector<float>>(batch, std::vector<float>(k, 0.0f));
    // Run minibatches in order over the whole epoch.
    while (sample_idx < m) {
        // Final minibatch may be smaller than `batch`.
        if (sample_idx + batch > m) {
            batch = m - sample_idx;
        }

        // Compute Z = normalize(exp(X * theta)), one row per example.
        for (size_t idx = 0; idx < batch; idx++) {
            float row_sum = 0.0f;
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] = 0.0f;
                for (size_t inner_idx = 0; inner_idx < n; inner_idx++) {
                    Z[idx][j] += X[(sample_idx + idx) * n + inner_idx] * theta[inner_idx * k + j];
                }
                Z[idx][j] = std::exp(Z[idx][j]);
                row_sum += Z[idx][j];
            }
            for (size_t j = 0; j < k; j++) {
                Z[idx][j] /= row_sum;
            }
        }

        // Let Z = Z - Iy (subtract the one-hot encoding of the labels).
        for (size_t idx = 0; idx < batch; idx++) {
            Z[idx][y[sample_idx + idx]] -= 1.0f;
        }

        // Gradient step: theta -= lr/batch * X_batch^T * Z.
        // The outer two loops iterate over theta, whose size is n * k.
        for (size_t idx = 0; idx < n; idx++) {
            for (size_t j = 0; j < k; j++) {
                float diff = 0.0f;
                for (size_t inner_idx = 0; inner_idx < batch; inner_idx++) {
                    // theta_diff(idx, j) = sum_b X^T(idx, b) * Z(b, j)
                    //                    = sum_b X(b, idx) * Z(b, j)
                    diff += X[(sample_idx + inner_idx) * n + idx] * Z[inner_idx][j];
                }
                theta[idx * k + j] -= lr * diff / batch;
            }
        }
        sample_idx += batch;
    }
}
83 |
84 |
85 | /**
86 | * This is the pybind11 code that wraps the function above. It's only role is
87 | * wrap the function above in a Python module, and you do not need to make any
88 | * edits to the code
89 | */
90 | PYBIND11_MODULE(simple_ml_ext, m) {
91 | m.def("softmax_regression_epoch_cpp",
92 | [](py::array_t X,
93 | py::array_t y,
94 | py::array_t theta,
95 | float lr,
96 | int batch) {
97 | softmax_regression_epoch_cpp(
98 | static_cast(X.request().ptr),
99 | static_cast(y.request().ptr),
100 | static_cast(theta.request().ptr),
101 | X.request().shape[0],
102 | X.request().shape[1],
103 | theta.request().shape[1],
104 | lr,
105 | batch
106 | );
107 | },
108 | py::arg("X"), py::arg("y"), py::arg("theta"),
109 | py::arg("lr"), py::arg("batch"));
110 | }
111 |
--------------------------------------------------------------------------------
/hw0/src/simple_ml_ext.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/src/simple_ml_ext.so
--------------------------------------------------------------------------------
/hw0/tests/__pycache__/test_simple_ml.cpython-310-pytest-7.1.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw0/tests/__pycache__/test_simple_ml.cpython-310-pytest-7.1.2.pyc
--------------------------------------------------------------------------------
/hw1/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .ipynb_checkpoints/
3 | env/
4 | tests/__pycache__
5 | .idea/
6 |
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 | *~
16 |
17 | # Distribution / packaging
18 | .Python
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | wheels/
31 | pip-wheel-metadata/
32 | share/python-wheels/
33 | *.egg-info/
34 | .installed.cfg
35 | *.egg
36 | MANIFEST
37 |
38 | # PyInstaller
39 | # Usually these files are written by a python script from a template
40 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
41 | *.manifest
42 | *.spec
43 |
44 | # Installer logs
45 | pip-log.txt
46 | pip-delete-this-directory.txt
47 |
48 | # Unit test / coverage reports
49 | htmlcov/
50 | .tox/
51 | .nox/
52 | .coverage
53 | .coverage.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *.cover
58 | *.py,cover
59 | .hypothesis/
60 | .pytest_cache/
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | db.sqlite3
70 | db.sqlite3-journal
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 |
82 | # PyBuilder
83 | target/
84 |
85 | # Jupyter Notebook
86 | .ipynb_checkpoints
87 |
88 | # IPython
89 | profile_default/
90 | ipython_config.py
91 |
92 | # pyenv
93 | .python-version
94 |
95 | # pipenv
96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
99 | # install all needed dependencies.
100 | #Pipfile.lock
101 |
102 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
103 | __pypackages__/
104 |
105 | # Celery stuff
106 | celerybeat-schedule
107 | celerybeat.pid
108 |
109 | # SageMath parsed files
110 | *.sage.py
111 |
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 |
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 |
125 | # Rope project settings
126 | .ropeproject
127 |
128 | # mkdocs documentation
129 | /site
130 |
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 |
136 | # Pyre type checker
137 | .pyre/
138 |
--------------------------------------------------------------------------------
/hw1/apps/simple_ml.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import gzip
3 | import numpy as np
4 |
5 | import sys
6 | sys.path.append('python/')
7 | import needle as ndl
8 |
9 |
def parse_mnist(image_filesname, label_filename):
    """Read gzipped MNIST image and label files.

    See http://yann.lecun.com/exdb/mnist/ for a description of the format.

    Args:
        image_filesname (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X, y):
            X (numpy.ndarray[np.float32]): 2D array of shape
                (num_examples, rows*cols) — e.g. 784 for 28x28 images —
                with pixel values scaled into [0.0, 1.0].
            y (numpy.ndarray[np.uint8]): 1D array of shape (num_examples,)
                holding the class labels (0-9 for MNIST).
    """
    with gzip.open(image_filesname, "rb") as img_file:
        # IDX header: magic, example count, rows, cols (big-endian uint32).
        _, count, rows, cols = struct.unpack(">IIII", img_file.read(16))
        pixels = np.frombuffer(img_file.read(), dtype=np.uint8)
        X = pixels.reshape(count, rows * cols).astype(np.float32) / 255.0

    with gzip.open(label_filename, "rb") as lbl_file:
        # Label header: magic, example count.
        struct.unpack(">II", lbl_file.read(8))
        y = np.frombuffer(lbl_file.read(), dtype=np.uint8)

    return X, y
42 |
43 |
def softmax_loss(Z, y_one_hot):
    """Return the average softmax (cross-entropy) loss over a batch.

    For the purposes of this assignment the log-sum-exp is computed
    directly, with no numerical-stability scaling.

    Args:
        Z (ndl.Tensor[np.float32]): 2D Tensor of shape
            (batch_size, num_classes) containing the logit predictions
            for each class.
        y_one_hot (ndl.Tensor[np.int8]): 2D Tensor of the same shape with
            a 1 at the index of each example's true label, zeros elsewhere.

    Returns:
        Average softmax loss over the sample. (ndl.Tensor[np.float32])
    """
    num_samples = Z.shape[0]
    # Per-example loss: log(sum_j exp(Z_ij)) - Z_{i, y_i}.
    log_sum_exp = ndl.log(ndl.exp(Z).sum(axes=(1,)))
    true_class_logit = (Z * y_one_hot).sum(axes=(1,))
    total = (log_sum_exp - true_class_logit).sum()
    return total / num_samples
65 |
66 |
def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size (and
    again, without randomizing the order of X).

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (ndl.Tensor[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (ndl.Tensor[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD mini-batch

    Returns:
        Tuple: (W1, W2)
            W1: ndl.Tensor[np.float32]
            W2: ndl.Tensor[np.float32]
    """
    num_classes = W2.shape[1]
    idx = 0
    while idx < X.shape[0]:
        X_batch = ndl.Tensor(X[idx:idx+batch])
        # Actual minibatch size: the final batch is smaller than `batch`
        # whenever num_examples is not an exact multiple of it. Using the
        # fixed `batch` here would make the one-hot scatter below fail with
        # a shape mismatch on that last partial batch.
        cur_batch = X_batch.shape[0]
        Z1 = X_batch.matmul(W1)
        network_output = ndl.relu(Z1).matmul(W2)

        y_batch = y[idx:idx+cur_batch]
        y_one_hot = np.zeros((cur_batch, num_classes))
        y_one_hot[np.arange(cur_batch), y_batch] = 1
        y_one_hot = ndl.Tensor(y_one_hot)

        loss = softmax_loss(network_output, y_one_hot)
        loss.backward()

        # Rebuild the weights as fresh tensors so the next iteration starts
        # from detached values (no stale autograd history or gradients).
        W1 = ndl.Tensor(W1.numpy() - lr * W1.grad.numpy())
        W2 = ndl.Tensor(W2.numpy() - lr * W2.grad.numpy())
        idx += cur_batch
    return W1, W2
109 |
110 |
111 | ### CODE BELOW IS FOR ILLUSTRATION, YOU DO NOT NEED TO EDIT
112 |
def loss_err(h,y):
    """Helper: return (softmax loss, classification error rate) for logits h
    and integer labels y."""
    num_classes = h.shape[-1]
    one_hot = np.zeros((y.shape[0], num_classes))
    one_hot[np.arange(y.size), y] = 1
    predictions = h.numpy().argmax(axis=1)
    return softmax_loss(h, ndl.Tensor(one_hot)).numpy(), np.mean(predictions != y)
119 |
--------------------------------------------------------------------------------
/hw1/data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw1/data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw1/data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw1/data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw1/data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw1/data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw1/data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw1/data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw1/hw1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw1/hw1.pdf
--------------------------------------------------------------------------------
/hw1/python/needle/__init__.py:
--------------------------------------------------------------------------------
1 | from .autograd import Tensor, cpu, all_devices
2 | from . import ops
3 | from .ops import *
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw1/tempCodeRunnerFile.ipynb:
--------------------------------------------------------------------------------
1 | !pip3 install --upgrade --no-deps git+https://github.com/dlsys10714/mugrade.git
2 | !pip3 install numdifftools
--------------------------------------------------------------------------------
/hw1/test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./tests')
3 | from test_autograd_hw import *
4 | # gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5,4)), axes=(1,))
5 | # test_nn_epoch_ndl()
6 | test_matmul_simple_backward()
7 | test_matmul_batched_backward()
--------------------------------------------------------------------------------
/hw2/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/hw2/.idea/hw2.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/hw2/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw2/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/hw2/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw2/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw2/README.md:
--------------------------------------------------------------------------------
1 | # Homework 2
2 |
3 | Public repository and stub/testing code for Homework 2 of 10-714.
4 |
5 |
--------------------------------------------------------------------------------
/hw2/apps/__pycache__/mlp_resnet.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/apps/__pycache__/mlp_resnet.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/apps/__pycache__/mlp_resnet.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/apps/__pycache__/mlp_resnet.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/apps/mlp_resnet.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('../python')
3 | import needle as ndl
4 | import needle.nn as nn
5 | import numpy as np
6 | import time
7 | import os
8 |
9 | np.random.seed(0)
10 |
def ResidualBlock(dim, hidden_dim, norm=nn.BatchNorm1d, drop_prob=0.1):
    """Residual block: Linear->norm->ReLU->Dropout->Linear->norm as the
    residual branch, followed by a final ReLU on the summed output."""
    branch = nn.Sequential(
        nn.Linear(dim, hidden_dim),
        norm(hidden_dim),
        nn.ReLU(),
        nn.Dropout(drop_prob),
        nn.Linear(hidden_dim, dim),
        norm(dim),
    )
    return nn.Sequential(nn.Residual(branch), nn.ReLU())
22 |
23 |
def MLPResNet(dim, hidden_dim=100, num_blocks=3, num_classes=10, norm=nn.BatchNorm1d, drop_prob=0.1):
    """MLP ResNet: Flatten -> Linear -> ReLU, then `num_blocks` residual
    blocks (hidden width hidden_dim // 2), then a linear classifier head."""
    modules = [
        nn.Flatten(),
        nn.Linear(dim, hidden_dim),
        nn.ReLU(),
    ]
    modules.extend(
        ResidualBlock(hidden_dim, hidden_dim // 2, norm, drop_prob)
        for _ in range(num_blocks)
    )
    modules.append(nn.Linear(hidden_dim, num_classes))
    return nn.Sequential(*modules)
33 |
34 |
35 |
36 |
def epoch(dataloader, model, opt=None):
    """Run one full pass over `dataloader`.

    Trains (backward + optimizer step) when `opt` is given, otherwise
    evaluates with the model in eval mode.

    Args:
        dataloader: iterable yielding (X, y) Tensor batches.
        model: the needle model to run.
        opt: optimizer, or None for evaluation only.
    Returns:
        (error_rate, average_loss) over the whole dataset.
    """
    np.random.seed(4)
    training = opt is not None
    if training:
        model.train()
    else:
        model.eval()

    loss_fn = nn.SoftmaxLoss()

    losses = []
    num_correct = 0
    for X, y in dataloader:
        logits = model(X)
        loss = loss_fn(logits, y)
        if training:
            # Explicitly drop gradients held from the previous batch before
            # backward; matches Optimizer.reset_grad's intended use.
            # (needle's backward may overwrite grads anyway — this makes it safe
            # regardless.)
            opt.reset_grad()
            loss.backward()
            opt.step()

        losses.append(loss.numpy())
        num_correct += (logits.numpy().argmax(axis=1) == y.numpy()).sum()

    return 1 - num_correct / len(dataloader.dataset), np.mean(losses)
59 |
60 |
61 |
62 |
def train_mnist(batch_size=100, epochs=10, optimizer=ndl.optim.Adam,
                lr=0.001, weight_decay=0.001, hidden_dim=100, data_dir="data"):
    """Train an MLP-ResNet classifier on MNIST and report per-epoch metrics.

    Args:
        batch_size: samples per minibatch.
        epochs: number of passes over the training set.
        optimizer: optimizer class (constructed with lr and weight_decay).
        lr: learning rate.
        weight_decay: L2 penalty coefficient.
        hidden_dim: trunk width of the MLP-ResNet.
        data_dir: directory containing the gzip'd MNIST IDX files.
    Returns:
        (train_err, train_loss, test_err, test_loss) from the final epoch.
    """
    np.random.seed(4)
    train_set = ndl.data.MNISTDataset(
        os.path.join(data_dir, "train-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "train-labels-idx1-ubyte.gz"),
    )
    train_loader = ndl.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_set = ndl.data.MNISTDataset(
        os.path.join(data_dir, "t10k-images-idx3-ubyte.gz"),
        os.path.join(data_dir, "t10k-labels-idx1-ubyte.gz"),
    )
    test_loader = ndl.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

    model = MLPResNet(28 * 28, hidden_dim=hidden_dim, num_blocks=3, num_classes=10)
    opt = optimizer(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_err = train_loss = 0
    test_err = test_loss = 0
    for epoch_idx in range(epochs):
        start = time.time()
        train_err, train_loss = epoch(train_loader, model, opt)
        test_err, test_loss = epoch(test_loader, model)
        elapsed = time.time() - start
        print("Epoch %d: Train err: %f, Train loss: %f | Test err: %f, Test loss: %f, Time: %f" % (
            epoch_idx, train_err, train_loss, test_err, test_loss, elapsed
        ))
    return train_err, train_loss, test_err, test_loss
91 |
92 |
93 |
if __name__ == "__main__":
    # Script entry point: train using the MNIST files one level above this file.
    train_mnist(data_dir="../data")
96 |
--------------------------------------------------------------------------------
/hw2/data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/data/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw2/data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/data/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw2/data/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/data/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/hw2/data/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/data/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/hw2/debug.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./tests')
3 | sys.path.append("./python")
4 |
5 | from test_nn_and_optim import *
6 | from test_data import *
7 |
8 | # test_nn_layernorm_backward_1()
9 |
10 |
if __name__ == "__main__":
    # Ad-hoc debug entry point for the MLP MNIST training test.
    # (Removed a dead `pow(1000, 1/256)` whose result was discarded.)
    test_mlp_train_mnist_1()
14 |
15 |
--------------------------------------------------------------------------------
/hw2/figures/mlp_resnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/figures/mlp_resnet.png
--------------------------------------------------------------------------------
/hw2/figures/residualblock.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/figures/residualblock.png
--------------------------------------------------------------------------------
/hw2/hw2.ipynb - Colaboratory.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/hw2.ipynb - Colaboratory.pdf
--------------------------------------------------------------------------------
/hw2/python/needle/__init__.py:
--------------------------------------------------------------------------------
1 | from .autograd import Tensor, cpu, all_devices
2 | from . import ops
3 | from .ops import *
4 | from . import init
5 | from . import data
6 | from . import nn
7 | from . import optim
8 |
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/autograd.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/autograd.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/autograd.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/autograd.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/data.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/data.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/data.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/data.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/init.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/init.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/init.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/init.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/nn.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/nn.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/nn.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/nn.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/ops.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/ops.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/ops.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/ops.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/optim.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/optim.cpython-310.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/__pycache__/optim.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/python/needle/__pycache__/optim.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/python/needle/data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gzip
3 | import struct
4 | from .autograd import Tensor
5 |
6 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any
7 |
8 |
class Transform:
    """Abstract base for data transforms; subclasses override __call__."""

    def __call__(self, x):
        # Subclasses must implement the actual transformation.
        raise NotImplementedError
12 |
13 |
class RandomFlipHorizontal(Transform):
    """Mirror an image left-right with probability ``p``."""

    def __init__(self, p=0.5):
        # Probability that a given image is flipped.
        self.p = p

    def __call__(self, img):
        """
        Horizontally flip an image given as an H x W x C NDArray.

        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C ndarray, flipped with probability self.p
        Note: draws exactly one uniform sample so seeded runs reproduce.
        """
        if np.random.rand() < self.p:
            return img[:, ::-1, :]
        return img
31 |
32 |
class RandomCrop(Transform):
    """Zero-pad an image by ``padding`` pixels on each side, then take an
    H x W crop shifted by up to ``padding`` pixels in each direction."""

    def __init__(self, padding=3):
        # Maximum absolute shift (and the pad width) in pixels.
        self.padding = padding

    def __call__(self, img):
        """Zero pad and then randomly crop an image.

        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C NDArray of the shifted/cropped image
        Note: the shifts are drawn first, so seeded runs reproduce.
        """
        shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding + 1, size=2)
        pad = self.padding
        padded = np.pad(img, ((pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0)
        row0 = pad + shift_x
        col0 = pad + shift_y
        return padded[row0:row0 + img.shape[0], col0:col0 + img.shape[1], :]
49 |
50 |
class Dataset:
    r"""An abstract class representing a `Dataset`.

    All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses must also overwrite
    :meth:`__len__`, which is expected to return the size of the dataset.
    """

    def __init__(self, transforms: Optional[List] = None):
        # Optional list of callables applied in order by apply_transforms.
        self.transforms = transforms

    def __getitem__(self, index) -> object:
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def apply_transforms(self, x):
        """Run every configured transform over x, in order, and return the result."""
        if self.transforms is not None:
            for transform in self.transforms:
                x = transform(x)
        return x
74 |
75 |
class DataLoader:
    r"""
    Data loader. Combines a dataset and a sampler, and provides an iterable over
    the given dataset.
    Args:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load
            (default: ``1``).
        shuffle (bool, optional): set to ``True`` to have the data reshuffled
            at every epoch (default: ``False``).
    """
    dataset: Dataset
    batch_size: Optional[int]

    def __init__(
        self,
        dataset: Dataset,
        batch_size: Optional[int] = 1,
        shuffle: bool = False,
    ):
        self.dataset = dataset
        self.shuffle = shuffle
        self.batch_size = batch_size
        if not self.shuffle:
            # A fixed (non-shuffled) ordering can be computed once up front.
            self.ordering = self._batched(np.arange(len(dataset)))
        # NOTE(review): original may have initialized this only in the
        # non-shuffle branch; setting it unconditionally is strictly safer.
        self.batch_idx = 0

    def _batched(self, indices):
        """Split a flat index array into consecutive batch_size-sized chunks."""
        return np.array_split(indices,
                              range(self.batch_size, len(self.dataset), self.batch_size))

    def __iter__(self):
        # Recompute the ordering at the start of every epoch; shuffling draws
        # a fresh permutation each time.
        if self.shuffle:
            order = np.random.permutation(len(self.dataset))
        else:
            order = np.arange(len(self.dataset))
        self.ordering = self._batched(order)
        self.batch_idx = 0
        return self

    def __next__(self):
        """Return the next (X, y) batch as Tensors, or raise StopIteration."""
        if self.batch_idx >= len(self.ordering):
            raise StopIteration
        batch_indices = self.ordering[self.batch_idx]
        X_batch, y_batch = self.dataset[batch_indices]
        self.batch_idx += 1
        return Tensor(X_batch), Tensor(y_batch)
122 |
123 |
class MNISTDataset(Dataset):
    """MNIST images and labels loaded from the original gzip'd IDX files."""

    def __init__(
        self,
        image_filename: str,
        label_filename: str,
        transforms: Optional[List] = None,
    ):
        """Parse IDX-format gzip files.

        Images become float32 in [0, 1] with shape (num, rows, cols, 1);
        labels stay uint8.
        """
        # Image file: big-endian header (magic, count, rows, cols), then pixels.
        with gzip.open(image_filename, 'rb') as f:
            magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
            X = np.frombuffer(f.read(), dtype=np.uint8).reshape(num, rows, cols, 1)
        X = X.astype(np.float32) / 255.0

        # Label file: big-endian header (magic, count), then one byte per label.
        with gzip.open(label_filename, 'rb') as f:
            magic, num = struct.unpack(">II", f.read(8))
            y = np.frombuffer(f.read(), dtype=np.uint8)

        self.images = X
        self.labels = y
        # Normalize to a list so apply_transforms always has something to iterate.
        self.transforms = [] if transforms is None else transforms

    def __getitem__(self, index) -> object:
        """Return (image, label) with transforms applied to the image.

        Uses the base-class apply_transforms instead of re-implementing the
        transform loop (consistency fix).
        """
        image = self.apply_transforms(self.images[index])
        return image, self.labels[index]

    def __len__(self) -> int:
        return len(self.labels)
154 |
class NDArrayDataset(Dataset):
    """Wrap one or more equal-length arrays as a dataset of row tuples."""

    def __init__(self, *arrays):
        self.arrays = arrays

    def __len__(self) -> int:
        # All arrays are assumed to share the same leading dimension.
        return self.arrays[0].shape[0]

    def __getitem__(self, i) -> object:
        return tuple(arr[i] for arr in self.arrays)
--------------------------------------------------------------------------------
/hw2/python/needle/init.py:
--------------------------------------------------------------------------------
1 | import math
2 | import needle as ndl
3 |
4 |
5 |
def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate a Tensor of uniform random numbers in [low, high)."""
    if device is None:
        device = ndl.cpu()
    values = device.rand(*shape) * (high - low) + low
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
11 |
12 |
def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate a Tensor of Gaussian random numbers with the given mean and std."""
    if device is None:
        device = ndl.cpu()
    values = device.randn(*shape) * std + mean
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
18 |
19 |
def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate a Tensor filled with the constant c."""
    if device is None:
        device = ndl.cpu()
    values = device.ones(*shape, dtype=dtype) * c  # dtype can be changed here
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
25 |
26 |
def ones(*shape, device=None, dtype="float32", requires_grad=False):
    """Generate a Tensor of all ones (thin wrapper around constant)."""
    return constant(*shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad)
30 |
31 |
def zeros(*shape, device=None, dtype="float32", requires_grad=False):
    """Generate a Tensor of all zeros (thin wrapper around constant)."""
    return constant(*shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad)
35 |
36 |
def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
    """Generate a Tensor of Bernoulli(p) booleans (True with probability p)."""
    if device is None:
        device = ndl.cpu()
    values = device.rand(*shape) <= p
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
42 |
43 |
def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
    """Generate a one-hot encoding Tensor: row k is the i[k]-th basis vector of length n."""
    if device is None:
        device = ndl.cpu()
    encoded = device.one_hot(n, i.numpy(), dtype=dtype)
    return ndl.Tensor(encoded, device=device, requires_grad=requires_grad)
48 |
49 |
def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
    """Xavier/Glorot uniform init: U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out))."""
    limit = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return rand(fan_in, fan_out, low=-limit, high=limit, **kwargs)
53 |
54 |
def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
    """Xavier/Glorot normal init: N(0, std^2) with std = gain * sqrt(2 / (fan_in + fan_out))."""
    sigma = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return randn(fan_in, fan_out, mean=0.0, std=sigma, **kwargs)
58 |
59 |
def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
    """Kaiming/He uniform init for ReLU: U(-b, b) with b = sqrt(2) * sqrt(3 / fan_in)."""
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    limit = gain * math.sqrt(3.0 / fan_in)
    return rand(fan_in, fan_out, low=-limit, high=limit, **kwargs)
65 |
66 |
def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
    """Kaiming/He normal init for ReLU: N(0, std^2) with std = sqrt(2) * sqrt(1 / fan_in)."""
    assert nonlinearity == "relu", "Only relu supported currently"
    gain = math.sqrt(2.0)
    sigma = gain * math.sqrt(1.0 / fan_in)
    return randn(fan_in, fan_out, mean=0.0, std=sigma, **kwargs)
72 |
--------------------------------------------------------------------------------
/hw2/python/needle/optim.py:
--------------------------------------------------------------------------------
1 | """Optimization module"""
2 | import needle as ndl
3 | import numpy as np
4 |
5 |
class Optimizer:
    """Base optimizer holding a parameter list; subclasses implement step()."""

    def __init__(self, params):
        # Sequence of parameters this optimizer updates.
        self.params = params

    def step(self):
        """Apply one update to every parameter; must be overridden."""
        raise NotImplementedError()

    def reset_grad(self):
        """Clear the gradient of every parameter."""
        for param in self.params:
            param.grad = None
16 |
17 |
class SGD(Optimizer):
    """Stochastic gradient descent with momentum and L2 weight decay."""

    def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
        super().__init__(params)
        self.lr = lr
        self.momentum = momentum
        # Per-parameter momentum buffers, keyed by id(param).
        self.u = {}
        self.weight_decay = weight_decay

    def step(self):
        """Take one momentum-SGD step on every parameter."""
        for param in self.params:
            # Weight decay enters as an L2 penalty term added to the gradient.
            grad = param.grad.detach() + self.weight_decay * param.detach()
            velocity = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad
            # Re-wrap to force the parameter's dtype (e.g. float64 -> float32).
            velocity = ndl.Tensor(velocity, dtype=param.dtype)
            self.u[id(param)] = velocity
            param.data -= self.lr * velocity
35 |
36 |
37 |
class Adam(Optimizer):
    """Adam optimizer with bias-corrected moment estimates and L2 weight decay."""

    def __init__(
        self,
        params,
        lr=0.01,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        weight_decay=0.0,
    ):
        super().__init__(params)
        self.lr = lr
        self.beta1 = beta1          # decay rate for the first moment
        self.beta2 = beta2          # decay rate for the second moment
        self.eps = eps              # numerical-stability term in the denominator
        self.weight_decay = weight_decay
        self.t = 0                  # timestep counter, used for bias correction

        self.m = {}                 # first-moment estimates, keyed by id(param)
        self.v = {}                 # second-moment estimates, keyed by id(param)

    def step(self):
        """Take one Adam step on every parameter."""
        self.t += 1
        for param in self.params:
            # Weight decay enters as an L2 penalty term added to the gradient.
            grad_with_penalty = param.grad.detach() + self.weight_decay * param.detach()
            # Re-wrap to force the parameter's dtype (e.g. float64 -> float32).
            grad_with_penalty = ndl.Tensor(grad_with_penalty, dtype=param.dtype)

            # Exponential moving averages of the gradient and squared gradient;
            # detached copies are stored so the graph is not kept alive.
            m = self.beta1 * self.m.get(id(param), 0) + (1 - self.beta1) * grad_with_penalty
            v = self.beta2 * self.v.get(id(param), 0) + (1 - self.beta2) * grad_with_penalty ** 2
            self.m[id(param)] = m.detach()
            self.v[id(param)] = v.detach()
            # Bias-corrected estimates compensate for the zero initialization.
            m_hat = m / (1 - self.beta1 ** self.t)
            v_hat = v / (1 - self.beta2 ** self.t)
            param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps)
--------------------------------------------------------------------------------
/hw2/tests/__pycache__/test_data.cpython-310-pytest-7.1.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/tests/__pycache__/test_data.cpython-310-pytest-7.1.2.pyc
--------------------------------------------------------------------------------
/hw2/tests/__pycache__/test_data.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/tests/__pycache__/test_data.cpython-39.pyc
--------------------------------------------------------------------------------
/hw2/tests/__pycache__/test_nn_and_optim.cpython-310-pytest-7.1.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/tests/__pycache__/test_nn_and_optim.cpython-310-pytest-7.1.2.pyc
--------------------------------------------------------------------------------
/hw2/tests/__pycache__/test_nn_and_optim.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw2/tests/__pycache__/test_nn_and_optim.cpython-39.pyc
--------------------------------------------------------------------------------
/hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/hw3/.idea/hw3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/hw3/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw3/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/hw3/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw3/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw3/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.associations": {
3 | "vector": "cpp"
4 | }
5 | }
--------------------------------------------------------------------------------
/hw3/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(needle C CXX)
cmake_policy(SET CMP0146 OLD)

# find correct version of Python
execute_process(COMMAND python3-config --prefix
  OUTPUT_VARIABLE Python_ROOT_DIR)
find_package(Python COMPONENTS Development Interpreter REQUIRED)
include_directories(${Python_INCLUDE_DIRS})

# find pybind
execute_process(COMMAND python3 -m pybind11 --cmakedir
  RESULT_VARIABLE __pybind_exit_code
  OUTPUT_VARIABLE __pybind_path
  OUTPUT_STRIP_TRAILING_WHITESPACE)
find_package(pybind11 PATHS ${__pybind_path})


if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
else()
  # Fix: the original passed GCC-style flags ("/std:c++11 -O2 -march=native")
  # to MSVC. MSVC has no /std:c++11 switch (c++14 is its earliest selectable
  # standard), uses /O2 rather than -O2, and has no -march=native equivalent.
  set(CMAKE_CXX_FLAGS "/std:c++14 /O2 ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
endif()

include_directories(SYSTEM ${pybind11_INCLUDE_DIRS})
list(APPEND LINKER_LIBS ${pybind11_LIBRARIES})


###################
### CPU BACKEND ###
###################
add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc)
target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS})
pybind11_extension(ndarray_backend_cpu)
pybind11_strip(ndarray_backend_cpu)


# directly output to ffi folder
set_target_properties(ndarray_backend_cpu
  PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray
  CXX_VISIBILITY_PRESET "hidden"
)

if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
  set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup)
endif()



####################
### CUDA BACKEND ###
####################
find_package(CUDA)
if(CUDA_FOUND)
  message(STATUS "Found cuda, building cuda backend")

  include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
  list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY})

  # invoke nvidia smi to detect if we really have a GPU
  execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET)
  if(NV_RET EQUAL "0")
    CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto)
  else()
    # set to 3.7 the flag of K80
    CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7)
  endif()

  # set arch flags properly
  CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS})

  target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS})
  pybind11_extension(ndarray_backend_cuda)
  pybind11_strip(ndarray_backend_cuda)

  # directly output to ffi folder
  set_target_properties(ndarray_backend_cuda
    PROPERTIES
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray
    CXX_VISIBILITY_PRESET "hidden"
    CUDA_VISIBILITY_PRESET "hidden"
  )

endif()
--------------------------------------------------------------------------------
/hw3/Makefile:
--------------------------------------------------------------------------------
# Fix: .PHONY prerequisites are space-separated; the previous comma-separated
# list declared literal targets named "lib," etc., so none of the real
# targets were actually phony.
.PHONY: lib pybind clean format all

all: lib


lib:
	@mkdir -p build
	@cd build; cmake ..
	@cd build; $(MAKE)

format:
	python3 -m black .
	clang-format -i src/*.cc src/*.cu

clean:
	rm -rf build python/needle/backend_ndarray/ndarray_backend*.so
--------------------------------------------------------------------------------
/hw3/README.md:
--------------------------------------------------------------------------------
1 | # Homework 3
2 |
3 | Public repository and stub/testing code for Homework 3 of 10-714.
4 |
5 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/cc")
2 | set(CMAKE_C_COMPILER_ARG1 "")
3 | set(CMAKE_C_COMPILER_ID "GNU")
4 | set(CMAKE_C_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_C_COMPILER_WRAPPER "")
7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
15 |
16 | set(CMAKE_C_PLATFORM_ID "Linux")
17 | set(CMAKE_C_SIMULATE_ID "")
18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
19 | set(CMAKE_C_SIMULATE_VERSION "")
20 |
21 |
22 |
23 |
24 | set(CMAKE_AR "/usr/bin/ar")
25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
26 | set(CMAKE_RANLIB "/usr/bin/ranlib")
27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
28 | set(CMAKE_LINKER "/usr/bin/ld")
29 | set(CMAKE_MT "")
30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
31 | set(CMAKE_COMPILER_IS_GNUCC 1)
32 | set(CMAKE_C_COMPILER_LOADED 1)
33 | set(CMAKE_C_COMPILER_WORKS TRUE)
34 | set(CMAKE_C_ABI_COMPILED TRUE)
35 |
36 | set(CMAKE_C_COMPILER_ENV_VAR "CC")
37 |
38 | set(CMAKE_C_COMPILER_ID_RUN 1)
39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
41 | set(CMAKE_C_LINKER_PREFERENCE 10)
42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE)
43 |
44 | # Save compiler ABI information.
45 | set(CMAKE_C_SIZEOF_DATA_PTR "8")
46 | set(CMAKE_C_COMPILER_ABI "ELF")
47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN")
48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
49 |
50 | if(CMAKE_C_SIZEOF_DATA_PTR)
51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
52 | endif()
53 |
54 | if(CMAKE_C_COMPILER_ABI)
55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
56 | endif()
57 |
58 | if(CMAKE_C_LIBRARY_ARCHITECTURE)
59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60 | endif()
61 |
62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
65 | endif()
66 |
67 |
68 |
69 |
70 |
71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s")
73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
75 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++")
2 | set(CMAKE_CXX_COMPILER_ARG1 "")
3 | set(CMAKE_CXX_COMPILER_ID "GNU")
4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_CXX_COMPILER_WRAPPER "")
7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23")
10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
11 | set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20")
15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23")
16 |
17 | set(CMAKE_CXX_PLATFORM_ID "Linux")
18 | set(CMAKE_CXX_SIMULATE_ID "")
19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
20 | set(CMAKE_CXX_SIMULATE_VERSION "")
21 |
22 |
23 |
24 |
25 | set(CMAKE_AR "/usr/bin/ar")
26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11")
27 | set(CMAKE_RANLIB "/usr/bin/ranlib")
28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
29 | set(CMAKE_LINKER "/usr/bin/ld")
30 | set(CMAKE_MT "")
31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
32 | set(CMAKE_COMPILER_IS_GNUCXX 1)
33 | set(CMAKE_CXX_COMPILER_LOADED 1)
34 | set(CMAKE_CXX_COMPILER_WORKS TRUE)
35 | set(CMAKE_CXX_ABI_COMPILED TRUE)
36 |
37 | set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
38 |
39 | set(CMAKE_CXX_COMPILER_ID_RUN 1)
40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
42 |
43 | foreach (lang C OBJC OBJCXX)
44 | if (CMAKE_${lang}_COMPILER_ID_RUN)
45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
47 | endforeach()
48 | endif()
49 | endforeach()
50 |
51 | set(CMAKE_CXX_LINKER_PREFERENCE 30)
52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE)
54 |
55 | # Save compiler ABI information.
56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
57 | set(CMAKE_CXX_COMPILER_ABI "ELF")
58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN")
59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60 |
61 | if(CMAKE_CXX_SIZEOF_DATA_PTR)
62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
63 | endif()
64 |
65 | if(CMAKE_CXX_COMPILER_ABI)
66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
67 | endif()
68 |
69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
71 | endif()
72 |
73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
76 | endif()
77 |
78 |
79 |
80 |
81 |
82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
86 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CMakeSystem.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_HOST_SYSTEM "Linux-5.15.120+")
2 | set(CMAKE_HOST_SYSTEM_NAME "Linux")
3 | set(CMAKE_HOST_SYSTEM_VERSION "5.15.120+")
4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
5 |
6 |
7 |
8 | set(CMAKE_SYSTEM "Linux-5.15.120+")
9 | set(CMAKE_SYSTEM_NAME "Linux")
10 | set(CMAKE_SYSTEM_VERSION "5.15.120+")
11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64")
12 |
13 | set(CMAKE_CROSSCOMPILING "FALSE")
14 |
15 | set(CMAKE_SYSTEM_LOADED 1)
16 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdC/a.out
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/CMakeDirectoryInformation.cmake:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # Relative path conversion top directories.
5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw3")
6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw3/build")
7 |
8 | # Force unix paths in dependencies.
9 | set(CMAKE_FORCE_UNIX_PATHS 1)
10 |
11 |
12 | # The C and CXX include file regular expressions for this directory.
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
17 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/CMakeRuleHashes.txt:
--------------------------------------------------------------------------------
1 | # Hashes of file build rules.
2 | 9720afbab5807e3b7d272586be3395ba CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/Makefile.cmake:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # The generator used is:
5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles")
6 |
7 | # The top level Makefile was generated from the following files:
8 | set(CMAKE_MAKEFILE_DEPENDS
9 | "CMakeCache.txt"
10 | "/content/drive/Othercomputers/My MacBook Pro/hw3/CMakeLists.txt"
11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake"
12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake"
13 | "CMakeFiles/3.27.9/CMakeSystem.cmake"
14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen"
15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend"
16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake"
17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake"
18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake"
19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake"
20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake"
21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake"
22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake"
23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake"
24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake"
25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake"
26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake"
27 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake"
28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake"
29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake"
30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake"
31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake"
32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake"
33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake"
34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake"
35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake"
36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake"
37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake"
38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake"
39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake"
40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake"
41 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake"
42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake"
43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake"
44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake"
45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake"
46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake"
47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake"
48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake"
49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake"
50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake"
51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake"
52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake"
53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake"
54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake"
55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake"
56 | )
57 |
58 | # The corresponding makefile is:
59 | set(CMAKE_MAKEFILE_OUTPUTS
60 | "Makefile"
61 | "CMakeFiles/cmake.check_cache"
62 | )
63 |
64 | # Byproducts of CMake generate step:
65 | set(CMAKE_MAKEFILE_PRODUCTS
66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen"
67 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake"
68 | "CMakeFiles/CMakeDirectoryInformation.cmake"
69 | )
70 |
71 | # Dependency information for all targets:
72 | set(CMAKE_DEPEND_INFO_FILES
73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake"
74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake"
75 | )
76 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/Makefile2:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # Default target executed when no arguments are given to make.
5 | default_target: all
6 | .PHONY : default_target
7 |
8 | #=============================================================================
9 | # Special targets provided by cmake.
10 |
11 | # Disable implicit rules so canonical targets will work.
12 | .SUFFIXES:
13 |
14 | # Disable VCS-based implicit rules.
15 | % : %,v
16 |
17 | # Disable VCS-based implicit rules.
18 | % : RCS/%
19 |
20 | # Disable VCS-based implicit rules.
21 | % : RCS/%,v
22 |
23 | # Disable VCS-based implicit rules.
24 | % : SCCS/s.%
25 |
26 | # Disable VCS-based implicit rules.
27 | % : s.%
28 |
29 | .SUFFIXES: .hpux_make_needs_suffix_list
30 |
31 | # Command-line flag to silence nested $(MAKE).
32 | $(VERBOSE)MAKESILENT = -s
33 |
34 | #Suppress display of executed commands.
35 | $(VERBOSE).SILENT:
36 |
37 | # A target that is always out of date.
38 | cmake_force:
39 | .PHONY : cmake_force
40 |
41 | #=============================================================================
42 | # Set environment variables for the build.
43 |
44 | # The shell in which to execute make rules.
45 | SHELL = /bin/sh
46 |
47 | # The CMake executable.
48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
49 |
50 | # The command to remove a file.
51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f
52 |
53 | # Escaping for special characters.
54 | EQUALS = =
55 |
56 | # The top-level source directory on which CMake was run.
57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3"
58 |
59 | # The top-level build directory on which CMake was run.
60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw3/build"
61 |
62 | #=============================================================================
63 | # Directory level rules for the build root directory
64 |
65 | # The main recursive "all" target.
66 | all: CMakeFiles/ndarray_backend_cpu.dir/all
67 | all: CMakeFiles/ndarray_backend_cuda.dir/all
68 | .PHONY : all
69 |
70 | # The main recursive "preinstall" target.
71 | preinstall:
72 | .PHONY : preinstall
73 |
74 | # The main recursive "clean" target.
75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean
76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean
77 | .PHONY : clean
78 |
79 | #=============================================================================
80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir
81 |
82 | # All Build rule for target.
83 | CMakeFiles/ndarray_backend_cpu.dir/all:
84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend
85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build
86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu"
87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all
88 |
89 | # Build rule for subdir invocation for target.
90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system
91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2
92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all
93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0
94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule
95 |
96 | # Convenience name for target.
97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule
98 | .PHONY : ndarray_backend_cpu
99 |
100 | # clean rule for target.
101 | CMakeFiles/ndarray_backend_cpu.dir/clean:
102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean
103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean
104 |
105 | #=============================================================================
106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir
107 |
108 | # All Build rule for target.
109 | CMakeFiles/ndarray_backend_cuda.dir/all:
110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend
111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build
112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda"
113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all
114 |
115 | # Build rule for subdir invocation for target.
116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system
117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 2
118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all
119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles" 0
120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule
121 |
122 | # Convenience name for target.
123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule
124 | .PHONY : ndarray_backend_cuda
125 |
126 | # clean rule for target.
127 | CMakeFiles/ndarray_backend_cuda.dir/clean:
128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean
129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean
130 |
131 | #=============================================================================
132 | # Special targets to cleanup operation of make.
133 |
134 | # Special rule to run CMake to check the build system integrity.
135 | # No rule that depends on this can have commands that come from listfiles
136 | # because they might be regenerated.
137 | cmake_check_build_system:
138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
139 | .PHONY : cmake_check_build_system
140 |
141 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/TargetDirectories.txt:
--------------------------------------------------------------------------------
1 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cpu.dir
2 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/ndarray_backend_cuda.dir
3 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/edit_cache.dir
4 | /content/drive/Othercomputers/My MacBook Pro/hw3/build/CMakeFiles/rebuild_cache.dir
5 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/cmake.check_cache:
--------------------------------------------------------------------------------
1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file
2 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake:
--------------------------------------------------------------------------------
1 |
2 | # Consider dependencies only in project.
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
4 |
5 | # The set of languages for which implicit dependencies are needed:
6 | set(CMAKE_DEPENDS_LANGUAGES
7 | )
8 |
9 | # The set of dependency files which are needed:
10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES
11 | "/content/drive/Othercomputers/My MacBook Pro/hw3/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d"
12 | )
13 |
14 | # Targets to which this target links which contain Fortran sources.
15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES
16 | )
17 |
18 | # Fortran module output directory.
19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "")
20 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
1 | file(REMOVE_RECURSE
2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so"
3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.pdb"
4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o"
5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d"
6 | )
7 |
8 | # Per-language clean rules from dependency scanning.
9 | foreach(lang CXX)
10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL)
11 | endforeach()
12 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu.
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ndarray_backend_cpu.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # compile CXX with /usr/bin/c++
5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS
6 |
7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include
8 |
9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden
10 |
11 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o
2 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 1
2 | CMAKE_PROGRESS_2 = 2
3 |
4 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake:
--------------------------------------------------------------------------------
1 |
2 | # Consider dependencies only in project.
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
4 |
5 | # The set of languages for which implicit dependencies are needed:
6 | set(CMAKE_DEPENDS_LANGUAGES
7 | )
8 |
9 | # The set of dependency files which are needed:
10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES
11 | )
12 |
13 | # Targets to which this target links which contain Fortran sources.
14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES
15 | )
16 |
17 | # Fortran module output directory.
18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "")
19 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
1 | file(REMOVE_RECURSE
2 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so"
3 | "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.pdb"
4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o"
5 | )
6 |
7 | # Per-language clean rules from dependency scanning.
8 | foreach(lang )
9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL)
10 | endforeach()
11 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make:
--------------------------------------------------------------------------------
1 | # Empty compiler generated dependencies file for ndarray_backend_cuda.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda.
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ndarray_backend_cuda.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so
2 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 3
2 | CMAKE_PROGRESS_2 = 4
3 |
4 |
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
--------------------------------------------------------------------------------
/hw3/build/CMakeFiles/progress.marks:
--------------------------------------------------------------------------------
1 | 4
2 |
--------------------------------------------------------------------------------
/hw3/build/cmake_install.cmake:
--------------------------------------------------------------------------------
1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw3
2 |
3 | # Set the install prefix
4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX)
5 | set(CMAKE_INSTALL_PREFIX "/usr/local")
6 | endif()
7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
8 |
9 | # Set the install configuration name.
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
11 | if(BUILD_TYPE)
12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
14 | else()
15 | set(CMAKE_INSTALL_CONFIG_NAME "")
16 | endif()
17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
18 | endif()
19 |
20 | # Set the component getting installed.
21 | if(NOT CMAKE_INSTALL_COMPONENT)
22 | if(COMPONENT)
23 | message(STATUS "Install component: \"${COMPONENT}\"")
24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
25 | else()
26 | set(CMAKE_INSTALL_COMPONENT)
27 | endif()
28 | endif()
29 |
30 | # Install shared libraries without execute permission?
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
32 | set(CMAKE_INSTALL_SO_NO_EXE "1")
33 | endif()
34 |
35 | # Is this installation the result of a crosscompile?
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING)
37 | set(CMAKE_CROSSCOMPILING "FALSE")
38 | endif()
39 |
40 | # Set default install directory permissions.
41 | if(NOT DEFINED CMAKE_OBJDUMP)
42 | set(CMAKE_OBJDUMP "/usr/bin/objdump")
43 | endif()
44 |
45 | if(CMAKE_INSTALL_COMPONENT)
46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
47 | else()
48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
49 | endif()
50 |
51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
52 | "${CMAKE_INSTALL_MANIFEST_FILES}")
53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw3/build/${CMAKE_INSTALL_MANIFEST}"
54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}")
55 |
--------------------------------------------------------------------------------
/hw3/build/detect_cuda_compute_capabilities.cpp:
--------------------------------------------------------------------------------
#include <cstdio>
#include <cuda_runtime_api.h>
3 | int main()
4 | {
5 | int count = 0;
6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;
7 | if (count == 0) return -1;
8 | for (int device = 0; device < count; ++device)
9 | {
10 | cudaDeviceProp prop;
11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
12 | std::printf("%d.%d ", prop.major, prop.minor);
13 | }
14 | return 0;
15 | }
16 |
--------------------------------------------------------------------------------
/hw3/debug.py:
--------------------------------------------------------------------------------
# Ad-hoc debugging driver: runs a single ndarray test case directly
# (outside pytest) so it can be stepped through in a debugger.
import sys
sys.path.append("./tests/hw3")  # make test_ndarray importable
sys.path.append("./python")     # make the needle package importable

from test_ndarray import *
from needle import backend_ndarray as nd


if __name__ == "__main__":
    # Reproduces one parameterized case of test_getitem on the CPU backend:
    # a 6-D array sliced on every dimension.
    test_getitem(device=nd.cpu(), params={"shape": (8, 8, 2, 2, 2, 2), "fn": lambda X: X[1:3, 5:8, 1:2, 0:1, 0:1, 1:2]})
--------------------------------------------------------------------------------
/hw3/hw3.ipynb - Colaboratory.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/hw3.ipynb - Colaboratory.pdf
--------------------------------------------------------------------------------
/hw3/python/needle/__init__.py:
--------------------------------------------------------------------------------
1 | from . import ops
2 | from .ops import *
3 | from .autograd import Tensor, cpu, all_devices
4 |
5 | from . import init
6 | from .init import ones, zeros, zeros_like, ones_like
7 |
8 | from . import data
9 | from . import nn
10 | from . import optim
11 | from .backend_selection import *
12 |
--------------------------------------------------------------------------------
/hw3/python/needle/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/__pycache__/autograd.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/autograd.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_numpy.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/backend_selection.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/__pycache__/optim.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/__pycache__/optim.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/__init__.py:
--------------------------------------------------------------------------------
1 | from .ndarray import *
2 |
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/__pycache__/ndarray_backend_numpy.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/hw3/python/needle/backend_ndarray/ndarray_backend_numpy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
__device_name__ = "numpy"
# Single module-level element dtype: every Array buffer uses this type.
_datatype = np.float32
_datatype_size = np.dtype(_datatype).itemsize
# Backward-compatible alias for the original (misspelled) name, in case
# external code referenced it.
_datetype_size = _datatype_size


class Array:
    """Flat, contiguous 1-D buffer backing the "numpy" device.

    Shaped/strided NDArray views are layered on top of this buffer by
    to_numpy() below.
    """

    def __init__(self, size):
        # Fix: use the module-level _datatype instead of a second hard-coded
        # np.float32, so the backend dtype is defined in exactly one place.
        self.array = np.empty(size, dtype=_datatype)

    @property
    def size(self):
        # Number of elements in the underlying buffer.
        return self.array.size


def to_numpy(a, shape, strides, offset):
    """Return a strided numpy view of `a`.

    `strides` is given in elements; numpy's as_strided expects bytes,
    hence the _datatype_size scaling.
    """
    return np.lib.stride_tricks.as_strided(
        a.array[offset:], shape, tuple(s * _datatype_size for s in strides)
    )


def from_numpy(a, out):
    """Copy the contents of numpy array `a` into backend buffer `out`."""
    out.array[:] = a.flatten()


def fill(out, val):
    """Set every element of `out` to the scalar `val`."""
    out.array.fill(val)


def compact(a, out, shape, strides, offset):
    """Write the (shape, strides, offset) view of `a` contiguously into `out`."""
    out.array[:] = to_numpy(a, shape, strides, offset).flatten()


def ewise_setitem(a, out, shape, strides, offset):
    """Assign the compact buffer `a` into the strided view of `out`."""
    to_numpy(out, shape, strides, offset)[:] = a.array.reshape(shape)


def scalar_setitem(size, val, out, shape, strides, offset):
    """Assign scalar `val` to every element of the strided view of `out`."""
    to_numpy(out, shape, strides, offset)[:] = val


# --- elementwise / scalar arithmetic over compact buffers -------------------

def ewise_add(a, b, out):
    out.array[:] = a.array + b.array


def scalar_add(a, val, out):
    out.array[:] = a.array + val


def ewise_mul(a, b, out):
    out.array[:] = a.array * b.array


def scalar_mul(a, val, out):
    out.array[:] = a.array * val


def ewise_div(a, b, out):
    out.array[:] = a.array / b.array


def scalar_div(a, val, out):
    out.array[:] = a.array / val


def scalar_power(a, val, out):
    out.array[:] = a.array**val


def ewise_maximum(a, b, out):
    out.array[:] = np.maximum(a.array, b.array)


def scalar_maximum(a, val, out):
    out.array[:] = np.maximum(a.array, val)


# Comparisons produce 0.0/1.0 floats, matching the backend's single dtype.

def ewise_eq(a, b, out):
    out.array[:] = (a.array == b.array).astype(np.float32)


def scalar_eq(a, val, out):
    out.array[:] = (a.array == val).astype(np.float32)


def ewise_ge(a, b, out):
    out.array[:] = (a.array >= b.array).astype(np.float32)


def scalar_ge(a, val, out):
    out.array[:] = (a.array >= val).astype(np.float32)


def ewise_log(a, out):
    out.array[:] = np.log(a.array)


def ewise_exp(a, out):
    out.array[:] = np.exp(a.array)


def ewise_tanh(a, out):
    out.array[:] = np.tanh(a.array)


def matmul(a, b, out, m, n, p):
    """Multiply compact buffers viewed as (m, n) @ (n, p); flat result in `out`."""
    out.array[:] = (a.array.reshape(m, n) @ b.array.reshape(n, p)).reshape(-1)


def reduce_max(a, out, reduce_size):
    """Max over consecutive groups of `reduce_size` elements of `a`."""
    out.array[:] = a.array[:].reshape(-1, reduce_size).max(axis=1)


def reduce_sum(a, out, reduce_size):
    """Sum over consecutive groups of `reduce_size` elements of `a`."""
    out.array[:] = a.array[:].reshape(-1, reduce_size).sum(axis=1)
118 |
--------------------------------------------------------------------------------
/hw3/python/needle/backend_numpy.py:
--------------------------------------------------------------------------------
"""This file defines specific implementations of devices when using numpy as NDArray backend.
2 | """
3 | import numpy
4 |
5 |
class Device:
    """Base class of all devices."""


class CPUDevice(Device):
    """Device whose data lives in host (CPU) memory, backed by numpy."""

    def __repr__(self):
        return "needle.cpu()"

    def __hash__(self):
        # Every CPUDevice is interchangeable, so hash the constant repr.
        return hash(self.__repr__())

    def __eq__(self, other):
        # Any two CPUDevice instances compare equal.
        return isinstance(other, CPUDevice)

    def enabled(self):
        # The CPU backend is always available.
        return True

    def zeros(self, *shape, dtype="float32"):
        """Allocate an all-zeros array of the given shape."""
        return numpy.zeros(shape, dtype=dtype)

    def ones(self, *shape, dtype="float32"):
        """Allocate an all-ones array of the given shape."""
        return numpy.full(shape, 1, dtype=dtype)

    def randn(self, *shape):
        # numpy's standard random routines do not take a dtype, and chaining
        # .astype("float32") does not work when the result is a scalar
        # singleton, so the float64 result is returned as-is.
        return numpy.random.randn(*shape)

    def rand(self, *shape):
        # Same dtype caveat as randn above.
        return numpy.random.rand(*shape)

    def one_hot(self, n, i, dtype="float32"):
        """Row(s) i of the n x n identity matrix — the one-hot encoding of i."""
        return numpy.eye(n, dtype=dtype)[i]

    def empty(self, shape, dtype="float32"):
        """Allocate an uninitialized array of the given shape."""
        return numpy.empty(shape, dtype=dtype)

    def full(self, shape, fill_value, dtype="float32"):
        """Allocate an array of the given shape filled with `fill_value`."""
        return numpy.full(shape, fill_value, dtype=dtype)


def cpu():
    """Return cpu device"""
    return CPUDevice()


def default_device():
    # The numpy backend only has a CPU device.
    return cpu()


def all_devices():
    """return a list of all available devices"""
    return [cpu()]
63 |
--------------------------------------------------------------------------------
/hw3/python/needle/backend_selection.py:
--------------------------------------------------------------------------------
"""Logic for backend selection"""
import os


# Which array backend to use: "nd" (needle's own NDArray implementation,
# the default) or "np" (plain numpy). Selected via the NEEDLE_BACKEND
# environment variable at import time.
BACKEND = os.environ.get("NEEDLE_BACKEND", "nd")


if BACKEND == "nd":
    print("Using needle backend")
    # array_api exposes the whole backend_ndarray module; the device
    # constructors and the Device type are re-exported at package level.
    from . import backend_ndarray as array_api
    from .backend_ndarray import (
        all_devices,
        cuda,
        cpu,
        cpu_numpy,
        default_device,
        BackendDevice as Device,
    )

    NDArray = array_api.NDArray
elif BACKEND == "np":
    print("Using numpy backend")
    # Fall back to numpy itself as the array API; devices come from the
    # thin CPU-only shim in backend_numpy.
    import numpy as array_api
    from .backend_numpy import all_devices, cpu, default_device, Device

    NDArray = array_api.ndarray
else:
    raise RuntimeError("Unknown needle array backend %s" % BACKEND)
29 |
--------------------------------------------------------------------------------
/hw3/python/needle/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_basic import *
2 | from .data_transforms import *
3 | from .datasets import *
4 |
--------------------------------------------------------------------------------
/hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_basic.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/__pycache__/data_transforms.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/data_basic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..autograd import Tensor
3 |
4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any
5 |
6 |
7 |
class Dataset:
    r"""An abstract class representing a `Dataset`.

    All subclasses should overwrite :meth:`__getitem__`, supporting fetching a
    data sample for a given key. Subclasses must also overwrite
    :meth:`__len__`, which is expected to return the size of the dataset.
    """

    def __init__(self, transforms: Optional[List] = None):
        # Optional list of callables applied in order by apply_transforms.
        self.transforms = transforms

    def __getitem__(self, index) -> object:
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def apply_transforms(self, x):
        """Run `x` through each configured transform, in order."""
        if self.transforms is None:
            return x
        for transform in self.transforms:
            x = transform(x)
        return x
31 |
32 |
class DataLoader:
    r"""
    Data loader. Combines a dataset and a sampler, and provides an iterable over
    the given dataset.
    Args:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load
            (default: ``1``).
        shuffle (bool, optional): set to ``True`` to have the data reshuffled
            at every epoch (default: ``False``).
    """
    dataset: Dataset
    batch_size: Optional[int]

    def __init__(
        self,
        dataset: Dataset,
        batch_size: Optional[int] = 1,
        shuffle: bool = False,
    ):

        self.dataset = dataset
        self.shuffle = shuffle
        self.batch_size = batch_size
        if not self.shuffle:
            # Deterministic iteration order can be precomputed once:
            # split [0, len(dataset)) into consecutive batch_size chunks
            # (the last chunk may be smaller).
            self.ordering = np.array_split(np.arange(len(dataset)),
                                           range(batch_size, len(dataset), batch_size))

    def __iter__(self):
        # Homework stub: should (re)compute self.ordering when self.shuffle
        # is set, reset the batch cursor, and return self.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
        # NOTE(review): unreachable while the stub above raises.
        return self

    def __next__(self):
        # Homework stub: should return the next batch or raise StopIteration
        # once self.ordering is exhausted.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
71 |
72 |
--------------------------------------------------------------------------------
/hw3/python/needle/data/data_transforms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
class Transform:
    """Base class for data transforms; subclasses implement __call__."""

    def __call__(self, x):
        raise NotImplementedError


class RandomFlipHorizontal(Transform):
    def __init__(self, p = 0.5):
        # p: probability of flipping a given image.
        self.p = p

    def __call__(self, img):
        """
        Horizontally flip an image, specified as an H x W x C NDArray.
        Args:
            img: H x W x C NDArray of an image
        Returns:
            H x W x C ndarray corresponding to image flipped with probability self.p
        Note: use the provided code to provide randomness, for easier testing
        """
        flip_img = np.random.rand() < self.p
        # Homework stub: flip along the width axis when flip_img is True.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION


class RandomCrop(Transform):
    def __init__(self, padding=3):
        # padding: number of zero pixels added on each side before cropping.
        self.padding = padding

    def __call__(self, img):
        """ Zero pad and then randomly crop an image.
        Args:
             img: H x W x C NDArray of an image
        Return
            H x W x C NDArray of clipped image
        Note: generate the image shifted by shift_x, shift_y specified below
        """
        shift_x, shift_y = np.random.randint(low=-self.padding, high=self.padding+1, size=2)
        # Homework stub: pad by self.padding on H and W, then crop the
        # original-size window offset by (shift_x, shift_y).
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
43 |
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .mnist_dataset import *
2 | from .ndarray_dataset import *
3 |
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/mnist_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/data/datasets/__pycache__/ndarray_dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/mnist_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from ..data_basic import Dataset
3 | import numpy as np
4 |
class MNISTDataset(Dataset):
    """MNIST dataset (homework stub).

    All three methods are left for the student to implement
    (### BEGIN/END YOUR SOLUTION markers).
    """

    def __init__(
        self,
        image_filename: str,
        label_filename: str,
        transforms: Optional[List] = None,
    ):
        # Homework stub: load images/labels from the given files and store
        # them (plus transforms) on self. Presumably the gzipped IDX format
        # used elsewhere in this repo's data/ directory — confirm.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def __getitem__(self, index) -> object:
        # Homework stub: return the sample at `index`, with transforms applied.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def __len__(self) -> int:
        # Homework stub: number of examples in the dataset.
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
--------------------------------------------------------------------------------
/hw3/python/needle/data/datasets/ndarray_dataset.py:
--------------------------------------------------------------------------------
1 | from ..data_basic import Dataset
2 |
class NDArrayDataset(Dataset):
    """Dataset over parallel in-memory arrays.

    Sample i is the tuple of the i-th element of every array; the arrays
    are assumed to share the same leading dimension.
    """

    def __init__(self, *arrays):
        self.arrays = arrays

    def __len__(self) -> int:
        # Length of the leading axis of the first array.
        return self.arrays[0].shape[0]

    def __getitem__(self, i) -> object:
        return tuple(arr[i] for arr in self.arrays)
--------------------------------------------------------------------------------
/hw3/python/needle/init/__init__.py:
--------------------------------------------------------------------------------
1 | from .init_basic import *
2 |
3 | from .init_initializers import *
4 |
--------------------------------------------------------------------------------
/hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_basic.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/init/__pycache__/init_initializers.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/init/init_basic.py:
--------------------------------------------------------------------------------
1 | import math
2 | import needle as ndl
3 |
4 |
def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate random numbers uniform between low and high"""
    if device is None:
        device = ndl.cpu()
    data = device.rand(*shape) * (high - low) + low
    return ndl.Tensor(data, device=device, dtype=dtype, requires_grad=requires_grad)


def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate random normal with specified mean and std deviation"""
    if device is None:
        device = ndl.cpu()
    data = device.randn(*shape) * std + mean
    return ndl.Tensor(data, device=device, dtype=dtype, requires_grad=requires_grad)


def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
    """Generate constant Tensor"""
    if device is None:
        device = ndl.cpu()
    # Scale an all-ones array by c; dtype may differ from float32.
    data = device.ones(*shape, dtype=dtype) * c
    return ndl.Tensor(data, device=device, dtype=dtype, requires_grad=requires_grad)


def ones(*shape, device=None, dtype="float32", requires_grad=False):
    """Generate all-ones Tensor"""
    return constant(*shape, c=1.0, device=device, dtype=dtype, requires_grad=requires_grad)


def zeros(*shape, device=None, dtype="float32", requires_grad=False):
    """Generate all-zeros Tensor"""
    return constant(*shape, c=0.0, device=device, dtype=dtype, requires_grad=requires_grad)


def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
    """Generate binary random Tensor"""
    if device is None:
        device = ndl.cpu()
    # Each entry is True with probability p.
    data = device.rand(*shape) <= p
    return ndl.Tensor(data, device=device, dtype=dtype, requires_grad=requires_grad)


def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
    """Generate one-hot encoding Tensor"""
    if device is None:
        device = ndl.cpu()
    encoded = device.one_hot(n, i.numpy(), dtype=dtype)
    return ndl.Tensor(encoded, device=device, requires_grad=requires_grad)


def zeros_like(array, *, device=None, requires_grad=False):
    """All-zeros Tensor matching `array`'s shape/dtype (and device by default)."""
    target = device or array.device
    return zeros(*array.shape, dtype=array.dtype, device=target, requires_grad=requires_grad)


def ones_like(array, *, device=None, requires_grad=False):
    """All-ones Tensor matching `array`'s shape/dtype (and device by default)."""
    target = device or array.device
    return ones(*array.shape, dtype=array.dtype, device=target, requires_grad=requires_grad)
69 |
--------------------------------------------------------------------------------
/hw3/python/needle/init/init_initializers.py:
--------------------------------------------------------------------------------
1 | import math
2 | from .init_basic import *
3 |
4 |
def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
    # Homework stub: Xavier/Glorot uniform initialization — by the standard
    # scheme, U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out)).
    ### BEGIN YOUR SOLUTION
    raise NotImplementedError()
    ### END YOUR SOLUTION


def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
    # Homework stub: Xavier/Glorot normal initialization — by the standard
    # scheme, N(0, std^2) with std = gain * sqrt(2 / (fan_in + fan_out)).
    ### BEGIN YOUR SOLUTION
    raise NotImplementedError()
    ### END YOUR SOLUTION


def kaiming_uniform(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    # Homework stub: Kaiming/He uniform initialization for relu fan-in.
    ### BEGIN YOUR SOLUTION
    raise NotImplementedError()
    ### END YOUR SOLUTION


def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
    assert nonlinearity == "relu", "Only relu supported currently"
    # Homework stub: Kaiming/He normal initialization for relu fan-in.
    ### BEGIN YOUR SOLUTION
    raise NotImplementedError()
    ### END YOUR SOLUTION
29 |
--------------------------------------------------------------------------------
/hw3/python/needle/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .nn_basic import *
2 |
--------------------------------------------------------------------------------
/hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/nn/__pycache__/nn_basic.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/nn/nn_basic.py:
--------------------------------------------------------------------------------
1 | """The module.
2 | """
3 | from typing import List, Callable, Any
4 | from needle.autograd import Tensor
5 | from needle import ops
6 | import needle.init as init
7 | import numpy as np
8 |
9 |
class Parameter(Tensor):
    """A special kind of tensor that represents parameters.

    The subclass carries no extra behavior; _unpack_params uses the type
    itself to decide which tensors a Module reports from parameters().
    """
12 |
13 |
def _unpack_params(value: object) -> List[Tensor]:
    """Recursively collect every Parameter reachable from `value`.

    Walks Parameters, Modules (via their parameters()), dicts, lists and
    tuples; any other value contributes no parameters.
    """
    if isinstance(value, Parameter):
        return [value]
    if isinstance(value, Module):
        return value.parameters()
    if isinstance(value, dict):
        found = []
        for item in value.values():
            found.extend(_unpack_params(item))
        return found
    if isinstance(value, (list, tuple)):
        found = []
        for item in value:
            found.extend(_unpack_params(item))
        return found
    return []
31 |
32 |
def _child_modules(value: object) -> List["Module"]:
    """Recursively collect every Module reachable from `value`.

    A Module contributes itself plus everything found in its attribute
    dict; dicts, lists and tuples are searched element-wise.
    """
    if isinstance(value, Module):
        return [value] + _child_modules(value.__dict__)
    if isinstance(value, dict):
        collected = []
        for child in value.values():
            collected += _child_modules(child)
        return collected
    if isinstance(value, (list, tuple)):
        collected = []
        for child in value:
            collected += _child_modules(child)
        return collected
    return []
50 |
51 |
class Module:
    """Base class for all neural-network modules."""

    def __init__(self):
        # New modules start in training mode.
        self.training = True

    def parameters(self) -> List[Tensor]:
        """Return the list of parameters in the module."""
        return _unpack_params(self.__dict__)

    def _children(self) -> List["Module"]:
        """Return all sub-modules reachable from this module's attributes."""
        return _child_modules(self.__dict__)

    def _set_training(self, mode: bool) -> None:
        # Shared implementation of eval()/train(): set this module's flag
        # and every child's.
        self.training = mode
        for child in self._children():
            child.training = mode

    def eval(self):
        """Put this module and all children into evaluation mode."""
        self._set_training(False)

    def train(self):
        """Put this module and all children into training mode."""
        self._set_training(True)

    def __call__(self, *args, **kwargs):
        # Modules are callable; delegates to forward().
        return self.forward(*args, **kwargs)
75 |
76 |
class Identity(Module):
    """No-op module: forward returns its input unchanged."""

    def forward(self, x):
        return x
80 |
81 |
82 | class Linear(Module):
83 | def __init__(
84 | self, in_features, out_features, bias=True, device=None, dtype="float32"
85 | ):
86 | super().__init__()
87 | self.in_features = in_features
88 | self.out_features = out_features
89 |
90 | ### BEGIN YOUR SOLUTION
91 | raise NotImplementedError()
92 | ### END YOUR SOLUTION
93 |
94 | def forward(self, X: Tensor) -> Tensor:
95 | ### BEGIN YOUR SOLUTION
96 | raise NotImplementedError()
97 | ### END YOUR SOLUTION
98 |
99 |
100 | class Flatten(Module):
101 | def forward(self, X):
102 | ### BEGIN YOUR SOLUTION
103 | raise NotImplementedError()
104 | ### END YOUR SOLUTION
105 |
106 |
107 | class ReLU(Module):
108 | def forward(self, x: Tensor) -> Tensor:
109 | ### BEGIN YOUR SOLUTION
110 | raise NotImplementedError()
111 | ### END YOUR SOLUTION
112 |
113 |
114 | class Sequential(Module):
115 | def __init__(self, *modules):
116 | super().__init__()
117 | self.modules = modules
118 |
119 | def forward(self, x: Tensor) -> Tensor:
120 | ### BEGIN YOUR SOLUTION
121 | raise NotImplementedError()
122 | ### END YOUR SOLUTION
123 |
124 |
125 | class SoftmaxLoss(Module):
126 | def forward(self, logits: Tensor, y: Tensor):
127 | ### BEGIN YOUR SOLUTION
128 | raise NotImplementedError()
129 | ### END YOUR SOLUTION
130 |
131 |
132 | class BatchNorm1d(Module):
133 | def __init__(self, dim, eps=1e-5, momentum=0.1, device=None, dtype="float32"):
134 | super().__init__()
135 | self.dim = dim
136 | self.eps = eps
137 | self.momentum = momentum
138 | ### BEGIN YOUR SOLUTION
139 | raise NotImplementedError()
140 | ### END YOUR SOLUTION
141 |
142 | def forward(self, x: Tensor) -> Tensor:
143 | ### BEGIN YOUR SOLUTION
144 | raise NotImplementedError()
145 | ### END YOUR SOLUTION
146 |
147 |
class LayerNorm1d(Module):
    """1-D layer normalization (homework stub).

    Stores the hyper-parameters; parameters and the forward computation
    are to be written in the marked solution regions.
    """

    def __init__(self, dim, eps=1e-5, device=None, dtype="float32"):
        super().__init__()
        self.dim = dim  # number of features to normalize over
        self.eps = eps  # numerical-stability constant
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def forward(self, x: Tensor) -> Tensor:
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
161 |
162 |
class Dropout(Module):
    """Dropout module (homework stub).

    Stores the drop probability; forward is to be implemented in the
    marked region.
    """

    def __init__(self, p=0.5):
        super().__init__()
        self.p = p  # drop probability

    def forward(self, x: Tensor) -> Tensor:
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
172 |
173 |
class Residual(Module):
    """Residual wrapper (homework stub).

    Wraps a sub-module ``fn``; forward presumably computes fn(x) + x —
    confirm against the assignment spec.
    """

    def __init__(self, fn: Module):
        super().__init__()
        self.fn = fn  # the wrapped sub-module

    def forward(self, x: Tensor) -> Tensor:
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
183 |
--------------------------------------------------------------------------------
/hw3/python/needle/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .ops_mathematic import *
2 |
3 | from .ops_logarithmic import *
4 | from .ops_tuple import *
5 |
--------------------------------------------------------------------------------
/hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_logarithmic.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_mathematic.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/python/needle/ops/__pycache__/ops_tuple.cpython-310.pyc
--------------------------------------------------------------------------------
/hw3/python/needle/ops/ops_logarithmic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from ..autograd import NDArray
3 | from ..autograd import Op, Tensor, Value, TensorOp
4 | from ..autograd import TensorTuple, TensorTupleOp
5 |
6 | from .ops_mathematic import *
7 |
8 | import numpy as array_api
9 |
class LogSoftmax(TensorOp):
    """log-softmax op (homework stub: compute and gradient unimplemented)."""

    def compute(self, Z):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def gradient(self, out_grad, node):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
20 |
21 |
def logsoftmax(a):
    """Functional wrapper: apply the LogSoftmax op to tensor ``a``."""
    return LogSoftmax()(a)
24 |
25 |
class LogSumExp(TensorOp):
    """log-sum-exp reduction op (homework stub).

    ``axes`` selects the reduction axes (None presumably means all axes,
    as with numpy reductions — confirm against the assignment spec).
    """

    def __init__(self, axes: Optional[tuple] = None):
        self.axes = axes  # axes to reduce over

    def compute(self, Z):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def gradient(self, out_grad, node):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
39 |
40 |
def logsumexp(a, axes=None):
    """Functional wrapper: apply LogSumExp over ``axes`` to tensor ``a``."""
    return LogSumExp(axes=axes)(a)
43 |
44 |
--------------------------------------------------------------------------------
/hw3/python/needle/ops/ops_tuple.py:
--------------------------------------------------------------------------------
1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp
2 |
3 |
class MakeTensorTuple(TensorTupleOp):
    """Op that packs its tensor arguments into a TensorTuple."""

    def compute(self, *args) -> tuple:
        return tuple(args)

    def gradient(self, out_grad, node):
        assert isinstance(out_grad, TensorTuple)
        # Bug fix: the original `tuple(*[...])` unpacked the list into
        # multiple positional arguments, but tuple() accepts at most one
        # iterable — that raised TypeError for any tuple of length != 1
        # (and mis-iterated a single Tensor for length == 1).
        return tuple(out_grad[i] for i in range(len(out_grad)))
11 |
12 |
def make_tuple(*args):
    """Pack ``args`` into a TensorTuple via the MakeTensorTuple op."""
    return MakeTensorTuple()(*args)
15 |
16 |
class TupleGetItem(TensorOp):
    """Op extracting element ``index`` from a TensorTuple."""

    def __init__(self, index):
        self.index = index  # position within the tuple

    def __call__(self, a: TensorTuple, fold_const=True) -> Value:
        assert isinstance(a, TensorTuple)
        # constant folding: element i of a freshly built tuple is just the
        # i-th input of the MakeTensorTuple op — skip creating a new node.
        if fold_const and isinstance(a.op, MakeTensorTuple):
            return a.inputs[self.index]
        return Tensor.make_from_op(self, [a])

    def compute(self, a):
        return a[self.index]

    def gradient(self, out_grad, node):
        # Bug fix: `init` was referenced below but never imported in this
        # module, so this method raised NameError at runtime. Import it
        # locally to avoid touching the module's import block.
        from .. import init

        index = self.index
        in_grad = []
        # The selected slot receives out_grad; every other slot gets zeros
        # of the matching shape.
        for i, value in enumerate(node.inputs[0]):
            if i != index:
                in_grad.append(init.zeros_like(value))
            else:
                in_grad.append(out_grad)
        return MakeTensorTuple()(*in_grad)
40 |
41 |
def tuple_get_item(value, index):
    """Functional wrapper: extract element ``index`` from tuple ``value``."""
    return TupleGetItem(index)(value)
44 |
45 |
class FusedAddScalars(TensorTupleOp):
    """Fused op computing ``(a + c0, a + c1)`` as a single graph node."""

    def __init__(self, c0: float, c1: float):
        self.c0 = c0
        self.c1 = c1

    def compute(self, a):
        return a + self.c0, a + self.c1

    def gradient(self, out_grad, node):
        # Both outputs depend on `a` with unit Jacobian, so the input
        # gradient is the sum of the two output gradients.
        return out_grad[0] + out_grad[1]
56 |
57 |
def fused_add_scalars(x, c0, c1):
    """Functional wrapper: compute ``(x + c0, x + c1)`` in one fused op."""
    return FusedAddScalars(c0, c1)(x)
60 |
--------------------------------------------------------------------------------
/hw3/python/needle/optim.py:
--------------------------------------------------------------------------------
1 | """Optimization module"""
2 | import needle as ndl
3 | import numpy as np
4 |
5 |
class Optimizer:
    """Base class for gradient-based optimizers over a list of parameters."""

    def __init__(self, params):
        self.params = params

    def step(self):
        """Apply one update step; concrete optimizers must override this."""
        raise NotImplementedError()

    def reset_grad(self):
        """Clear the stored gradient on every tracked parameter."""
        for param in self.params:
            param.grad = None
16 |
17 |
class SGD(Optimizer):
    """Stochastic gradient descent (homework stub: step unimplemented)."""

    def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
        super().__init__(params)
        self.lr = lr              # learning rate
        self.momentum = momentum  # momentum coefficient
        self.u = {}               # presumably per-parameter momentum buffers — filled by the solution
        self.weight_decay = weight_decay

    def step(self):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION

    def clip_grad_norm(self, max_norm=0.25):
        """
        Clips gradient norm of parameters.
        """
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
38 |
39 |
class Adam(Optimizer):
    """Adam optimizer (homework stub: step unimplemented)."""

    def __init__(
        self,
        params,
        lr=0.01,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        weight_decay=0.0,
    ):
        super().__init__(params)
        self.lr = lr
        self.beta1 = beta1  # first-moment decay rate
        self.beta2 = beta2  # second-moment decay rate
        self.eps = eps      # numerical-stability constant
        self.weight_decay = weight_decay
        self.t = 0          # step counter, presumably for bias correction

        self.m = {}         # per-parameter first-moment estimates (filled by solution)
        self.v = {}         # per-parameter second-moment estimates (filled by solution)

    def step(self):
        ### BEGIN YOUR SOLUTION
        raise NotImplementedError()
        ### END YOUR SOLUTION
65 |
--------------------------------------------------------------------------------
/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.1.2.pyc
--------------------------------------------------------------------------------
/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310-pytest-7.4.3.pyc
--------------------------------------------------------------------------------
/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw3/tests/hw3/__pycache__/test_ndarray.cpython-310.pyc
--------------------------------------------------------------------------------
/hw4/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/hw4/.idea/hw4.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/hw4/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw4/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/hw4/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/hw4/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/hw4/.tmp.driveupload/7792:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("./tests/hw4")
3 | sys.path.append("./python")
4 |
5 | from test_nd_backend import *
6 | from test_cifar_ptb_data import *
7 | from test_conv import *
8 | from test_sequence_models import *
9 | from needle import backend_ndarray as nd
10 |
11 |
def train_cifar10():
    """Train ResNet9 on CIFAR-10 for 2 epochs on CPU, then evaluate it.

    NOTE(review): the local import below shadows this function's own name;
    the `train_cifar10(...)` call resolves to the simple_ml trainer, not
    to this wrapper.
    """
    import sys
    sys.path.append('./python')
    sys.path.append('./apps')
    import needle as ndl
    from models import ResNet9
    from simple_ml import train_cifar10, evaluate_cifar10

    device = ndl.cpu()
    dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
    dataloader = ndl.data.DataLoader( \
        dataset=dataset,
        batch_size=128,
        shuffle=True, )
    model = ResNet9(device=device, dtype="float32")
    train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam,
                  lr=0.001, weight_decay=0.001, device=device)
    evaluate_cifar10(model, dataloader)
30 |
31 |
def train_language_model():
    """Train a small RNN language model on PTB for one epoch, then evaluate.

    NOTE(review): this uses ndl.cpu_numpy() as the device — confirm that
    constructor exists in this needle build (plain ndl.cpu() is the more
    common spelling).
    """
    import needle as ndl
    sys.path.append('./apps')
    from models import LanguageModel
    from simple_ml import train_ptb, evaluate_ptb

    device = ndl.cpu_numpy()
    corpus = ndl.data.Corpus("data/ptb")
    train_data = ndl.data.batchify(corpus.train, batch_size=16, device=device, dtype="float32")
    model = LanguageModel(30, len(corpus.dictionary), hidden_size=10, num_layers=2, seq_model='rnn', device=device)
    train_ptb(model, train_data, seq_len=1, n_epochs=1, device=device)
    evaluate_ptb(model, train_data, seq_len=40, device=device)
44 |
45 |
46 | if __name__ == "__main__":
47 | """
48 | Part 1
49 | """
50 | # test_stack((5, 5), 0, 2, nd.cpu())
51 | # test_stack_backward((5, 5), 0, 2, nd.cpu())
52 |
53 | # test_matmul(16, 16, 16, nd.cpu())
54 | # test_relu((5, 5), nd.cpu())
55 | # test_tanh_backward((5, 5), nd.cpu())
56 |
57 |
58 | """
59 | Part 2
60 | """
61 | # test_cifar10_dataset(True)
62 |
63 |
64 | """
65 | Part 3
66 | """
67 | # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu())
68 | # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu())
69 | # test_dilate_forward(nd.cpu())
70 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu())
71 | # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu())
72 |
73 | # test_init_kaiming_uniform(nd.cpu())
74 | # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu())
75 | # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu())
76 | # test_resnet9(nd.cpu())
77 | # test_train_cifar10(nd.cpu())
78 |
79 | train_cifar10()
80 |
81 | """
82 | Part 4
83 | """
84 | # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu())
85 | # test_lstm_cell(1, 1, 1, False, False, nd.cpu())
86 | # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu())
87 |
88 | """
89 | Part 6
90 | """
91 | # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu())
92 |
93 | """
94 | Part 7
95 | """
96 | # train_language_model()
--------------------------------------------------------------------------------
/hw4/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.5)
2 | project(needle C CXX)
3 | cmake_policy(SET CMP0146 OLD)
4 |
5 | # find correct version of Python
6 | execute_process(COMMAND python3-config --prefix
7 | OUTPUT_VARIABLE Python_ROOT_DIR)
8 | find_package(Python COMPONENTS Development Interpreter REQUIRED)
9 | include_directories(${Python_INCLUDE_DIRS})
10 |
11 | # find pybind
12 | execute_process(COMMAND python3 -m pybind11 --cmakedir
13 | RESULT_VARIABLE __pybind_exit_code
14 | OUTPUT_VARIABLE __pybind_path
15 | OUTPUT_STRIP_TRAILING_WHITESPACE)
16 | find_package(pybind11 PATHS ${__pybind_path})
17 |
18 |
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -march=native ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
else()
  # Bug fix: the MSVC branch previously passed GCC-style flags
  # ("-O2 -march=native") and "/std:c++11", none of which MSVC accepts
  # (its lowest /std: is c++14). Use the MSVC spellings instead.
  set(CMAKE_CXX_FLAGS "/std:c++14 /O2 ${CMAKE_CXX_FLAGS}")
  set(CMAKE_CUDA_STANDARD 14)
endif()
26 |
27 | include_directories(SYSTEM ${pybind11_INCLUDE_DIRS})
28 | list(APPEND LINKER_LIBS ${pybind11_LIBRARIES})
29 |
30 |
31 | ###################
32 | ### CPU BACKEND ###
33 | ###################
34 | add_library(ndarray_backend_cpu MODULE src/ndarray_backend_cpu.cc)
35 | target_link_libraries(ndarray_backend_cpu PUBLIC ${LINKER_LIBS})
36 | pybind11_extension(ndarray_backend_cpu)
37 | pybind11_strip(ndarray_backend_cpu)
38 |
39 |
40 | # directly output to ffi folder
41 | set_target_properties(ndarray_backend_cpu
42 | PROPERTIES
43 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray
44 | CXX_VISIBILITY_PRESET "hidden"
45 | )
46 |
47 | if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
48 | set_property(TARGET ndarray_backend_cpu PROPERTY LINK_OPTIONS -undefined dynamic_lookup)
49 | endif()
50 |
51 |
52 |
53 | ####################
54 | ### CUDA BACKEND ###
55 | ####################
56 | find_package(CUDA)
57 | if(CUDA_FOUND)
58 | message(STATUS "Found cuda, building cuda backend")
59 |
60 | include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
61 | list(APPEND LINKER_LIBS ${CUDA_CUDART_LIBRARY})
62 |
63 | # invoke nvidia smi to detect if we really have a GPU
64 | execute_process(COMMAND "nvidia-smi" ERROR_QUIET RESULT_VARIABLE NV_RET)
65 | if(NV_RET EQUAL "0")
66 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS Auto)
67 | else()
68 | # set to 3.7 the flag of K80
69 | CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.7)
70 | endif()
71 |
72 | # set arch flags properly
73 | CUDA_ADD_LIBRARY(ndarray_backend_cuda MODULE src/ndarray_backend_cuda.cu OPTIONS ${ARCH_FLAGS})
74 |
75 | target_link_libraries(ndarray_backend_cuda ${LINKER_LIBS})
76 | pybind11_extension(ndarray_backend_cuda)
77 | pybind11_strip(ndarray_backend_cuda)
78 |
79 | # directly output to ffi folder
80 | set_target_properties(ndarray_backend_cuda
81 | PROPERTIES
82 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/needle/backend_ndarray
83 | CXX_VISIBILITY_PRESET "hidden"
84 | CUDA_VISIBILITY_PRESET "hidden"
85 | )
86 |
87 | endif()
88 |
89 |
--------------------------------------------------------------------------------
/hw4/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: lib, pybind, clean, format, all
2 |
3 | all: lib
4 |
5 |
6 | lib:
7 | @mkdir -p build
8 | @cd build; cmake ..
9 | @cd build; $(MAKE)
10 |
11 | format:
12 | python3 -m black .
13 | clang-format -i src/*.cc src/*.cu
14 |
15 | clean:
16 | rm -rf build python/needle/backend_ndarray/ndarray_backend*.so
17 |
--------------------------------------------------------------------------------
/hw4/README.md:
--------------------------------------------------------------------------------
1 | # Homework 4
2 | Public repository and stub/testing code for Homework 4 of 10-714.
3 |
--------------------------------------------------------------------------------
/hw4/ResNet9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/ResNet9.png
--------------------------------------------------------------------------------
/hw4/apps/models.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./python')
3 | import needle as ndl
4 | import needle.nn as nn
5 | import math
6 | import numpy as np
7 | np.random.seed(0)
8 |
9 |
def ConvBN(in_channels, out_channels, kernel_size, stride, device=None, dtype="float32"):
    """Conv -> BatchNorm2d -> ReLU block used throughout ResNet9."""
    return nn.Sequential(
        nn.Conv(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=True, device=device, dtype=dtype),
        nn.BatchNorm2d(out_channels, device=device, dtype=dtype),
        nn.ReLU()
    )
16 |
17 |
class ResidualBlock(ndl.nn.Module):
    """Two ConvBN blocks with an identity skip connection.

    NOTE(review): `out + x` requires the two ConvBN outputs to keep x's
    shape, i.e. stride 1 and in_channels == out_channels — which is how
    ResNet9 below instantiates it; confirm before reusing elsewhere.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, device=None, dtype="float32"):
        super().__init__()

        self.conv1 = ConvBN(in_channels, out_channels, kernel_size, stride, device=device, dtype=dtype)
        self.conv2 = ConvBN(out_channels, out_channels, kernel_size, stride, device=device, dtype=dtype)


    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = out + x  # identity skip connection
        return out
31 |
32 |
class ResNet9(ndl.nn.Module):
    """Small 9-layer ResNet for CIFAR-10 (10 output classes).

    Channel progression: 3 -> 16 -> 32 -> (residual) -> 64 -> 128 ->
    (residual) -> flatten -> 128 -> 10.
    """

    def __init__(self, device=None, dtype="float32"):
        super().__init__()
        self.conv1 = ConvBN(3, 16, kernel_size=7, stride=4, device=device, dtype=dtype)
        self.conv2 = ConvBN(16, 32, kernel_size=3, stride=2, device=device, dtype=dtype)
        self.resi1 = ResidualBlock(32, 32, 3, 1, device=device, dtype=dtype)
        self.conv3 = ConvBN(32, 64, kernel_size=3, stride=2, device=device, dtype=dtype)
        self.conv4 = ConvBN(64, 128, kernel_size=3, stride=2, device=device, dtype=dtype)
        self.resi2 = ResidualBlock(128, 128, 3, 1, device=device, dtype=dtype)
        self.linear1 = nn.Linear(128, 128, device=device, dtype=dtype)
        self.linear2 = nn.Linear(128, 10, device=device, dtype=dtype)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.resi1(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.resi2(out)
        # Flatten the feature map before the classifier head.
        out = nn.Flatten()(out)
        out = self.linear1(out)
        out = ndl.ops.relu(out)
        out = self.linear2(out)
        return out
57 |
58 |
class LanguageModel(nn.Module):
    def __init__(self, embedding_size, output_size, hidden_size, num_layers=1,
                 seq_model='rnn', device=None, dtype="float32"):
        """
        Consists of an embedding layer, a sequence model (either RNN or LSTM), and a
        linear layer.
        Parameters:
        output_size: Size of dictionary
        embedding_size: Size of embeddings
        hidden_size: The number of features in the hidden state of LSTM or RNN
        seq_model: 'rnn' or 'lstm', whether to use RNN or LSTM
        num_layers: Number of layers in RNN or LSTM
        """
        super(LanguageModel, self).__init__()

        self.embedding_size = embedding_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, embedding_size, device=device, dtype=dtype)
        if seq_model == 'rnn':
            self.seq_model = nn.RNN(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
        elif seq_model == 'lstm':
            self.seq_model = nn.LSTM(embedding_size, hidden_size, num_layers, device=device, dtype=dtype)
        else:
            # Bug fix: an unrecognized seq_model previously left
            # self.seq_model unset, deferring the failure to forward()
            # as a confusing AttributeError. Fail fast instead.
            raise ValueError(f"seq_model must be 'rnn' or 'lstm', got {seq_model!r}")
        self.linear = nn.Linear(hidden_size, output_size, device=device, dtype=dtype)


    def forward(self, x, h=None):
        """
        Given sequence (and the previous hidden state if given), returns probabilities of next word
        (along with the last hidden state from the sequence model).
        Inputs:
        x of shape (seq_len, bs)
        h of shape (num_layers, bs, hidden_size) if using RNN,
        else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
        Returns (out, h)
        out of shape (seq_len*bs, output_size)
        h of shape (num_layers, bs, hidden_size) if using RNN,
        else h is tuple of (h0, c0), each of shape (num_layers, bs, hidden_size)
        """
        seq_len, bs = x.shape
        x_emb = self.embedding(x)
        out, h = self.seq_model(x_emb, h)
        # Collapse (seq_len, bs, hidden) -> (seq_len*bs, hidden) before the
        # output projection.
        out = out.reshape((seq_len * bs, self.hidden_size))
        out = self.linear(out)
        return out, h
104 |
105 |
106 | if __name__ == "__main__":
107 | model = ResNet9()
108 | x = ndl.ops.randu((1, 32, 32, 3), requires_grad=True)
109 | model(x)
110 | cifar10_train_dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
111 | train_loader = ndl.data.DataLoader(cifar10_train_dataset, 128, ndl.cpu(), dtype="float32")
112 | print(dataset[1][0].shape)
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CMakeCCompiler.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER "/usr/bin/cc")
2 | set(CMAKE_C_COMPILER_ARG1 "")
3 | set(CMAKE_C_COMPILER_ID "GNU")
4 | set(CMAKE_C_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_C_COMPILER_WRAPPER "")
7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_C_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert;c_std_17;c_std_23")
10 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes")
11 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros")
12 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert")
13 | set(CMAKE_C17_COMPILE_FEATURES "c_std_17")
14 | set(CMAKE_C23_COMPILE_FEATURES "c_std_23")
15 |
16 | set(CMAKE_C_PLATFORM_ID "Linux")
17 | set(CMAKE_C_SIMULATE_ID "")
18 | set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU")
19 | set(CMAKE_C_SIMULATE_VERSION "")
20 |
21 |
22 |
23 |
24 | set(CMAKE_AR "/usr/bin/ar")
25 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-11")
26 | set(CMAKE_RANLIB "/usr/bin/ranlib")
27 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
28 | set(CMAKE_LINKER "/usr/bin/ld")
29 | set(CMAKE_MT "")
30 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
31 | set(CMAKE_COMPILER_IS_GNUCC 1)
32 | set(CMAKE_C_COMPILER_LOADED 1)
33 | set(CMAKE_C_COMPILER_WORKS TRUE)
34 | set(CMAKE_C_ABI_COMPILED TRUE)
35 |
36 | set(CMAKE_C_COMPILER_ENV_VAR "CC")
37 |
38 | set(CMAKE_C_COMPILER_ID_RUN 1)
39 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m)
40 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
41 | set(CMAKE_C_LINKER_PREFERENCE 10)
42 | set(CMAKE_C_LINKER_DEPFILE_SUPPORTED TRUE)
43 |
44 | # Save compiler ABI information.
45 | set(CMAKE_C_SIZEOF_DATA_PTR "8")
46 | set(CMAKE_C_COMPILER_ABI "ELF")
47 | set(CMAKE_C_BYTE_ORDER "LITTLE_ENDIAN")
48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
49 |
50 | if(CMAKE_C_SIZEOF_DATA_PTR)
51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
52 | endif()
53 |
54 | if(CMAKE_C_COMPILER_ABI)
55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
56 | endif()
57 |
58 | if(CMAKE_C_LIBRARY_ARCHITECTURE)
59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60 | endif()
61 |
62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "")
63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX)
64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}")
65 | endif()
66 |
67 |
68 |
69 |
70 |
71 | set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
72 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s")
73 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
74 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
75 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CMakeCXXCompiler.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++")
2 | set(CMAKE_CXX_COMPILER_ARG1 "")
3 | set(CMAKE_CXX_COMPILER_ID "GNU")
4 | set(CMAKE_CXX_COMPILER_VERSION "11.4.0")
5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "")
6 | set(CMAKE_CXX_COMPILER_WRAPPER "")
7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "17")
8 | set(CMAKE_CXX_EXTENSIONS_COMPUTED_DEFAULT "ON")
9 | set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20;cxx_std_23")
10 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters")
11 | set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates")
12 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates")
13 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17")
14 | set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20")
15 | set(CMAKE_CXX23_COMPILE_FEATURES "cxx_std_23")
16 |
17 | set(CMAKE_CXX_PLATFORM_ID "Linux")
18 | set(CMAKE_CXX_SIMULATE_ID "")
19 | set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU")
20 | set(CMAKE_CXX_SIMULATE_VERSION "")
21 |
22 |
23 |
24 |
25 | set(CMAKE_AR "/usr/bin/ar")
26 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-11")
27 | set(CMAKE_RANLIB "/usr/bin/ranlib")
28 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-11")
29 | set(CMAKE_LINKER "/usr/bin/ld")
30 | set(CMAKE_MT "")
31 | set(CMAKE_TAPI "CMAKE_TAPI-NOTFOUND")
32 | set(CMAKE_COMPILER_IS_GNUCXX 1)
33 | set(CMAKE_CXX_COMPILER_LOADED 1)
34 | set(CMAKE_CXX_COMPILER_WORKS TRUE)
35 | set(CMAKE_CXX_ABI_COMPILED TRUE)
36 |
37 | set(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
38 |
39 | set(CMAKE_CXX_COMPILER_ID_RUN 1)
40 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;mpp;CPP;ixx;cppm;ccm;cxxm;c++m)
41 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC)
42 |
43 | foreach (lang C OBJC OBJCXX)
44 | if (CMAKE_${lang}_COMPILER_ID_RUN)
45 | foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS)
46 | list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension})
47 | endforeach()
48 | endif()
49 | endforeach()
50 |
51 | set(CMAKE_CXX_LINKER_PREFERENCE 30)
52 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1)
53 | set(CMAKE_CXX_LINKER_DEPFILE_SUPPORTED TRUE)
54 |
55 | # Save compiler ABI information.
56 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8")
57 | set(CMAKE_CXX_COMPILER_ABI "ELF")
58 | set(CMAKE_CXX_BYTE_ORDER "LITTLE_ENDIAN")
59 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
60 |
61 | if(CMAKE_CXX_SIZEOF_DATA_PTR)
62 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
63 | endif()
64 |
65 | if(CMAKE_CXX_COMPILER_ABI)
66 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
67 | endif()
68 |
69 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE)
70 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu")
71 | endif()
72 |
73 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "")
74 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX)
75 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}")
76 | endif()
77 |
78 |
79 |
80 |
81 |
82 | set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/11;/usr/include/x86_64-linux-gnu/c++/11;/usr/include/c++/11/backward;/usr/lib/gcc/x86_64-linux-gnu/11/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include")
83 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc")
84 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/11;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib;/usr/local/cuda/lib64/stubs")
85 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
86 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_C.bin
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CMakeDetermineCompilerABI_CXX.bin
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CMakeSystem.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_HOST_SYSTEM "Linux-6.1.58+")
2 | set(CMAKE_HOST_SYSTEM_NAME "Linux")
3 | set(CMAKE_HOST_SYSTEM_VERSION "6.1.58+")
4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
5 |
6 |
7 |
8 | set(CMAKE_SYSTEM "Linux-6.1.58+")
9 | set(CMAKE_SYSTEM_NAME "Linux")
10 | set(CMAKE_SYSTEM_VERSION "6.1.58+")
11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64")
12 |
13 | set(CMAKE_CROSSCOMPILING "FALSE")
14 |
15 | set(CMAKE_SYSTEM_LOADED 1)
16 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdC/a.out
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/3.27.9/CompilerIdCXX/a.out
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/CMakeDirectoryInformation.cmake:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # Relative path conversion top directories.
5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/content/drive/Othercomputers/My MacBook Pro/hw4")
6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/content/drive/Othercomputers/My MacBook Pro/hw4/build")
7 |
8 | # Force unix paths in dependencies.
9 | set(CMAKE_FORCE_UNIX_PATHS 1)
10 |
11 |
12 | # The C and CXX include file regular expressions for this directory.
13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
17 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/CMakeRuleHashes.txt:
--------------------------------------------------------------------------------
1 | # Hashes of file build rules.
2 | 347d5addb0d9c9683a2b5d27952f36b2 CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/Makefile.cmake:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # The generator used is:
5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles")
6 |
7 | # The top level Makefile was generated from the following files:
8 | set(CMAKE_MAKEFILE_DEPENDS
9 | "CMakeCache.txt"
10 | "/content/drive/Othercomputers/My MacBook Pro/hw4/CMakeLists.txt"
11 | "CMakeFiles/3.27.9/CMakeCCompiler.cmake"
12 | "CMakeFiles/3.27.9/CMakeCXXCompiler.cmake"
13 | "CMakeFiles/3.27.9/CMakeSystem.cmake"
14 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen"
15 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.depend"
16 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCInformation.cmake"
17 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCXXInformation.cmake"
18 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake"
19 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeCommonLanguageInclude.cmake"
20 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeGenericSystem.cmake"
21 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeInitializeConfigs.cmake"
22 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeLanguageInformation.cmake"
23 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInformation.cmake"
24 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CMakeSystemSpecificInitialize.cmake"
25 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCSourceCompiles.cmake"
26 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXCompilerFlag.cmake"
27 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckCXXSourceCompiles.cmake"
28 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckIncludeFile.cmake"
29 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/CheckLibraryExists.cmake"
30 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/CMakeCommonCompilerMacros.cmake"
31 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-C.cmake"
32 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU-CXX.cmake"
33 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Compiler/GNU.cmake"
34 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA.cmake"
35 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/run_nvcc.cmake"
36 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindCUDA/select_compute_arch.cmake"
37 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageHandleStandardArgs.cmake"
38 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPackageMessage.cmake"
39 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython.cmake"
40 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindPython/Support.cmake"
41 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/FindThreads.cmake"
42 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckCompilerFlag.cmake"
43 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckFlagCommonConfig.cmake"
44 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Internal/CheckSourceCompiles.cmake"
45 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-C.cmake"
46 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU-CXX.cmake"
47 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-GNU.cmake"
48 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux-Initialize.cmake"
49 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/Linux.cmake"
50 | "/usr/local/lib/python3.10/dist-packages/cmake/data/share/cmake-3.27/Modules/Platform/UnixPaths.cmake"
51 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Common.cmake"
52 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Config.cmake"
53 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11ConfigVersion.cmake"
54 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11NewTools.cmake"
55 | "/usr/local/lib/python3.10/dist-packages/pybind11/share/cmake/pybind11/pybind11Targets.cmake"
56 | )
57 |
58 | # The corresponding makefile is:
59 | set(CMAKE_MAKEFILE_OUTPUTS
60 | "Makefile"
61 | "CMakeFiles/cmake.check_cache"
62 | )
63 |
64 | # Byproducts of CMake generate step:
65 | set(CMAKE_MAKEFILE_PRODUCTS
66 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake.pre-gen"
67 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o.cmake"
68 | "CMakeFiles/CMakeDirectoryInformation.cmake"
69 | )
70 |
71 | # Dependency information for all targets:
72 | set(CMAKE_DEPEND_INFO_FILES
73 | "CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake"
74 | "CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake"
75 | )
76 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/Makefile2:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # Default target executed when no arguments are given to make.
5 | default_target: all
6 | .PHONY : default_target
7 |
8 | #=============================================================================
9 | # Special targets provided by cmake.
10 |
11 | # Disable implicit rules so canonical targets will work.
12 | .SUFFIXES:
13 |
14 | # Disable VCS-based implicit rules.
15 | % : %,v
16 |
17 | # Disable VCS-based implicit rules.
18 | % : RCS/%
19 |
20 | # Disable VCS-based implicit rules.
21 | % : RCS/%,v
22 |
23 | # Disable VCS-based implicit rules.
24 | % : SCCS/s.%
25 |
26 | # Disable VCS-based implicit rules.
27 | % : s.%
28 |
29 | .SUFFIXES: .hpux_make_needs_suffix_list
30 |
31 | # Command-line flag to silence nested $(MAKE).
32 | $(VERBOSE)MAKESILENT = -s
33 |
34 | #Suppress display of executed commands.
35 | $(VERBOSE).SILENT:
36 |
37 | # A target that is always out of date.
38 | cmake_force:
39 | .PHONY : cmake_force
40 |
41 | #=============================================================================
42 | # Set environment variables for the build.
43 |
44 | # The shell in which to execute make rules.
45 | SHELL = /bin/sh
46 |
47 | # The CMake executable.
48 | CMAKE_COMMAND = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake
49 |
50 | # The command to remove a file.
51 | RM = /usr/local/lib/python3.10/dist-packages/cmake/data/bin/cmake -E rm -f
52 |
53 | # Escaping for special characters.
54 | EQUALS = =
55 |
56 | # The top-level source directory on which CMake was run.
57 | CMAKE_SOURCE_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4"
58 |
59 | # The top-level build directory on which CMake was run.
60 | CMAKE_BINARY_DIR = "/content/drive/Othercomputers/My MacBook Pro/hw4/build"
61 |
62 | #=============================================================================
63 | # Directory level rules for the build root directory
64 |
65 | # The main recursive "all" target.
66 | all: CMakeFiles/ndarray_backend_cpu.dir/all
67 | all: CMakeFiles/ndarray_backend_cuda.dir/all
68 | .PHONY : all
69 |
70 | # The main recursive "preinstall" target.
71 | preinstall:
72 | .PHONY : preinstall
73 |
74 | # The main recursive "clean" target.
75 | clean: CMakeFiles/ndarray_backend_cpu.dir/clean
76 | clean: CMakeFiles/ndarray_backend_cuda.dir/clean
77 | .PHONY : clean
78 |
79 | #=============================================================================
80 | # Target rules for target CMakeFiles/ndarray_backend_cpu.dir
81 |
82 | # All Build rule for target.
83 | CMakeFiles/ndarray_backend_cpu.dir/all:
84 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/depend
85 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/build
86 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=1,2 "Built target ndarray_backend_cpu"
87 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/all
88 |
89 | # Build rule for subdir invocation for target.
90 | CMakeFiles/ndarray_backend_cpu.dir/rule: cmake_check_build_system
91 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2
92 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cpu.dir/all
93 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0
94 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/rule
95 |
96 | # Convenience name for target.
97 | ndarray_backend_cpu: CMakeFiles/ndarray_backend_cpu.dir/rule
98 | .PHONY : ndarray_backend_cpu
99 |
100 | # clean rule for target.
101 | CMakeFiles/ndarray_backend_cpu.dir/clean:
102 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cpu.dir/build.make CMakeFiles/ndarray_backend_cpu.dir/clean
103 | .PHONY : CMakeFiles/ndarray_backend_cpu.dir/clean
104 |
105 | #=============================================================================
106 | # Target rules for target CMakeFiles/ndarray_backend_cuda.dir
107 |
108 | # All Build rule for target.
109 | CMakeFiles/ndarray_backend_cuda.dir/all:
110 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/depend
111 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/build
112 | @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --progress-dir="/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" --progress-num=3,4 "Built target ndarray_backend_cuda"
113 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/all
114 |
115 | # Build rule for subdir invocation for target.
116 | CMakeFiles/ndarray_backend_cuda.dir/rule: cmake_check_build_system
117 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 2
118 | $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 CMakeFiles/ndarray_backend_cuda.dir/all
119 | $(CMAKE_COMMAND) -E cmake_progress_start "/content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles" 0
120 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/rule
121 |
122 | # Convenience name for target.
123 | ndarray_backend_cuda: CMakeFiles/ndarray_backend_cuda.dir/rule
124 | .PHONY : ndarray_backend_cuda
125 |
126 | # clean rule for target.
127 | CMakeFiles/ndarray_backend_cuda.dir/clean:
128 | $(MAKE) $(MAKESILENT) -f CMakeFiles/ndarray_backend_cuda.dir/build.make CMakeFiles/ndarray_backend_cuda.dir/clean
129 | .PHONY : CMakeFiles/ndarray_backend_cuda.dir/clean
130 |
131 | #=============================================================================
132 | # Special targets to cleanup operation of make.
133 |
134 | # Special rule to run CMake to check the build system integrity.
135 | # No rule that depends on this can have commands that come from listfiles
136 | # because they might be regenerated.
137 | cmake_check_build_system:
138 | $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
139 | .PHONY : cmake_check_build_system
140 |
141 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/TargetDirectories.txt:
--------------------------------------------------------------------------------
1 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cpu.dir
2 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/ndarray_backend_cuda.dir
3 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/edit_cache.dir
4 | /content/drive/Othercomputers/My MacBook Pro/hw4/build/CMakeFiles/rebuild_cache.dir
5 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/cmake.check_cache:
--------------------------------------------------------------------------------
1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file
2 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/DependInfo.cmake:
--------------------------------------------------------------------------------
1 |
2 | # Consider dependencies only in project.
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
4 |
5 | # The set of languages for which implicit dependencies are needed:
6 | set(CMAKE_DEPENDS_LANGUAGES
7 | )
8 |
9 | # The set of dependency files which are needed:
10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES
11 | "/content/drive/Othercomputers/My MacBook Pro/hw4/src/ndarray_backend_cpu.cc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o" "gcc" "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d"
12 | )
13 |
14 | # Targets to which this target links which contain Fortran sources.
15 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES
16 | )
17 |
18 | # Fortran module output directory.
19 | set(CMAKE_Fortran_TARGET_MODULE_DIR "")
20 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
1 | file(REMOVE_RECURSE
2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so"
3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.pdb"
4 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o"
5 | "CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o.d"
6 | )
7 |
8 | # Per-language clean rules from dependency scanning.
9 | foreach(lang CXX)
10 | include(CMakeFiles/ndarray_backend_cpu.dir/cmake_clean_${lang}.cmake OPTIONAL)
11 | endforeach()
12 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cpu.
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ndarray_backend_cpu.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/flags.make:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 | # compile CXX with /usr/bin/c++
5 | CXX_DEFINES = -Dndarray_backend_cpu_EXPORTS
6 |
7 | CXX_INCLUDES = -isystem /usr/include/python3.10 -isystem /usr/local/lib/python3.10/dist-packages/pybind11/include -isystem /usr/local/cuda/include
8 |
9 | CXX_FLAGS = -std=c++11 -O2 -march=native -fPIC -fvisibility=hidden
10 |
11 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o
2 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 1
2 | CMAKE_PROGRESS_2 = 2
3 |
4 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cpu.dir/src/ndarray_backend_cpu.cc.o
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/DependInfo.cmake:
--------------------------------------------------------------------------------
1 |
2 | # Consider dependencies only in project.
3 | set(CMAKE_DEPENDS_IN_PROJECT_ONLY OFF)
4 |
5 | # The set of languages for which implicit dependencies are needed:
6 | set(CMAKE_DEPENDS_LANGUAGES
7 | )
8 |
9 | # The set of dependency files which are needed:
10 | set(CMAKE_DEPENDS_DEPENDENCY_FILES
11 | )
12 |
13 | # Targets to which this target links which contain Fortran sources.
14 | set(CMAKE_Fortran_TARGET_LINKED_INFO_FILES
15 | )
16 |
17 | # Fortran module output directory.
18 | set(CMAKE_Fortran_TARGET_MODULE_DIR "")
19 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/cmake_clean.cmake:
--------------------------------------------------------------------------------
1 | file(REMOVE_RECURSE
2 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so"
3 | "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.pdb"
4 | "CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o"
5 | )
6 |
7 | # Per-language clean rules from dependency scanning.
8 | foreach(lang )
9 | include(CMakeFiles/ndarray_backend_cuda.dir/cmake_clean_${lang}.cmake OPTIONAL)
10 | endforeach()
11 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.make:
--------------------------------------------------------------------------------
1 | # Empty compiler generated dependencies file for ndarray_backend_cuda.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/compiler_depend.ts:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Timestamp file for compiler generated dependencies management for ndarray_backend_cuda.
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/depend.make:
--------------------------------------------------------------------------------
1 | # Empty dependencies file for ndarray_backend_cuda.
2 | # This may be replaced when dependencies are built.
3 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/flags.make:
--------------------------------------------------------------------------------
1 | # CMAKE generated file: DO NOT EDIT!
2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.27
3 |
4 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/link.txt:
--------------------------------------------------------------------------------
1 | /usr/bin/c++ -fPIC -std=c++11 -O2 -march=native -shared -o "/content/drive/Othercomputers/My MacBook Pro/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so" CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o -Wl,-rpath,/usr/local/cuda/lib64 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/x86_64-linux-gnu/librt.a /usr/local/cuda/lib64/libcudart.so
2 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/progress.make:
--------------------------------------------------------------------------------
1 | CMAKE_PROGRESS_1 = 3
2 | CMAKE_PROGRESS_2 = 4
3 |
4 |
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/build/CMakeFiles/ndarray_backend_cuda.dir/src/ndarray_backend_cuda_generated_ndarray_backend_cuda.cu.o
--------------------------------------------------------------------------------
/hw4/build/CMakeFiles/progress.marks:
--------------------------------------------------------------------------------
1 | 4
2 |
--------------------------------------------------------------------------------
/hw4/build/cmake_install.cmake:
--------------------------------------------------------------------------------
1 | # Install script for directory: /content/drive/Othercomputers/My MacBook Pro/hw4
2 |
3 | # Set the install prefix
4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX)
5 | set(CMAKE_INSTALL_PREFIX "/usr/local")
6 | endif()
7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
8 |
9 | # Set the install configuration name.
10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
11 | if(BUILD_TYPE)
12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
14 | else()
15 | set(CMAKE_INSTALL_CONFIG_NAME "")
16 | endif()
17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
18 | endif()
19 |
20 | # Set the component getting installed.
21 | if(NOT CMAKE_INSTALL_COMPONENT)
22 | if(COMPONENT)
23 | message(STATUS "Install component: \"${COMPONENT}\"")
24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
25 | else()
26 | set(CMAKE_INSTALL_COMPONENT)
27 | endif()
28 | endif()
29 |
30 | # Install shared libraries without execute permission?
31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
32 | set(CMAKE_INSTALL_SO_NO_EXE "1")
33 | endif()
34 |
35 | # Is this installation the result of a crosscompile?
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING)
37 | set(CMAKE_CROSSCOMPILING "FALSE")
38 | endif()
39 |
40 | # Set default install directory permissions.
41 | if(NOT DEFINED CMAKE_OBJDUMP)
42 | set(CMAKE_OBJDUMP "/usr/bin/objdump")
43 | endif()
44 |
45 | if(CMAKE_INSTALL_COMPONENT)
46 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
47 | else()
48 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
49 | endif()
50 |
51 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT
52 | "${CMAKE_INSTALL_MANIFEST_FILES}")
53 | file(WRITE "/content/drive/Othercomputers/My MacBook Pro/hw4/build/${CMAKE_INSTALL_MANIFEST}"
54 | "${CMAKE_INSTALL_MANIFEST_CONTENT}")
55 |
--------------------------------------------------------------------------------
/hw4/build/detect_cuda_compute_capabilities.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | int main()
4 | {
5 | int count = 0;
6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;
7 | if (count == 0) return -1;
8 | for (int device = 0; device < count; ++device)
9 | {
10 | cudaDeviceProp prop;
11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
12 | std::printf("%d.%d ", prop.major, prop.minor);
13 | }
14 | return 0;
15 | }
16 |
--------------------------------------------------------------------------------
/hw4/debug.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("./tests/hw4")
3 | sys.path.append("./python")
4 |
5 | from test_nd_backend import *
6 | from test_cifar_ptb_data import *
7 | from test_conv import *
8 | from test_sequence_models import *
9 | from needle import backend_ndarray as nd
10 |
11 |
def train_cifar10():
    """Train ResNet9 on CIFAR-10 for two epochs on CPU, then evaluate it.

    Debug driver. Relies on the module-level path setup, which already
    imports ``sys`` and appends ``./python``; only ``./apps`` (where
    ``models`` and ``simple_ml`` live) is added here.
    """
    sys.path.append('./apps')  # for models / simple_ml
    import needle as ndl
    from models import ResNet9
    from simple_ml import train_cifar10, evaluate_cifar10

    device = ndl.cpu()
    dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
    dataloader = ndl.data.DataLoader(
        dataset=dataset,
        batch_size=128,
        shuffle=True,
    )
    model = ResNet9(device=device, dtype="float32")
    # NOTE: the imported train_cifar10 shadows this function's own name
    # inside this scope; the call below is the simple_ml trainer.
    train_cifar10(model, dataloader, n_epochs=2, optimizer=ndl.optim.Adam,
                  lr=0.001, weight_decay=0.001, device=device)
    evaluate_cifar10(model, dataloader)
30 |
31 |
def train_language_model():
    """Train a small RNN language model on PTB for one epoch, then evaluate.

    Debug driver; uses the numpy CPU backend and tiny hyperparameters so it
    runs quickly.
    """
    import needle as ndl
    sys.path.append('./apps')  # models / simple_ml live under ./apps
    from models import LanguageModel
    from simple_ml import train_ptb, evaluate_ptb

    dev = ndl.cpu_numpy()
    corpus = ndl.data.Corpus("data/ptb")
    batches = ndl.data.batchify(
        corpus.train, batch_size=16, device=dev, dtype="float32"
    )
    lm = LanguageModel(
        30,
        len(corpus.dictionary),
        hidden_size=10,
        num_layers=2,
        seq_model='rnn',
        device=dev,
    )
    train_ptb(lm, batches, seq_len=1, n_epochs=1, device=dev)
    evaluate_ptb(lm, batches, seq_len=40, device=dev)
44 |
45 |
# Scratch test harness: each "Part" mirrors a section of the hw4 notebook.
# Uncomment the call(s) you want to run; currently only train_cifar10() is
# active. (The bare triple-quoted "Part N" strings are no-op statements
# used as section markers.)
if __name__ == "__main__":
    """
    Part 1
    """
    # test_stack((5, 5), 0, 2, nd.cpu())
    # test_stack_backward((5, 5), 0, 2, nd.cpu())

    # test_matmul(16, 16, 16, nd.cpu())
    # test_relu((5, 5), nd.cpu())
    # test_tanh_backward((5, 5), nd.cpu())


    """
    Part 2
    """
    # test_cifar10_dataset(True)


    """
    Part 3
    """
    # test_pad_forward({"shape": (10, 32, 32, 8), "padding": ( (0, 0), (2, 2), (2, 2), (0, 0) )}, nd.cpu())
    # test_flip_forward({"shape": (10, 5), "axes": (0,)}, nd.cpu())
    # test_dilate_forward(nd.cpu())
    # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 1, 2, False, nd.cpu())
    # test_op_conv((3, 16, 16, 8), (3, 3, 8, 16), 2, 1, True, nd.cpu())

    # test_init_kaiming_uniform(nd.cpu())
    # test_nn_conv_forward(4, 8, 16, 3, 1, nd.cpu())
    # test_nn_conv_backward(4, 1, 1, 3, 1, nd.cpu())
    # test_resnet9(nd.cpu())
    # test_train_cifar10(nd.cpu())

    train_cifar10()

    """
    Part 4
    """
    # test_rnn_cell(1, 1, 1, False, False, 'relu', nd.cpu())
    # test_lstm_cell(1, 1, 1, False, False, nd.cpu())
    # test_lstm(13, 1, 1, 1, 1, True, True, nd.cpu())

    """
    Part 6
    """
    # test_language_model_implementation(1, 1, 1, 1, 1, True, 1, 'rnn', nd.cpu())

    """
    Part 7
    """
    # train_language_model()
--------------------------------------------------------------------------------
/hw4/hw4.ipynb - Colaboratory.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/hw4.ipynb - Colaboratory.pdf
--------------------------------------------------------------------------------
/hw4/python/needle/__init__.py:
--------------------------------------------------------------------------------
1 | from . import ops
2 | from .ops import *
3 | from .autograd import Tensor, cpu, all_devices
4 |
5 | from . import init
6 | from .init import ones, zeros, zeros_like, ones_like
7 |
8 | from . import data
9 | from . import nn
10 | from . import optim
11 | from .backend_selection import *
12 |
--------------------------------------------------------------------------------
/hw4/python/needle/backend_ndarray/__init__.py:
--------------------------------------------------------------------------------
1 | from .ndarray import *
2 |
--------------------------------------------------------------------------------
/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-darwin.so
--------------------------------------------------------------------------------
/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cpu.cpython-310-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NgCafai/deep-learning-system/7d8db29ea567c8f026ca0560b7acb55b4cd24ffb/hw4/python/needle/backend_ndarray/ndarray_backend_cuda.cpython-310-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/hw4/python/needle/backend_ndarray/ndarray_backend_numpy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | __device_name__ = "numpy"
5 | _datatype = np.float32
6 | _datetype_size = np.dtype(_datatype).itemsize
7 |
8 |
class Array:
    """Flat float32 buffer that backs one device array."""

    def __init__(self, size):
        # Uninitialized storage; backend ops are expected to fill it.
        self.array = np.empty(size, dtype=np.float32)

    @property
    def size(self):
        """Number of float32 elements in the buffer."""
        return self.array.size
16 |
17 |
def to_numpy(a, shape, strides, offset):
    """View a's flat buffer as an ndarray with the given shape and
    element strides (converted here to byte strides), starting at offset."""
    byte_strides = tuple(s * _datetype_size for s in strides)
    return np.lib.stride_tricks.as_strided(a.array[offset:], shape, byte_strides)
22 |
23 |
def from_numpy(a, out):
    """Copy the numpy array a into out's flat buffer."""
    out.array[:] = a.ravel()
26 |
27 |
def fill(out, val):
    """Set every element of out to the scalar val."""
    out.array[:] = val
30 |
31 |
def compact(a, out, shape, strides, offset):
    """Gather a strided view of a into out as a contiguous 1-D buffer."""
    view = to_numpy(a, shape, strides, offset)
    out.array[:] = view.flatten()
34 |
35 |
def ewise_setitem(a, out, shape, strides, offset):
    """Scatter the compact buffer a into a strided view of out."""
    view = to_numpy(out, shape, strides, offset)
    view[:] = a.array.reshape(shape)
38 |
39 |
def scalar_setitem(size, val, out, shape, strides, offset):
    """Fill a strided view of out with the scalar val (size is unused here)."""
    view = to_numpy(out, shape, strides, offset)
    view[:] = val
42 |
43 |
def ewise_add(a, b, out):
    """Elementwise a + b, written into out."""
    np.add(a.array, b.array, out=out.array)
46 |
47 |
def scalar_add(a, val, out):
    """Elementwise a + val, written into out."""
    np.add(a.array, val, out=out.array)
50 |
51 |
def ewise_mul(a, b, out):
    """Elementwise a * b, written into out."""
    np.multiply(a.array, b.array, out=out.array)
54 |
55 |
def scalar_mul(a, val, out):
    """Elementwise a * val, written into out."""
    np.multiply(a.array, val, out=out.array)
58 |
59 |
def ewise_div(a, b, out):
    """Elementwise a / b, written into out."""
    np.divide(a.array, b.array, out=out.array)
62 |
63 |
def scalar_div(a, val, out):
    """Elementwise a / val, written into out."""
    np.divide(a.array, val, out=out.array)
66 |
67 |
def scalar_power(a, val, out):
    """Elementwise a ** val, written into out."""
    out.array[:] = np.power(a.array, val)
70 |
71 |
def ewise_maximum(a, b, out):
    """Elementwise max(a, b), written into out."""
    np.maximum(a.array, b.array, out=out.array)
74 |
75 |
def scalar_maximum(a, val, out):
    """Elementwise max(a, val), written into out."""
    np.maximum(a.array, val, out=out.array)
78 |
79 |
def ewise_eq(a, b, out):
    """Elementwise equality as 0.0/1.0 float32, written into out."""
    mask = a.array == b.array
    out.array[:] = mask.astype(np.float32)
82 |
83 |
def scalar_eq(a, val, out):
    """Elementwise equality with a scalar as 0.0/1.0 float32."""
    mask = a.array == val
    out.array[:] = mask.astype(np.float32)
86 |
87 |
def ewise_ge(a, b, out):
    """Elementwise a >= b as 0.0/1.0 float32, written into out."""
    mask = a.array >= b.array
    out.array[:] = mask.astype(np.float32)
90 |
91 |
def scalar_ge(a, val, out):
    """Elementwise a >= val as 0.0/1.0 float32, written into out."""
    mask = a.array >= val
    out.array[:] = mask.astype(np.float32)
94 |
95 |
def ewise_log(a, out):
    """Elementwise natural log, written into out."""
    np.log(a.array, out=out.array)
98 |
99 |
def ewise_exp(a, out):
    """Elementwise exponential, written into out."""
    np.exp(a.array, out=out.array)
102 |
103 |
def ewise_tanh(a, out):
    """Elementwise hyperbolic tangent, written into out."""
    np.tanh(a.array, out=out.array)
106 |
107 |
def matmul(a, b, out, m, n, p):
    """out = (m x n) @ (n x p); all three buffers are flat."""
    lhs = a.array.reshape(m, n)
    rhs = b.array.reshape(n, p)
    out.array[:] = (lhs @ rhs).reshape(-1)
110 |
111 |
def reduce_max(a, out, reduce_size):
    """Max over contiguous groups of reduce_size elements."""
    grouped = a.array.reshape(-1, reduce_size)
    out.array[:] = grouped.max(axis=1)
114 |
115 |
def reduce_sum(a, out, reduce_size):
    """Sum over contiguous groups of reduce_size elements."""
    grouped = a.array.reshape(-1, reduce_size)
    out.array[:] = grouped.sum(axis=1)
118 |
--------------------------------------------------------------------------------
/hw4/python/needle/backend_numpy.py:
--------------------------------------------------------------------------------
"""This file defines specific implementations of devices when using numpy as NDArray backend.
2 | """
3 | import numpy
4 |
5 |
class Device:
    """Common base class for every needle device."""
8 |
9 |
class CPUDevice(Device):
    """Device whose data lives in host memory as plain numpy arrays."""

    def __repr__(self):
        return "needle.cpu()"

    def __hash__(self):
        return hash(repr(self))

    def __eq__(self, other):
        # All CPUDevice instances are interchangeable.
        return isinstance(other, CPUDevice)

    def enabled(self):
        """The CPU backend is always available."""
        return True

    def zeros(self, *shape, dtype="float32"):
        """All-zeros array of the given shape."""
        return numpy.zeros(shape, dtype=dtype)

    def ones(self, *shape, dtype="float32"):
        """All-ones array of the given shape."""
        return numpy.ones(shape, dtype=dtype)

    def randn(self, *shape):
        # numpy's random routines do not take a dtype, so values come back
        # as float64; conversion happens later when wrapped into a Tensor.
        return numpy.random.randn(*shape)

    def rand(self, *shape):
        # Same dtype caveat as randn: float64 output, converted downstream.
        return numpy.random.rand(*shape)

    def one_hot(self, n, i, dtype="float32"):
        """One-hot rows of width n selected by the integer indices i."""
        return numpy.eye(n, dtype=dtype)[i]

    def empty(self, shape, dtype="float32"):
        """Uninitialized array of the given shape."""
        return numpy.empty(shape, dtype=dtype)

    def full(self, shape, fill_value, dtype="float32"):
        """Array of the given shape filled with fill_value."""
        return numpy.full(shape, fill_value, dtype=dtype)
49 |
50 |
def cpu():
    """Construct the (stateless) CPU device."""
    return CPUDevice()
54 |
55 |
def default_device():
    """The CPU is the default device for this backend."""
    return cpu()
58 |
59 |
def all_devices():
    """Return a list of all available devices (CPU only for this backend)."""
    return [cpu()]
63 |
--------------------------------------------------------------------------------
/hw4/python/needle/backend_selection.py:
--------------------------------------------------------------------------------
"""Logic for backend selection"""
import os


# Chosen via the NEEDLE_BACKEND environment variable at import time.
# "nd" (default) = needle's own NDArray backend; "np" = raw numpy arrays.
BACKEND = os.environ.get("NEEDLE_BACKEND", "nd")


if BACKEND == "nd":
    print("Using needle backend")
    # needle's NDArray implementation, with CPU and (optionally) CUDA devices.
    from . import backend_ndarray as array_api
    from .backend_ndarray import (
        all_devices,
        cuda,
        cpu,
        cpu_numpy,
        default_device,
        BackendDevice as Device,
    )

    NDArray = array_api.NDArray
elif BACKEND == "np":
    print("Using numpy backend")
    # Plain numpy arrays as the backing store; CPU only.
    import numpy as array_api
    from .backend_numpy import all_devices, cpu, default_device, Device

    NDArray = array_api.ndarray
else:
    raise RuntimeError("Unknown needle array backend %s" % BACKEND)
29 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_basic import *
2 | from .data_transforms import *
3 | from .datasets import *
4 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/data_basic.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from ..autograd import Tensor
3 |
4 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any
5 |
6 |
7 |
class Dataset:
    r"""Abstract base class for map-style datasets.

    Subclasses must override :meth:`__getitem__` (fetch one example by key)
    and :meth:`__len__` (number of examples).
    """

    def __init__(self, transforms: Optional[List] = None):
        self.transforms = transforms

    def __getitem__(self, index) -> object:
        raise NotImplementedError

    def __len__(self) -> int:
        raise NotImplementedError

    def apply_transforms(self, x):
        """Pass x through each configured transform, in order."""
        for tform in self.transforms or []:
            x = tform(x)
        return x
31 |
32 |
class DataLoader:
    r"""Iterable over a dataset that yields Tensor batches.

    Args:
        dataset (Dataset): dataset from which to load the data.
        batch_size (int, optional): how many samples per batch to load
            (default: ``1``).
        shuffle (bool, optional): set to ``True`` to reshuffle the data
            at the start of every epoch (default: ``False``).
    """
    dataset: Dataset
    batch_size: Optional[int]

    def __init__(
        self,
        dataset: Dataset,
        batch_size: Optional[int] = 1,
        shuffle: bool = False,
    ):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        if not shuffle:
            # Deterministic order can be precomputed once.
            self.ordering = self._batched(np.arange(len(dataset)))

    def _batched(self, order):
        # Chop an index order into batch_size-sized chunks.
        return np.array_split(
            order, range(self.batch_size, len(self.dataset), self.batch_size)
        )

    def __iter__(self):
        if self.shuffle:
            order = np.random.permutation(len(self.dataset))
        else:
            order = np.arange(len(self.dataset))
        self.ordering = self._batched(order)
        self.batch_idx = 0
        return self

    def __next__(self):
        if self.batch_idx >= len(self.ordering):
            raise StopIteration
        idxs = self.ordering[self.batch_idx]
        X_batch, y_batch = self.dataset[idxs]
        self.batch_idx += 1
        return Tensor(X_batch), Tensor(y_batch)
78 |
79 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/data_transforms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
class Transform:
    """Callable interface implemented by data augmentations."""

    def __call__(self, x):
        raise NotImplementedError
6 |
7 |
class RandomFlipHorizontal(Transform):
    """Flip an H x W x C image left-right with probability p."""

    def __init__(self, p = 0.5):
        self.p = p

    def __call__(self, img):
        """Return img mirrored along the width axis with probability self.p.

        The single np.random.rand() draw comes first so results are
        reproducible under a fixed seed (matches the provided test harness).
        """
        if np.random.rand() < self.p:
            return img[:, ::-1, :]
        return img
25 |
26 |
class RandomCrop(Transform):
    """Zero-pad an image and take a randomly shifted crop of the original size."""

    def __init__(self, padding=3):
        self.padding = padding

    def __call__(self, img):
        """Shift an H x W x C image by a random (shift_x, shift_y) drawn from
        [-padding, padding], filling the uncovered border with zeros.
        """
        pad = self.padding
        # Single randint call keeps the RNG stream identical for testing.
        shift_x, shift_y = np.random.randint(low=-pad, high=pad + 1, size=2)
        padded = np.pad(
            img, ((pad, pad), (pad, pad), (0, 0)), 'constant', constant_values=0
        )
        top = pad + shift_x
        left = pad + shift_y
        return padded[top : top + img.shape[0], left : left + img.shape[1], :]
43 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .mnist_dataset import *
2 | from .ndarray_dataset import *
3 | from .cifar10_dataset import *
4 | from .ptb_dataset import *
5 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/datasets/cifar10_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | from typing import Iterator, Optional, List, Sized, Union, Iterable, Any
4 | import numpy as np
5 | from ..data_basic import Dataset
6 |
class CIFAR10Dataset(Dataset):
    """CIFAR-10 images loaded from the python pickle batches.

    Parameters:
        base_folder - cifar-10-batches-py folder filepath
        train - if True load the five training batches, else the test batch
        p - unused, kept for API compatibility
        transforms - optional list of callables applied to each image
    Attributes:
        X - float32 images in [0, 1], shape (N, 3, 32, 32)
        y - labels
    """
    def __init__(
        self,
        base_folder: str,
        train: bool,
        p: Optional[int] = 0.5,
        transforms: Optional[List] = None
    ):
        if train:
            self.X = np.empty((0, 3, 32, 32))
            self.y = np.empty((0,))
            for i in range(1, 6):
                batch_path = os.path.join(base_folder, f"data_batch_{i}")
                with open(batch_path, "rb") as f:
                    raw = pickle.load(f, encoding="bytes")
                self.X = np.concatenate(
                    (self.X, raw[b"data"].reshape(-1, 3, 32, 32)), axis=0)
                self.y = np.concatenate((self.y, raw[b"labels"]), axis=0)
        else:
            with open(os.path.join(base_folder, "test_batch"), "rb") as f:
                raw = pickle.load(f, encoding="bytes")
            self.X = raw[b"data"].reshape(-1, 3, 32, 32)
            self.y = np.array(raw[b"labels"])

        # Scale pixel values into [0, 1].
        self.X = self.X.astype(np.float32) / 255.0
        self.transforms = [] if transforms is None else transforms

    def __getitem__(self, index) -> object:
        """Return (image(s), label(s)) at index; each image is (3, 32, 32)."""
        img = self.X[index]
        for tform in self.transforms:
            img = tform(img)
        return img, self.y[index]

    def __len__(self) -> int:
        """Total number of examples in the dataset."""
        return len(self.y)
58 |
--------------------------------------------------------------------------------
/hw4/python/needle/data/datasets/mnist_dataset.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 | from ..data_basic import Dataset
3 | import numpy as np
4 |
class MNISTDataset(Dataset):
    """MNIST dataset read from the gzipped IDX files.

    Images are stored flattened as float32 in [0, 1]; labels as uint8.
    Replaces the previous NotImplementedError placeholders with a working
    implementation.
    """

    def __init__(
        self,
        image_filename: str,
        label_filename: str,
        transforms: Optional[List] = None,
    ):
        super().__init__(transforms)
        # Local imports keep the module import-light.
        import gzip
        import struct

        # IDX image file: >IIII header (magic, count, rows, cols), then pixels.
        with gzip.open(image_filename, "rb") as f:
            magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
            if magic != 2051:
                raise ValueError("invalid MNIST image file magic: %d" % magic)
            pixels = np.frombuffer(f.read(), dtype=np.uint8)
        self.rows = rows
        self.cols = cols
        # One flattened row per image, normalized to [0, 1].
        self.images = (pixels.astype(np.float32) / 255.0).reshape(num, rows * cols)

        # IDX label file: >II header (magic, count), then one byte per label.
        with gzip.open(label_filename, "rb") as f:
            magic, _ = struct.unpack(">II", f.read(8))
            if magic != 2049:
                raise ValueError("invalid MNIST label file magic: %d" % magic)
            self.labels = np.frombuffer(f.read(), dtype=np.uint8)

    def __getitem__(self, index) -> object:
        """Return (image, label); transforms see each image as H x W x 1."""
        x = self.images[index]
        if self.transforms:
            if x.ndim == 1:
                x = self.apply_transforms(
                    x.reshape(self.rows, self.cols, 1)).reshape(-1)
            else:
                # Batched indexing: transform each example independently.
                x = np.stack([
                    self.apply_transforms(
                        row.reshape(self.rows, self.cols, 1)).reshape(-1)
                    for row in x
                ])
        return x, self.labels[index]

    def __len__(self) -> int:
        """Number of examples."""
        return self.images.shape[0]
--------------------------------------------------------------------------------
/hw4/python/needle/data/datasets/ndarray_dataset.py:
--------------------------------------------------------------------------------
1 | from ..data_basic import Dataset
2 |
class NDArrayDataset(Dataset):
    """Dataset over pre-loaded parallel arrays; item i is the tuple of i-th rows."""

    def __init__(self, *arrays):
        self.arrays = arrays

    def __len__(self) -> int:
        # All arrays are assumed to share the leading dimension.
        return self.arrays[0].shape[0]

    def __getitem__(self, i) -> object:
        return tuple(arr[i] for arr in self.arrays)
--------------------------------------------------------------------------------
/hw4/python/needle/data/datasets/ptb_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from needle import backend_ndarray as nd
5 | from needle import Tensor
6 |
class Dictionary(object):
    """Bidirectional word <-> integer-ID mapping.

    Attributes:
        word2idx: maps each word to its unique ID
        idx2word: words in insertion order; list position == ID
    """
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        """Return word's unique ID, inserting it first if unseen."""
        if word in self.word2idx:
            return self.word2idx[word]
        new_id = len(self.idx2word)
        self.idx2word.append(word)
        self.word2idx[word] = new_id
        return new_id

    def __len__(self):
        """Number of distinct words in the dictionary."""
        return len(self.idx2word)
38 |
39 |
40 |
class Corpus(object):
    """Tokenized train and test corpora sharing a single Dictionary."""

    def __init__(self, base_dir, max_lines=None):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(base_dir, 'train.txt'), max_lines)
        self.test = self.tokenize(os.path.join(base_dir, 'test.txt'), max_lines)

    def tokenize(self, path, max_lines=None):
        """Tokenize up to max_lines lines of path into a flat list of word IDs.

        The empty string '' is used as an end-of-sentence marker: it is
        registered in the dictionary up front and appended to every line.
        """
        ids = []
        self.dictionary.add_word('')
        with open(path, 'r') as f:
            lines = f.readlines()
            if max_lines is not None:
                lines = lines[:max_lines]
            for line in lines:
                for word in line.split() + ['']:
                    ids.append(self.dictionary.add_word(word))
        return ids
74 |
75 |
def batchify(data, batch_size, device, dtype):
    """Arrange a flat token sequence into batch_size independent columns.

    With the alphabet as input and batch size 4 the result reads down each
    column: column 0 holds a..f, column 1 g..l, and so on. Any trailing
    remainder that does not fill a full column is trimmed.
    (device/dtype are accepted for API compatibility but unused here.)

    Returns a numpy array of shape (nbatch, batch_size).
    """
    nbatch = len(data) // batch_size
    usable = data[: nbatch * batch_size]
    return np.array(usable).reshape(nbatch, batch_size)
95 |
96 |
def get_batch(batches, i, bptt, device=None, dtype=None):
    """Slice a (nbatch, bs) token matrix into an (input, target) pair.

    data   = rows [i, i + seq_len)       -> Tensor of shape (bptt, bs)
    target = rows [i+1, i+1 + seq_len)   -> flattened Tensor (bptt*bs,)
    with seq_len = min(bptt, nbatch - 1 - i) so the target (which is the
    input shifted by one row) never runs past the end of the data.
    """
    seq_len = min(bptt, batches.shape[0] - 1 - i)
    data = batches[i : i + seq_len]
    target = batches[i + 1 : i + 1 + seq_len].reshape(-1)
    return (Tensor(data, device=device, dtype=dtype),
            Tensor(target, device=device, dtype=dtype))
--------------------------------------------------------------------------------
/hw4/python/needle/init/__init__.py:
--------------------------------------------------------------------------------
1 | from .init_basic import *
2 |
3 | from .init_initializers import *
4 |
--------------------------------------------------------------------------------
/hw4/python/needle/init/init_basic.py:
--------------------------------------------------------------------------------
1 | import math
2 | import needle as ndl
3 |
4 |
def rand(*shape, low=0.0, high=1.0, device=None, dtype="float32", requires_grad=False):
    """Tensor of uniform samples drawn from [low, high)."""
    if device is None:
        device = ndl.cpu()
    values = device.rand(*shape) * (high - low) + low
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
10 |
11 |
def randn(*shape, mean=0.0, std=1.0, device=None, dtype="float32", requires_grad=False):
    """Tensor of normal samples with the given mean and std deviation."""
    if device is None:
        device = ndl.cpu()
    values = device.randn(*shape) * std + mean
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
17 |
18 |
19 |
def constant(*shape, c=1.0, device=None, dtype="float32", requires_grad=False):
    """Tensor of the given shape filled with the constant c."""
    if device is None:
        device = ndl.cpu()
    values = device.full(shape, c, dtype=dtype)
    return ndl.Tensor(values, device=device, dtype=dtype, requires_grad=requires_grad)
25 |
def ones(*shape, device=None, dtype="float32", requires_grad=False):
    """Tensor of all ones."""
    return constant(*shape, c=1.0, device=device, dtype=dtype,
                    requires_grad=requires_grad)
31 |
32 |
def zeros(*shape, device=None, dtype="float32", requires_grad=False):
    """Tensor of all zeros."""
    return constant(*shape, c=0.0, device=device, dtype=dtype,
                    requires_grad=requires_grad)
38 |
39 |
def randb(*shape, p=0.5, device=None, dtype="bool", requires_grad=False):
    """Tensor of Bernoulli(p) booleans (True with probability p)."""
    if device is None:
        device = ndl.cpu()
    mask = device.rand(*shape) <= p
    return ndl.Tensor(mask, device=device, dtype=dtype, requires_grad=requires_grad)
45 |
46 |
def one_hot(n, i, device=None, dtype="float32", requires_grad=False):
    """One-hot rows of width n for the integer indices held in Tensor i."""
    if device is None:
        device = ndl.cpu()
    indices = i.numpy().astype("int32")
    return ndl.Tensor(
        device.one_hot(n, indices, dtype=dtype),
        device=device,
        requires_grad=requires_grad,
    )
55 |
56 |
def zeros_like(array, *, device=None, requires_grad=False):
    """All-zeros Tensor with array's shape and dtype (device overridable)."""
    target = device or array.device
    return zeros(*array.shape, dtype=array.dtype, device=target,
                 requires_grad=requires_grad)
62 |
63 |
def ones_like(array, *, device=None, requires_grad=False):
    """All-ones Tensor with array's shape and dtype (device overridable)."""
    target = device or array.device
    return ones(*array.shape, dtype=array.dtype, device=target,
                requires_grad=requires_grad)
69 |
--------------------------------------------------------------------------------
/hw4/python/needle/init/init_initializers.py:
--------------------------------------------------------------------------------
1 | import math
2 | from .init_basic import *
3 |
4 |
def xavier_uniform(fan_in, fan_out, gain=1.0, **kwargs):
    """Glorot/Xavier uniform: U(-a, a) with a = gain * sqrt(6 / (fan_in + fan_out))."""
    limit = gain * math.sqrt(6.0 / (fan_in + fan_out))
    return rand(fan_in, fan_out, low=-limit, high=limit, **kwargs)
8 |
9 |
def xavier_normal(fan_in, fan_out, gain=1.0, **kwargs):
    """Glorot/Xavier normal: N(0, std^2) with std = gain * sqrt(2 / (fan_in + fan_out))."""
    sigma = gain * math.sqrt(2.0 / (fan_in + fan_out))
    return randn(fan_in, fan_out, mean=0.0, std=sigma, **kwargs)
13 |
14 |
def kaiming_uniform(fan_in, fan_out, shape=None, nonlinearity="relu", **kwargs):
    """He/Kaiming uniform init: U(-bound, bound), bound = sqrt(2) * sqrt(3 / fan_in).

    When shape is given (e.g. (k, k, cin, cout) conv kernels), fan_in is
    recomputed as the product of all but the last dimension.
    """
    assert nonlinearity == "relu", "Only relu supported currently"
    if shape is None:
        shape = (fan_in, fan_out)
    else:
        fan_in = math.prod(shape[:-1])
    bound = math.sqrt(2.0) * math.sqrt(3.0 / fan_in)
    return rand(*shape, low=-bound, high=bound, **kwargs)
24 |
25 |
def kaiming_normal(fan_in, fan_out, nonlinearity="relu", **kwargs):
    """He/Kaiming normal init: N(0, std^2) with std = sqrt(2) * sqrt(1 / fan_in)."""
    assert nonlinearity == "relu", "Only relu supported currently"
    sigma = math.sqrt(2.0) * math.sqrt(1.0 / fan_in)
    return randn(fan_in, fan_out, mean=0.0, std=sigma, **kwargs)
--------------------------------------------------------------------------------
/hw4/python/needle/nn/__init__.py:
--------------------------------------------------------------------------------
1 | from .nn_basic import *
2 | from .nn_conv import *
3 | from .nn_sequence import *
4 |
--------------------------------------------------------------------------------
/hw4/python/needle/nn/nn_conv.py:
--------------------------------------------------------------------------------
1 | """The module.
2 | """
3 | import math
4 | from typing import List, Callable, Any
5 | from needle.autograd import Tensor
6 | from needle import ops
7 | import needle.init as init
8 | import numpy as np
9 | from .nn_basic import Parameter, Module
10 |
11 |
class Conv(Module):
    """
    Multi-channel 2D convolutional layer.
    IMPORTANT: Accepts inputs in NCHW format, outputs also in NCHW format.
    Only supports padding=same.
    No grouped convolution or dilation.
    Only supports square kernels.
    """
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, device=None, dtype="float32"):
        super().__init__()
        # Tuples are tolerated for API compatibility, but only the first
        # component is used (square kernels / isotropic stride).
        if isinstance(kernel_size, tuple):
            kernel_size = kernel_size[0]
        if isinstance(stride, tuple):
            stride = stride[0]
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride

        # Kaiming-uniform weights stored as (k, k, C_in, C_out), the layout
        # ops.conv expects.
        self.weight = Parameter(init.kaiming_uniform(
            in_channels * kernel_size * kernel_size, out_channels,
            shape=(kernel_size, kernel_size, in_channels, out_channels),
            dtype=dtype, device=device, requires_grad=True))
        if bias:
            # FIX: bias was previously drawn from U(0, 1); the standard
            # initialization is U(-bound, bound) with bound = 1/sqrt(fan_in)
            # (the bound below was computed but left commented out before).
            bound = 1.0 / math.sqrt(in_channels * (kernel_size ** 2))
            self.bias = Parameter(init.rand(
                out_channels, low=-bound, high=bound,
                dtype=dtype, device=device, requires_grad=True))
        else:
            self.bias = None

        # "same" padding for odd kernel sizes.
        self.padding = (kernel_size - 1) // 2

    def forward(self, x: Tensor) -> Tensor:
        """
        x: (N, C, H, W) -> returns (N, C_out, H', W')
        """
        # ops.conv expects NHWC, so permute in...
        x = x.transpose((1, 2)).transpose((2, 3))

        out = ops.conv(x, self.weight, stride=self.stride, padding=self.padding)
        if self.bias is not None:
            bias_broadcast = ops.broadcast_to(self.bias, out.shape)
            out = out + bias_broadcast

        # ...and permute back to NCHW.
        out = out.transpose((3, 1)).transpose((3, 2))

        return out
--------------------------------------------------------------------------------
/hw4/python/needle/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .ops_mathematic import *
2 |
3 | from .ops_logarithmic import *
4 | from .ops_tuple import *
5 |
--------------------------------------------------------------------------------
/hw4/python/needle/ops/ops_logarithmic.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from ..autograd import NDArray
3 | from ..autograd import Op, Tensor, Value, TensorOp
4 | from ..autograd import TensorTuple, TensorTupleOp
5 |
6 | from .ops_mathematic import *
7 |
8 | from ..backend_selection import array_api, BACKEND
9 |
class LogSoftmax(TensorOp):
    """Numerically stable log-softmax along axis 1 of a 2-D input.

    Replaces the previous NotImplementedError placeholders.
    """

    def compute(self, Z):
        # log softmax(Z) = Z - max - log(sum(exp(Z - max))), rowwise.
        max_Z = array_api.max(Z, (1,), keepdims=True)
        shifted = Z - array_api.broadcast_to(max_Z, Z.shape)
        lse = array_api.log(
            array_api.sum(array_api.exp(shifted), (1,), keepdims=True))
        return shifted - array_api.broadcast_to(lse, Z.shape)

    def gradient(self, out_grad, node):
        # d/dZ = out_grad - softmax(Z) * rowsum(out_grad);
        # softmax(Z) is recovered cheaply as exp(log_softmax) = exp(node).
        softmax = exp(node)
        row_sum = summation(out_grad, axes=(1,))
        row_sum = reshape(row_sum, (row_sum.shape[0], 1))
        return out_grad - softmax * broadcast_to(row_sum, node.shape)
20 |
21 |
def logsoftmax(a):
    """Apply the LogSoftmax op to Tensor a."""
    op = LogSoftmax()
    return op(a)
24 |
25 |
class LogSumExp(TensorOp):
    """Numerically stable log(sum(exp(Z))) reduced over the given axes."""

    def __init__(self, axes: Optional[tuple] = None):
        self.axes = axes

    def compute(self, Z):
        # Shift by the max so exp() cannot overflow.
        max_Z = array_api.max(Z, self.axes, keepdims=True)
        Z = Z - array_api.broadcast_to(max_Z, Z.shape)
        res = array_api.log(array_api.sum(array_api.exp(Z), self.axes))
        res = res + array_api.reshape(max_Z, res.shape)
        return res

    def gradient(self, out_grad, node):
        # d logsumexp / dZ = softmax(Z) over the reduced axes.
        input_data = node.inputs[0].realize_cached_data()
        max_input = array_api.max(input_data, self.axes, keepdims=True)
        input_data = input_data - array_api.broadcast_to(max_input, input_data.shape)
        sum_exp_z = array_api.sum(array_api.exp(input_data), self.axes, keepdims=True)
        cur_grad = array_api.exp(input_data) / array_api.broadcast_to(sum_exp_z, input_data.shape)

        if out_grad.shape != cur_grad.shape:
            if out_grad.cached_data.size == cur_grad.size:
                out_grad = reshape(out_grad, cur_grad.shape)
            else:
                # Restore the reduced axes as singleton dims (e.g. (3,) ->
                # (3, 1)) before broadcasting, otherwise the broadcast would
                # align to the wrong axes.
                # FIX: normalize axes into a LOCAL variable instead of
                # mutating self.axes — gradient() must not change op state.
                new_shape = list(cur_grad.shape)
                axes = self.axes
                if axes is not None:
                    if isinstance(axes, Number):
                        axes = (axes,)
                    for axis in axes:
                        new_shape[axis] = 1
                else:
                    new_shape = [1] * len(new_shape)
                out_grad = reshape(out_grad, new_shape)
                out_grad = broadcast_to(out_grad, cur_grad.shape)
        return out_grad * cur_grad
60 |
def logsumexp(a, axes=None):
    """Numerically stable log(sum(exp(a))) over the given axes."""
    op = LogSumExp(axes=axes)
    return op(a)
63 |
64 |
--------------------------------------------------------------------------------
/hw4/python/needle/ops/ops_tuple.py:
--------------------------------------------------------------------------------
1 | from ..autograd import Op, Tensor, TensorTuple, Value, TensorOp, TensorTupleOp
2 | import needle.init as init
3 |
class MakeTensorTuple(TensorTupleOp):
    """Bundle individual tensors into a single TensorTuple value."""

    def compute(self, *args) -> tuple:
        return tuple(args)

    def gradient(self, out_grad, node):
        assert isinstance(out_grad, TensorTuple)
        # Each input receives the matching component of the tuple gradient.
        return tuple(out_grad[i] for i in range(len(out_grad)))
11 |
12 |
def make_tuple(*args):
    """Build a TensorTuple from the given tensors."""
    op = MakeTensorTuple()
    return op(*args)
15 |
16 |
class TupleGetItem(TensorOp):
    """Extract element `index` from a TensorTuple."""

    def __init__(self, index):
        self.index = index

    def __call__(self, a: TensorTuple, fold_const=True) -> Value:
        assert isinstance(a, TensorTuple)
        # Constant folding: reach straight into a freshly built tuple
        # instead of adding a node to the graph.
        if fold_const and isinstance(a.op, MakeTensorTuple):
            return a.inputs[self.index]
        return Tensor.make_from_op(self, [a])

    def compute(self, a):
        return a[self.index]

    def gradient(self, out_grad, node):
        # Gradient flows only to the selected slot; siblings get zeros.
        grads = []
        for pos, value in enumerate(node.inputs[0]):
            grads.append(out_grad if pos == self.index else init.zeros_like(value))
        return MakeTensorTuple()(*grads)
40 |
41 |
def tuple_get_item(value, index):
    """Select element `index` out of TensorTuple `value`."""
    op = TupleGetItem(index)
    return op(value)
44 |
45 |
class FusedAddScalars(TensorTupleOp):
    """Compute (a + c0, a + c1) in one fused op returning a TensorTuple."""

    def __init__(self, c0: float, c1: float):
        self.c0 = c0
        self.c1 = c1

    def compute(self, a):
        return a + self.c0, a + self.c1

    def gradient(self, out_grad, node):
        # Both outputs depend linearly on a, so the gradients just add up.
        return out_grad[0] + out_grad[1]
56 |
57 |
def fused_add_scalars(x, c0, c1):
    """Return the TensorTuple (x + c0, x + c1)."""
    op = FusedAddScalars(c0, c1)
    return op(x)
60 |
--------------------------------------------------------------------------------
/hw4/python/needle/optim.py:
--------------------------------------------------------------------------------
1 | """Optimization module"""
2 | import needle as ndl
3 | import numpy as np
4 |
5 |
class Optimizer:
    """Base class holding the parameter list shared by all optimizers."""

    def __init__(self, params):
        self.params = params

    def step(self):
        """Apply one parameter update; implemented by subclasses."""
        raise NotImplementedError()

    def reset_grad(self):
        """Drop the accumulated gradient on every parameter."""
        for param in self.params:
            param.grad = None
16 |
17 |
class SGD(Optimizer):
    """SGD with momentum and L2 weight decay."""

    def __init__(self, params, lr=0.01, momentum=0.0, weight_decay=0.0):
        super().__init__(params)
        self.lr = lr
        self.momentum = momentum
        self.weight_decay = weight_decay
        # Momentum buffers keyed by id(param).
        self.u = {}

    def step(self):
        for param in self.params:
            # Fold the L2 penalty into the gradient.
            grad = param.grad.detach() + self.weight_decay * param.detach()
            velocity = self.u.get(id(param), 0) * self.momentum + (1 - self.momentum) * grad
            # Re-wrap to force the parameter's dtype (e.g. float64 -> float32).
            velocity = ndl.Tensor(velocity, dtype=param.dtype)
            self.u[id(param)] = velocity
            param.data -= self.lr * velocity
35 |
36 |
37 |
class Adam(Optimizer):
    """Adam optimizer with L2 weight decay folded into the gradient."""

    def __init__(
        self,
        params,
        lr=0.01,
        beta1=0.9,
        beta2=0.999,
        eps=1e-8,
        weight_decay=0.0,
    ):
        super().__init__(params)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.weight_decay = weight_decay
        # Number of completed steps; drives the bias-correction terms.
        self.t = 0

        # First- and second-moment estimates, keyed by id(param).
        self.m = {}
        self.v = {}

    def step(self):
        self.t += 1
        for param in self.params:
            # Fold the L2 penalty term into the gradient.
            grad = param.grad.detach() + self.weight_decay * param.detach()
            # Cast to the parameter dtype (e.g. float64 -> float32).
            grad = ndl.Tensor(grad, dtype=param.dtype)

            key = id(param)
            m = self.beta1 * self.m.get(key, 0) + (1 - self.beta1) * grad
            v = self.beta2 * self.v.get(key, 0) + (1 - self.beta2) * grad ** 2
            # Store detached copies so the graph does not grow across steps.
            self.m[key] = m.detach()
            self.v[key] = v.detach()
            # Bias-corrected moment estimates.
            m_hat = m / (1 - self.beta1 ** self.t)
            v_hat = v / (1 - self.beta2 ** self.t)
            param.data -= self.lr * m_hat / (v_hat ** 0.5 + self.eps)
--------------------------------------------------------------------------------
/hw4/tests/hw4/test_cifar_ptb_data.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('./python')
3 | import itertools
4 | import numpy as np
5 | import pytest
6 | import mugrade
7 |
8 | import needle as ndl
9 | from needle import backend_ndarray as nd
10 |
11 |
# Fixed seed so the randomly chosen dataset indices below are reproducible.
np.random.seed(2)


# Devices to parametrize over; the CUDA entry is skipped when no GPU is present.
_DEVICES = [ndl.cpu(), pytest.param(ndl.cuda(),
    marks=pytest.mark.skipif(not ndl.cuda().enabled(), reason="No GPU"))]


# Both dataset splits (train / test) are exercised.
TRAIN = [True, False]
@pytest.mark.parametrize("train", TRAIN)
def test_cifar10_dataset(train):
    """CIFAR-10 dataset exposes the expected split size and example shape."""
    dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train)
    expected_len = 50000 if train else 10000
    assert len(dataset) == expected_len
    # Sample one random example and check its structure.
    example = dataset[np.random.randint(len(dataset))]
    assert isinstance(example, tuple)
    X, y = example
    assert isinstance(X, np.ndarray)
    assert X.shape == (3, 32, 32)
32 |
33 |
BATCH_SIZES = [1, 15]
@pytest.mark.parametrize("batch_size", BATCH_SIZES)
@pytest.mark.parametrize("train", TRAIN)
@pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"])
def test_cifar10_loader(batch_size, train, device):
    """First batch from the CIFAR-10 DataLoader has the expected types/dtype.

    NOTE(review): the ``train`` and ``device`` parameters are parametrized but
    unused — the loader always wraps the train split on the default device.
    """
    dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=True)
    loader = ndl.data.DataLoader(dataset, batch_size)
    X, y = next(iter(loader))
    assert isinstance(X.cached_data, nd.NDArray)
    assert isinstance(X, ndl.Tensor)
    assert isinstance(y, ndl.Tensor)
    assert X.dtype == 'float32'
47 |
48 |
BPTT = [3, 32]
@pytest.mark.parametrize("batch_size", BATCH_SIZES)
@pytest.mark.parametrize("bptt", BPTT)
@pytest.mark.parametrize("train", TRAIN)
@pytest.mark.parametrize("device", _DEVICES, ids=["cpu", "cuda"])
def test_ptb_dataset(batch_size, bptt, train, device):
    """Batchified PTB data yields correctly shaped and typed (X, y) batches."""
    # TODO update with more tests?
    corpus = ndl.data.Corpus("data/ptb")
    split = corpus.train if train else corpus.test
    data = ndl.data.batchify(split, batch_size, device=device, dtype="float32")
    X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt, device=device)
    assert X.shape == (bptt, batch_size)
    assert y.shape == (bptt * batch_size,)
    assert isinstance(X, ndl.Tensor)
    assert X.dtype == 'float32'
    assert X.device == device
    assert isinstance(X.cached_data, nd.NDArray)
    ntokens = len(corpus.dictionary)
    assert ntokens == 10000
70 |
71 |
72 | ### MUGRADE ###
73 |
# Smaller parameter grids used only for the mugrade submission helpers below.
TEST_BATCH_SIZES = [3, 5]
TEST_BPTT = [6, 10]
76 |
def mugrade_submit(x):
    """Submit a value to mugrade, truncating arrays to 128 elements.

    Flattening plus truncation keeps array submissions small; every other
    value is submitted unchanged. (The original if/else duplicated the
    ``mugrade.submit(x)`` call in both branches.)
    """
    if isinstance(x, np.ndarray):
        x = x.flatten()[:128]
    mugrade.submit(x)
85 |
86 |
def submit_cifar10():
    """Submit CIFAR-10 split sizes and first-batch samples to mugrade."""
    if not ndl.cuda().enabled():
        print('You need a GPU to run some of these tests.')
    devices = [ndl.cpu(), ndl.cuda()]
    for train in TRAIN:
        dataset = ndl.data.CIFAR10Dataset("data/cifar-10-batches-py", train=train)
        mugrade_submit(len(dataset))
        for device, batch_size in itertools.product(devices, TEST_BATCH_SIZES):
            loader = ndl.data.DataLoader(dataset, batch_size)
            # Only the first batch of each configuration is submitted.
            X, y = next(iter(loader))
            mugrade_submit(X.numpy()[0, :, :, :])
            mugrade_submit(y.numpy()[0])
100 |
101 |
def submit_ptb():
    """Submit PTB vocabulary size and sample batches to mugrade."""
    # devices = [ndl.cpu(), ndl.cuda()] if ndl.cuda().enabled() else [ndl.cpu()]
    devices = [ndl.cpu(), ndl.cuda()]

    corpus = ndl.data.Corpus("data/ptb")
    mugrade_submit(np.array(len(corpus.dictionary)))
    for train in TRAIN:
        for (device, batch_size, bptt) in itertools.product(devices, TEST_BATCH_SIZES, TEST_BPTT):
            if train:
                data = ndl.data.batchify(corpus.train, batch_size, device=device, dtype="float32")
            else:
                data = ndl.data.batchify(corpus.test, batch_size, device=device, dtype="float32")
            # Fix: pass device through so the batch lands on the device being
            # exercised, consistent with test_ptb_dataset above (previously the
            # default device was always used here).
            X, y = ndl.data.get_batch(data, np.random.randint(len(data)), bptt, device=device)
            mugrade_submit(np.array(len(data)))
            mugrade_submit(X.numpy()[0, :])
            mugrade_submit(y.numpy()[0])
118 |
119 |
# Run the mugrade submission helpers when executed as a script
# (pytest imports this module without triggering them).
if __name__ == "__main__":
    submit_cifar10()
    submit_ptb()
--------------------------------------------------------------------------------