├── resources ├── img_qcnn_vs_cnn.png ├── img_initialisation_pytorch.png └── img_expirement_results_classification_task_paper.png ├── reproduction ├── qcnn_convolutional_layer.ipynb └── qcnn_linear_layer.ipynb ├── results ├── cnn_results_table_1.ipynb └── qcnn_results_table_1.ipynb └── README.md /resources/img_qcnn_vs_cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JorisWeeda/Quaternion-Convolutional-Neural-Networks/HEAD/resources/img_qcnn_vs_cnn.png -------------------------------------------------------------------------------- /resources/img_initialisation_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JorisWeeda/Quaternion-Convolutional-Neural-Networks/HEAD/resources/img_initialisation_pytorch.png -------------------------------------------------------------------------------- /resources/img_expirement_results_classification_task_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JorisWeeda/Quaternion-Convolutional-Neural-Networks/HEAD/resources/img_expirement_results_classification_task_paper.png -------------------------------------------------------------------------------- /reproduction/qcnn_convolutional_layer.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":15483,"status":"ok","timestamp":1681579189979,"user":{"displayName":"joris weeda","userId":"16756172332098140791"},"user_tz":-120},"id":"xq_wUFIhKkPK","outputId":"c30ac206-04b0-4de5-96fc-3c3fb80dd446"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.9.16\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: cifar10 in /usr/local/lib/python3.9/dist-packages (1.0.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from cifar10) (1.22.4)\n","Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from cifar10) (2.27.1)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from cifar10) (4.65.0)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (1.26.15)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2.0.12)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2022.12.7)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (3.4)\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: imageio in /usr/local/lib/python3.9/dist-packages (2.25.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (1.22.4)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (1.10.1)\n","Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.9/dist-packages (from imageio) (8.4.0)\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting 
git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks\n"," Cloning https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to /tmp/pip-req-build-tbs74fkr\n"," Running command git clone --filter=blob:none --quiet https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks /tmp/pip-req-build-tbs74fkr\n"," Resolved https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to commit 28caa7cde240e354fd7b87280450fd233cd494c3\n"," Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n"]}],"source":["# run this cell to download the right packages (only needed once)\n","!python --version\n","\n","!pip install cifar10\n","!pip install imageio numpy scipy \n","!pip install git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9bXMRTLTxIJ9"},"outputs":[],"source":["import time\n","import torch\n","\n","import numpy as np\n","import torch.nn as nn\n","import torch.nn.functional as F\n","\n","from pathlib import Path\n","from torch.utils.data import DataLoader\n","from torchsummary import summary\n","from torchvision import datasets, transforms\n","\n","from core_qnn.quaternion_layers import QuaternionConv, QuaternionLinear\n","from core_qnn.quaternion_ops import check_input, q_normalize\n","\n","device = torch.device('cuda' if torch.cuda.is_available else 'cpu')"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1864,"status":"ok","timestamp":1681579194177,"user":{"displayName":"joris weeda","userId":"16756172332098140791"},"user_tz":-120},"id":"vfTDOOC8ruuV","outputId":"8e079beb-674e-4be1-f0b4-6b37e4f7d21e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Files already downloaded and verified\n","Files already downloaded and verified\n","CPU times: user 1.41 s, sys: 273 ms, total: 1.68 s\n","Wall time: 1.68 s\n"]}],"source":["%%time\n","\n","# import and download the CIFAR10 dataset\n","transform_train = transforms.Compose([transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5))])\n","transform_test = transforms.Compose([transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5))])\n","\n","train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)\n","test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)\n","\n","classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"6iEEcR2ofk6L"},"outputs":[],"source":["class InvalidKernelShape(RuntimeError):\n"," \"\"\"Base class to generate custom exception if generating kernel failed.\"\"\"\n","\n"," def __init__(self, error_message):\n"," \"\"\" Construct custom error with custom error message.\n"," :param error_message: The custom error message.\n"," \"\"\"\n"," super().__init__(error_message)\n","\n","class InvalidInput(RuntimeError):\n"," \"\"\"Base class to generate custom exception if input is invalid.\"\"\"\n","\n"," def __init__(self, error_message):\n"," \"\"\" Construct custom error with custom error message.\n"," :param error_message: The custom error message.\n"," \"\"\"\n"," super().__init__(error_message)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Bq56qgrC3OLQ"},"outputs":[],"source":["class QuaternionConvolution(nn.Module):\n"," \"\"\"Reproduction class of the quaternion convolution 
layer.\"\"\"\n","\n"," ALLOWED_DIMENSIONS = (2, 3)\n","\n"," def __init__(self, in_channels, out_channels, kernel_size, stride, dimension=2, padding=0, dilation=1, groups=1, bias=True):\n"," \"\"\"Create the quaterion convolution layer.\"\"\"\n"," super(QuaternionConvolution, self).__init__()\n","\n"," self.in_channels = np.floor_divide(in_channels, 4)\n"," self.out_channels = np.floor_divide(out_channels, 4)\n","\n"," self.groups = groups\n"," self.stride = stride\n"," self.padding = padding\n"," self.dilation = dilation\n","\n"," self.kernel_size = self.get_kernel_shape(kernel_size, dimension)\n"," self.weight_shape = self.get_weight_shape(self.in_channels, self.out_channels, self.kernel_size)\n","\n"," self._weights = self.weight_tensors(self.weight_shape, kernel_size)\n"," self.r_weight, self.k_weight, self.i_weight, self.j_weight = self._weights\n"," \n"," if bias:\n"," self.bias = nn.Parameter(torch.Tensor(out_channels))\n"," nn.init.constant_(self.bias, 0)\n","\n"," def forward(self, x):\n"," \"\"\"Apply forward pass of input through quaternion convolution layer.\"\"\"\n"," cat_kernels_4_r = torch.cat([self.r_weight, -self.i_weight, -self.j_weight, -self.k_weight], dim=1)\n"," cat_kernels_4_i = torch.cat([self.i_weight, self.r_weight, -self.k_weight, self.j_weight], dim=1)\n"," cat_kernels_4_j = torch.cat([self.j_weight, self.k_weight, self.r_weight, -self.i_weight], dim=1)\n"," cat_kernels_4_k = torch.cat([self.k_weight, -self.j_weight, self.i_weight, self.r_weight], dim=1)\n","\n"," cat_kernels_4_quaternion = torch.cat([cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=0)\n","\n"," if x.dim() == 3:\n"," convfunc = F.conv1d\n"," elif x.dim() == 4:\n"," convfunc = F.conv2d\n"," elif x.dim() == 5:\n"," convfunc = F.conv3d\n"," else:\n"," raise InvalidInput(\"Given input channels do not match allowed dimensions\")\n","\n"," return convfunc(x, cat_kernels_4_quaternion, self.bias, self.stride, self.padding, self.dilation, self.groups)\n","\n"," @staticmethod\n"," def weight_tensors(weight_shape, kernel_size):\n"," \"\"\"Create and initialise the weight tensors according to quaternion rules.\"\"\"\n"," modulus = nn.Parameter(torch.Tensor(*weight_shape))\n"," modulus = nn.init.xavier_uniform_(modulus, gain=1.0)\n","\n"," i_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n"," j_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n"," k_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n","\n"," sum_imaginary_parts = i_weight.abs() + j_weight.abs() + k_weight.abs()\n","\n"," i_weight = torch.div(i_weight, sum_imaginary_parts)\n"," j_weight = torch.div(j_weight, sum_imaginary_parts)\n"," k_weight = torch.div(k_weight, sum_imaginary_parts)\n","\n"," phase = torch.rand(*weight_shape) * (2 * torch.tensor([np.pi])) - torch.tensor([np.pi])\n","\n"," r_weight = modulus * np.cos(phase)\n"," i_weight = modulus * i_weight * np.sin(phase)\n"," j_weight = modulus * j_weight * np.sin(phase)\n"," k_weight = modulus * k_weight * np.sin(phase)\n","\n"," return nn.Parameter(r_weight), nn.Parameter(i_weight), nn.Parameter(j_weight), nn.Parameter(k_weight)\n","\n"," @staticmethod\n"," def get_weight_shape(in_channels, out_channels, kernel_size):\n"," \"\"\"Construct weight shape based on the input/output channels and kernel size.\"\"\"\n"," return (out_channels, in_channels) + kernel_size\n","\n"," @staticmethod\n"," def get_kernel_shape(kernel_size, dimension):\n"," \"\"\"Construct the kernel shape based on the given dimension and kernel size.\"\"\"\n"," if dimension not in 
QuaternionConvolution.ALLOWED_DIMENSIONS:\n"," raise InvalidKernelShape('Given dimensions are not allowed.')\n"," \n"," if isinstance(kernel_size, int):\n"," return (kernel_size, ) * dimension\n","\n"," if isinstance(kernel_size, tuple):\n"," if len(kernel_size) != dimension:\n"," raise InvalidKernelShape('Given kernel shape does not match dimension.')\n","\n"," return kernel_size\n","\n"," raise InvalidKernelShape('No valid type of kernel size to construct kernel.')\n","\n"," def __repr__(self):\n"," return self.__class__.__name__ + '(' \\\n"," + 'in_channels=' + str(self.in_channels) \\\n"," + ', out_channels=' + str(self.out_channels) \\\n"," + ', kernel_size=' + str(self.kernel_size) \\\n"," + ', stride=' + str(self.stride) + ')'\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2170,"status":"ok","timestamp":1681579196339,"user":{"displayName":"joris weeda","userId":"16756172332098140791"},"user_tz":-120},"id":"bn_GMalk2BoY","outputId":"a2f9d742-31ae-4a05-d735-942fd452ba1e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Number of trainable parameters: 2032650\n","----------------------------------------------------------------\n"," Layer (type) Output Shape Param #\n","================================================================\n","QuaternionConvolution-1 [32, 64, 30, 30] 64\n","QuaternionConvolution-2 [32, 128, 28, 28] 128\n"," MaxPool2d-3 [32, 128, 14, 14] 0\n"," Dropout-4 [32, 128, 14, 14] 0\n","QuaternionConvolution-5 [32, 256, 12, 12] 256\n","QuaternionConvolution-6 [32, 512, 10, 10] 512\n"," MaxPool2d-7 [32, 512, 5, 5] 0\n"," Dropout-8 [32, 512, 5, 5] 0\n"," QuaternionLinear-9 [32, 512] 512\n"," Dropout-10 [32, 512] 0\n"," Linear-11 [32, 10] 5,130\n"," Softmax-12 [32, 10] 0\n","================================================================\n","Total params: 6,602\n","Trainable params: 5,130\n","Non-trainable params: 1,472\n","----------------------------------------------------------------\n","Input size (MB): 0.50\n","Forward/backward pass size (MB): 78.82\n","Params size (MB): 0.03\n","Estimated Total Size (MB): 79.34\n","----------------------------------------------------------------\n","CPU times: user 2.24 s, sys: 137 ms, total: 2.38 s\n","Wall time: 2.46 s\n"]}],"source":["%%time\n","\n","class CustomQCNN(nn.Module):\n"," \"\"\"Reproduction QCNN to validate quaternion convolution layer.\"\"\"\n","\n"," def __init__(self, in_channels, hidden_channels, out_features, kernel_size):\n"," super(CustomQCNN, self).__init__()\n","\n"," self.conv_1 = QuaternionConvolution(in_channels, hidden_channels[0], kernel_size, 1)\n"," self.conv_2 = QuaternionConvolution(hidden_channels[0], hidden_channels[1], kernel_size, 1)\n","\n"," self.pool_1 = nn.MaxPool2d(2, 2)\n"," self.dropout_1 = nn.Dropout(0.25)\n","\n"," self.conv_3 = QuaternionConvolution(hidden_channels[1], hidden_channels[2], kernel_size, 1)\n"," self.conv_4 = QuaternionConvolution(hidden_channels[2], hidden_channels[3], kernel_size, 1)\n","\n"," self.pool_2 = nn.MaxPool2d(2, 2)\n"," self.dropout_2 = nn.Dropout(0.25)\n","\n"," self.fc_1 = QuaternionLinear(12800, 512)\n"," self.fc_2 = nn.Linear(512, out_features)\n","\n"," self.dropout_3 = nn.Dropout(0.5)\n"," self.sm = nn.Softmax(dim=1)\n","\n"," def forward(self, x):\n"," x = F.relu(self.conv_1(x))\n"," x = F.relu(self.conv_2(x))\n"," x = self.pool_1(x)\n"," x = self.dropout_1(x)\n","\n"," x = F.relu(self.conv_3(x))\n"," x = F.relu(self.conv_4(x))\n"," x = 
self.pool_2(x)\n"," x = self.dropout_2(x)\n","\n"," x = torch.flatten(x, start_dim=1) \n","\n"," x = F.relu(self.fc_1(x))\n"," x = self.dropout_3(x)\n"," x = self.fc_2(x)\n"," x = self.sm(x)\n","\n"," return x\n","\n","# Model parameters\n","in_channels = 4\n","hidden_channels = [64, 128, 256, 512]\n","out_features = 10\n","kernel_size = (3, 3)\n","\n","batch_size = 32\n","\n","custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)\n","custom_qcnn = custom_qcnn.cuda()\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in custom_qcnn.parameters() if p.requires_grad))\n","summary(custom_qcnn, input_size=(in_channels, 32, 32), batch_size=batch_size, device=device.type)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":20,"status":"ok","timestamp":1681579196340,"user":{"displayName":"joris weeda","userId":"16756172332098140791"},"user_tz":-120},"id":"4waYaaGMKp2n","outputId":"88e0fba2-0829-4b1c-c68e-d1b3e410e8ae"},"outputs":[{"name":"stdout","output_type":"stream","text":["Number of trainable parameters: 640\n","tensor([[[ 4.9678e-02, 1.7548e-02, -1.5976e-01],\n"," [-1.5517e-02, -3.2999e-02, -3.3856e-05],\n"," [ 3.9830e-02, -1.6522e-02, -8.2282e-03]]], grad_fn=)\n","CPU times: user 7.28 ms, sys: 0 ns, total: 7.28 ms\n","Wall time: 7.43 ms\n"]}],"source":["%%time\n","paper_qcnn_layer = QuaternionConv(4, 64, kernel_size, stride=1)\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in paper_qcnn_layer.parameters() if p.requires_grad))\n","print(paper_qcnn_layer.i_weight[0])"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":15,"status":"ok","timestamp":1681579196342,"user":{"displayName":"joris weeda","userId":"16756172332098140791"},"user_tz":-120},"id":"nvSKH5k7iYCK","outputId":"bde6e926-01d9-449c-855b-b4a1ed508967"},"outputs":[{"name":"stdout","output_type":"stream","text":["Number of trainable parameters: 640\n","tensor([[[ 8.9675e-03, -1.2020e-02, 5.8783e-02],\n"," [-2.6882e-05, 5.7910e-02, -4.7084e-02],\n"," [ 1.3167e-02, 7.9821e-02, 7.1172e-02]]], grad_fn=)\n","CPU times: user 4.68 ms, sys: 0 ns, total: 4.68 ms\n","Wall time: 5.78 ms\n"]}],"source":["%%time\n","custom_qcnn_layer = QuaternionConvolution(4, 64, kernel_size, 1)\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in custom_qcnn_layer.parameters() if p.requires_grad))\n","print(custom_qcnn_layer.i_weight[0])"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/"},"id":"OyAWqLtFrLGY","outputId":"eea48d68-cc7c-4e12-93f3-a2e8b852516e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Start training seed 1\n","Epoch [11/80], Last loss: 2.0787\n","Epoch [21/80], Last loss: 1.7392\n","Epoch [31/80], Last loss: 1.6275\n","Epoch [41/80], Last loss: 1.6913\n","Epoch [51/80], Last loss: 1.5220\n","Epoch [61/80], Last loss: 1.6078\n","Epoch [71/80], Last loss: 1.6412\n","Finished training seed 1, accuracy of the network: 78.45%, elapsed time: 1957 sec\n","Start training seed 2\n","Epoch [11/80], Last loss: 1.9188\n","Epoch [21/80], Last loss: 1.6543\n","Epoch [31/80], Last loss: 1.6585\n","Epoch [41/80], Last loss: 1.5151\n","Epoch [51/80], Last loss: 1.6406\n","Epoch [61/80], Last loss: 1.7523\n","Epoch [71/80], Last loss: 1.4807\n","Finished training seed 2, accuracy of the network: 77.72%, elapsed time: 
1963 sec\n","Start training seed 3\n","Epoch [11/80], Last loss: 1.9496\n","Epoch [21/80], Last loss: 1.7193\n","Epoch [31/80], Last loss: 1.6582\n","Epoch [41/80], Last loss: 1.6938\n","Epoch [51/80], Last loss: 1.5250\n","Epoch [61/80], Last loss: 1.7079\n","Epoch [71/80], Last loss: 1.5793\n","Finished training seed 3, accuracy of the network: 78.14%, elapsed time: 1954 sec\n","Average accuracy over 3, 80 epochs each results in: 78.10333333333334\n","CPU times: user 1h 5min 2s, sys: 3min 18s, total: 1h 8min 21s\n","Wall time: 1h 37min 55s\n"]}],"source":["%%time\n","num_epochs = 80\n","amount_of_trainings = 3\n","\n","learning_rate = 0.0001\n","learning_rate_decay = 1e-6\n","\n","batch_size = 32\n","\n","custom_qcnn_accs = []\n","trainings_seed_excution_time = []\n","\n","for training_seed in range(amount_of_trainings):\n"," print(f'Start training seed {training_seed + 1}')\n"," start_time_training_seed = time.time()\n","\n"," train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)\n"," test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2)\n","\n"," custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)\n"," custom_qcnn = custom_qcnn.cuda()\n","\n"," optimizer = torch.optim.RMSprop(custom_qcnn.parameters(),lr=learning_rate, weight_decay=learning_rate_decay)\n"," criterion = nn.CrossEntropyLoss()\n","\n"," for epoch in range(1, num_epochs):\n"," \n"," custom_qcnn.train()\n","\n"," for index, (x_batch, y_batch) in enumerate(train_loader):\n"," zeros_channel = torch.zeros((x_batch.shape[0], 1, x_batch.shape[2], x_batch.shape[3]))\n"," x_batch = torch.cat([x_batch, zeros_channel], dim=1)\n","\n"," # Check if the input size is correct\n"," check_input(x_batch)\n","\n"," x_batch = x_batch.cuda()\n"," y_batch = y_batch.cuda()\n"," \n"," # Perform forward pass\n"," y_pred = custom_qcnn(x_batch)\n","\n"," # Compute the loss\n"," loss = criterion(y_pred, y_batch)\n","\n"," # Backpropagation\n"," optimizer.zero_grad()\n"," loss.backward()\n"," optimizer.step()\n","\n"," if (epoch / 10).is_integer():\n"," print (f'Epoch [{epoch + 1}/{num_epochs}], Last loss: {loss.item():.4f}')\n","\n"," with torch.no_grad():\n"," n_correct = 0\n"," n_samples = 0\n","\n"," custom_qcnn.eval()\n","\n"," for index, (x_batch, y_batch) in enumerate(test_loader):\n"," zeros_channel = torch.zeros((x_batch.shape[0], 1, x_batch.shape[2], x_batch.shape[3]))\n"," x_batch = torch.cat([x_batch, zeros_channel], dim=1)\n","\n"," x_batch = x_batch.cuda()\n"," y_batch = y_batch.cuda()\n","\n"," # Check if the input size is correct\n"," check_input(x_batch)\n","\n"," # Perform forward pass\n"," y_pred = custom_qcnn(x_batch)\n","\n"," _, predicted = torch.max(y_pred,1)\n"," n_samples += y_batch.size(0)\n"," n_correct += (predicted == y_batch).sum().item()\n","\n"," acc = 100 * n_correct / n_samples\n"," custom_qcnn_accs.append(acc)\n"," \n"," elapsed_training_time = int(time.time() - start_time_training_seed)\n"," trainings_seed_excution_time.append(start_time_training_seed)\n","\n"," print(f'Finished training seed {training_seed + 1}, accuracy of the network: {acc}%, elapsed time: {elapsed_training_time} sec')\n","\n","print(f'Average accuracy over {amount_of_trainings}, {num_epochs} epochs each results in: {sum(custom_qcnn_accs) / amount_of_trainings}')"]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[]},"gpuClass":"standard","kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} -------------------------------------------------------------------------------- /reproduction/qcnn_linear_layer.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","gpuClass":"standard"},"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4QXmP5W3Gr0j","executionInfo":{"status":"ok","timestamp":1681669155594,"user_tz":-120,"elapsed":13556,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}},"outputId":"37981c91-2f42-4e27-e38f-f43556c669f1"},"outputs":[{"output_type":"stream","name":"stdout","text":["Python 3.9.16\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting cifar10\n"," Downloading cifar10-1.0.0-py3-none-any.whl (7.9 kB)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from cifar10) (4.65.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from cifar10) (1.22.4)\n","Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from cifar10) (2.27.1)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2.0.12)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2022.12.7)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (3.4)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (1.26.15)\n","Installing collected packages: cifar10\n","Successfully installed cifar10-1.0.0\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: imageio in /usr/local/lib/python3.9/dist-packages (2.25.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (1.22.4)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (1.10.1)\n","Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.9/dist-packages (from imageio) (8.4.0)\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Collecting git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks\n"," Cloning https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to /tmp/pip-req-build-we8ykjyv\n"," Running command git clone --filter=blob:none --quiet https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks /tmp/pip-req-build-we8ykjyv\n"," Resolved https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to commit 28caa7cde240e354fd7b87280450fd233cd494c3\n"," Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Building wheels for collected packages: Pytorch-QNN\n"," Building wheel for Pytorch-QNN (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for Pytorch-QNN: filename=Pytorch_QNN-1-py3-none-any.whl size=21507 sha256=81a7b7a1a945b45f96418dc4212a2675d80a8ce0b2e241321ae3dbcaacedcf97\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-wiawi2os/wheels/4d/ef/23/2dab0a09f3d2ba797d554a613cf4d367a6da42f613ca046eed\n","Successfully built Pytorch-QNN\n","Installing collected packages: Pytorch-QNN\n","Successfully installed Pytorch-QNN-1\n"]}],"source":["# run this cell to download the right packages (only needed once)\n","!python --version\n","\n","!pip install cifar10\n","!pip install imageio numpy scipy \n","!pip install git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks"]},{"cell_type":"code","source":["import time\n","import torch\n","\n","import numpy as np\n","import torch.nn as nn\n","import torch.nn.functional as F\n","\n","from pathlib import Path\n","from torch.utils.data import DataLoader\n","from torchsummary import summary\n","from torchvision import datasets, transforms\n","\n","from core_qnn.quaternion_layers import QuaternionConv, QuaternionLinear\n","from core_qnn.quaternion_ops import check_input, q_normalize\n","\n","device = torch.device('cuda' if torch.cuda.is_available else 'cpu')"],"metadata":{"id":"utugAreOGteM"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%%time\n","\n","# import and download the CIFAR10 dataset\n","transform_train = transforms.Compose([transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5))])\n","transform_test = transforms.Compose([transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5))])\n","\n","train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)\n","test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)\n","\n","classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4vOREz-aGtpI","executionInfo":{"status":"ok","timestamp":1681669171137,"user_tz":-120,"elapsed":8975,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}},"outputId":"ef939462-e468-46f3-bc40-a8afed4e8d8d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz\n"]},{"output_type":"stream","name":"stderr","text":["100%|██████████| 170498071/170498071 [00:02<00:00, 69836548.66it/s]\n"]},{"output_type":"stream","name":"stdout","text":["Extracting ./data/cifar-10-python.tar.gz to ./data\n","Files already downloaded and verified\n","CPU times: user 3.55 s, sys: 1 s, total: 4.55 s\n","Wall time: 9.69 s\n"]}]},{"cell_type":"code","source":["class QuaternionLin(nn.Module):\n"," \"\"\"Reproduction class of the quaternion linear layer.\"\"\"\n"," \n"," def __init__(self, in_channels, out_channels, dimension=2, bias=True):\n"," \"\"\"Create the quaterion linear layer.\"\"\"\n"," super(QuaternionLin, self).__init__()\n","\n"," self.in_channels = np.floor_divide(in_channels, 4)\n"," self.out_channels = np.floor_divide(out_channels, 4)\n","\n"," self.weight_shape = self.get_weight_shape(self.in_channels, self.out_channels)\n"," self._weights = self.weight_tensors(self.weight_shape)\n","\n"," self.r_weight, self.k_weight, self.i_weight, self.j_weight = self._weights\n","\n"," if bias:\n"," self.bias = nn.Parameter(torch.Tensor(out_channels))\n"," nn.init.constant_(self.bias, 0)\n","\n"," def 
forward(self, input_x):\n"," \"\"\"Apply forward pass of input through quaternion linear layer.\"\"\"\n"," cat_kernels_4_r = torch.cat([self.r_weight, -self.i_weight, -self.j_weight, -self.k_weight], dim=0)\n"," cat_kernels_4_i = torch.cat([self.i_weight, self.r_weight, -self.k_weight, self.j_weight], dim=0)\n"," cat_kernels_4_j = torch.cat([self.j_weight, self.k_weight, self.r_weight, -self.i_weight], dim=0)\n"," cat_kernels_4_k = torch.cat([self.k_weight, -self.j_weight, self.i_weight, self.r_weight], dim=0)\n","\n"," cat_kernels_4_quaternion = torch.cat([cat_kernels_4_r, cat_kernels_4_i, cat_kernels_4_j, cat_kernels_4_k], dim=1)\n","\n"," if self.bias is not None:\n"," return torch.addmm(self.bias, input_x, cat_kernels_4_quaternion)\n","\n"," return torch.matmul(input_x, cat_kernels_4_quaternion)\n","\n"," @staticmethod\n"," def weight_tensors(weight_shape):\n"," \"\"\"Create and initialise the weight tensors according to quaternion rules.\"\"\"\n"," modulus = nn.Parameter(torch.Tensor(*weight_shape))\n"," modulus = nn.init.xavier_uniform_(modulus, gain=1.0)\n","\n"," i_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n"," j_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n"," k_weight = 2.0 * torch.rand(*weight_shape) - 1.0\n","\n"," sum_imaginary_parts = i_weight.abs() + j_weight.abs() + k_weight.abs()\n","\n"," i_weight = torch.div(i_weight, sum_imaginary_parts)\n"," j_weight = torch.div(j_weight, sum_imaginary_parts)\n"," k_weight = torch.div(k_weight, sum_imaginary_parts)\n","\n"," phase = torch.rand(*weight_shape) * (2 * torch.tensor([np.pi])) - torch.tensor([np.pi])\n","\n"," r_weight = modulus * np.cos(phase)\n"," i_weight = modulus * i_weight * np.sin(phase)\n"," j_weight = modulus * j_weight * np.sin(phase)\n"," k_weight = modulus * k_weight * np.sin(phase)\n","\n"," return nn.Parameter(r_weight), nn.Parameter(i_weight), nn.Parameter(j_weight), nn.Parameter(k_weight)\n","\n"," @staticmethod\n"," def get_weight_shape(in_channels, out_channels):\n"," \"\"\"Construct weight shape based on the input/output channels.\"\"\"\n"," return (in_channels, out_channels)\n","\n"," def __repr__(self):\n"," return self.__class__.__name__ + '(' \\\n"," + 'in_channels=' + str(self.in_channels) \\\n"," + ', out_channels=' + str(self.out_channels) + ')'\n"],"metadata":{"id":"RMYS8igQGtwi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%%time\n","\n","class CustomQCNN(nn.Module):\n"," \"\"\"Reproduction QCNN to validate quaternion convolution layer.\"\"\"\n","\n"," def __init__(self, in_channels, hidden_channels, out_features, kernel_size):\n"," super(CustomQCNN, self).__init__()\n","\n"," self.conv_1 = QuaternionConv(in_channels, hidden_channels[0], kernel_size, 1)\n"," self.conv_2 = QuaternionConv(hidden_channels[0], hidden_channels[1], kernel_size, 1)\n","\n"," self.pool_1 = nn.MaxPool2d(2, 2)\n"," self.dropout_1 = nn.Dropout(0.25)\n","\n"," self.conv_3 = QuaternionConv(hidden_channels[1], hidden_channels[2], kernel_size, 1)\n"," self.conv_4 = QuaternionConv(hidden_channels[2], hidden_channels[3], kernel_size, 1)\n","\n"," self.pool_2 = nn.MaxPool2d(2, 2)\n"," self.dropout_2 = nn.Dropout(0.25)\n","\n"," self.fc_1 = QuaternionLin(12800, 512)\n"," self.fc_2 = nn.Linear(512, out_features)\n","\n"," self.dropout_3 = nn.Dropout(0.5)\n"," self.sm = nn.Softmax(dim=1)\n","\n"," def forward(self, x):\n"," x = F.relu(self.conv_1(x))\n"," x = F.relu(self.conv_2(x))\n"," x = self.pool_1(x)\n"," x = self.dropout_1(x)\n","\n"," x = F.relu(self.conv_3(x))\n"," x = 
F.relu(self.conv_4(x))\n"," x = self.pool_2(x)\n"," x = self.dropout_2(x)\n","\n"," x = torch.flatten(x, start_dim=1) \n","\n"," x = F.relu(self.fc_1(x))\n"," x = self.dropout_3(x)\n"," x = self.fc_2(x)\n"," x = self.sm(x)\n","\n"," return x\n","\n","# Model parameters\n","in_channels = 4\n","hidden_channels = [64, 128, 256, 512]\n","out_features = 10\n","kernel_size = (3, 3)\n","\n","batch_size = 32\n","\n","custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)\n","custom_qcnn = custom_qcnn.cuda()\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in custom_qcnn.parameters() if p.requires_grad))\n","summary(custom_qcnn, input_size=(in_channels, 32, 32), batch_size=batch_size, device=device.type)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jgK4iik1G3uU","executionInfo":{"status":"ok","timestamp":1681669173243,"user_tz":-120,"elapsed":1917,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}},"outputId":"2e99a7eb-c421-44a2-b4fc-d4fbff07f1c4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Number of trainable parameters: 2032650\n","----------------------------------------------------------------\n"," Layer (type) Output Shape Param #\n","================================================================\n"," QuaternionConv-1 [32, 64, 30, 30] 64\n"," QuaternionConv-2 [32, 128, 28, 28] 128\n"," MaxPool2d-3 [32, 128, 14, 14] 0\n"," Dropout-4 [32, 128, 14, 14] 0\n"," QuaternionConv-5 [32, 256, 12, 12] 256\n"," QuaternionConv-6 [32, 512, 10, 10] 512\n"," MaxPool2d-7 [32, 512, 5, 5] 0\n"," Dropout-8 [32, 512, 5, 5] 0\n"," QuaternionLin-9 [32, 512] 512\n"," Dropout-10 [32, 512] 0\n"," Linear-11 [32, 10] 5,130\n"," Softmax-12 [32, 10] 0\n","================================================================\n","Total params: 6,602\n","Trainable params: 5,130\n","Non-trainable params: 1,472\n","----------------------------------------------------------------\n","Input size (MB): 0.50\n","Forward/backward pass size (MB): 78.82\n","Params size (MB): 0.03\n","Estimated Total Size (MB): 79.34\n","----------------------------------------------------------------\n","CPU times: user 596 ms, sys: 237 ms, total: 832 ms\n","Wall time: 2.03 s\n"]}]},{"cell_type":"code","source":["%%time\n","paper_qcnn_lin_layer = QuaternionLinear(12800, 512)\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in paper_qcnn_lin_layer.parameters() if p.requires_grad))\n","paper_qcnn_lin_layer.i_weight[0][:50]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6PwYhc7VSMnA","executionInfo":{"status":"ok","timestamp":1681669176167,"user_tz":-120,"elapsed":2932,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}},"outputId":"345f0cf7-dec8-466d-a965-919d3cefdc25"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Number of trainable parameters: 1638912\n","CPU times: user 2.46 s, sys: 4.87 ms, total: 2.46 s\n","Wall time: 2.53 s\n"]},{"output_type":"execute_result","data":{"text/plain":["tensor([-9.8619e-03, 1.7768e-02, -3.9290e-03, -8.4551e-03, -9.6563e-03,\n"," 6.3405e-03, -9.6673e-03, -8.2083e-03, 1.0748e-02, 1.6635e-03,\n"," 7.7834e-03, -1.8080e-02, 2.0989e-04, -1.2731e-02, -6.1434e-03,\n"," -8.1282e-03, 1.1271e-02, -1.2184e-02, 1.2324e-03, 1.0997e-02,\n"," -4.6240e-03, 1.9024e-02, 3.5174e-03, 7.7954e-03, 6.4949e-03,\n"," 2.2042e-02, -1.5830e-03, -1.0838e-02, -5.8510e-03, 3.0616e-03,\n"," -6.7673e-03, -1.7402e-02, 
-9.4927e-03, -1.1655e-02, -4.9278e-04,\n"," 7.3662e-04, -1.3883e-02, -9.8667e-06, 9.0085e-03, 2.5610e-03,\n"," -9.9933e-03, -1.7828e-03, 6.2730e-03, -4.0444e-03, -7.5603e-04,\n"," -6.5449e-03, -2.2364e-03, 8.6866e-04, -3.7308e-04, 1.2813e-02],\n"," grad_fn=)"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["%%time\n","custom_qcnn_lin_layer = QuaternionLin(12800, 512)\n","\n","print(\"Number of trainable parameters: \", sum(p.numel() for p in custom_qcnn_lin_layer.parameters() if p.requires_grad))\n","custom_qcnn_lin_layer.i_weight[0][:50]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DhsKbxSASb6y","executionInfo":{"status":"ok","timestamp":1681669176168,"user_tz":-120,"elapsed":31,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}},"outputId":"a64af330-71e6-4e99-a935-bd31d7f23b2a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Number of trainable parameters: 1638912\n","CPU times: user 31.7 ms, sys: 0 ns, total: 31.7 ms\n","Wall time: 31.7 ms\n"]},{"output_type":"execute_result","data":{"text/plain":["tensor([ 4.2403e-05, -3.0373e-03, 2.4217e-04, 1.2262e-02, 4.8097e-04,\n"," 3.4563e-04, 1.0125e-02, 4.6110e-05, -4.4688e-03, 2.1976e-03,\n"," -1.1161e-02, 1.7865e-03, 2.6552e-04, -2.0037e-03, 7.9006e-03,\n"," 5.3779e-03, -5.1694e-03, -3.0157e-03, 9.0666e-03, 5.5432e-04,\n"," 1.8890e-04, -2.3435e-03, 1.7509e-02, 3.2966e-03, -3.0224e-03,\n"," -7.9977e-04, -4.0333e-03, 1.6774e-03, -1.7125e-03, 4.8665e-04,\n"," -1.2629e-04, 3.5356e-03, -1.4928e-02, 1.4676e-02, -1.2352e-04,\n"," -3.9960e-03, 6.0097e-03, -2.1924e-04, -7.7298e-03, 9.5267e-04,\n"," -5.3146e-03, -3.2860e-03, 7.6674e-03, 4.6479e-03, -2.3704e-03,\n"," 1.2056e-04, 8.2204e-03, 3.5477e-04, -6.8082e-03, -3.2168e-03],\n"," grad_fn=)"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["%%time\n","num_epochs = 80\n","amount_of_trainings = 3\n","\n","learning_rate = 0.0001\n","learning_rate_decay = 1e-6\n","\n","batch_size = 32\n","\n","custom_qcnn_accs = []\n","trainings_seed_excution_time = []\n","\n","for training_seed in range(amount_of_trainings):\n"," print(f'Start training seed {training_seed + 1}')\n"," start_time_training_seed = time.time()\n","\n"," train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)\n"," test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2)\n","\n"," custom_qcnn = CustomQCNN(in_channels, hidden_channels, out_features, kernel_size)\n"," custom_qcnn = custom_qcnn.cuda()\n","\n"," optimizer = torch.optim.RMSprop(custom_qcnn.parameters(),lr=learning_rate, weight_decay=learning_rate_decay)\n"," criterion = nn.CrossEntropyLoss()\n","\n"," for epoch in range(1, num_epochs):\n"," \n"," custom_qcnn.train()\n","\n"," for index, (x_batch, y_batch) in enumerate(train_loader):\n"," zeros_channel = torch.zeros((x_batch.shape[0], 1, x_batch.shape[2], x_batch.shape[3]))\n"," x_batch = torch.cat([x_batch, zeros_channel], dim=1)\n","\n"," # Check if the input size is correct\n"," check_input(x_batch)\n","\n"," x_batch = x_batch.cuda()\n"," y_batch = y_batch.cuda()\n"," \n"," # Perform forward pass\n"," y_pred = custom_qcnn(x_batch)\n","\n"," # Compute the loss\n"," loss = criterion(y_pred, y_batch)\n","\n"," # Backpropagation\n"," optimizer.zero_grad()\n"," loss.backward()\n"," optimizer.step()\n","\n"," if (epoch / 10).is_integer():\n"," print (f'Epoch [{epoch + 1}/{num_epochs}], Last loss: 
{loss.item():.4f}')\n","\n"," with torch.no_grad():\n"," n_correct = 0\n"," n_samples = 0\n","\n"," custom_qcnn.eval()\n","\n"," for index, (x_batch, y_batch) in enumerate(test_loader):\n"," zeros_channel = torch.zeros((x_batch.shape[0], 1, x_batch.shape[2], x_batch.shape[3]))\n"," x_batch = torch.cat([x_batch, zeros_channel], dim=1)\n","\n"," x_batch = x_batch.cuda()\n"," y_batch = y_batch.cuda()\n","\n"," # Check if the input size is correct\n"," check_input(x_batch)\n","\n"," # Perform forward pass\n"," y_pred = custom_qcnn(x_batch)\n","\n"," _, predicted = torch.max(y_pred,1)\n"," n_samples += y_batch.size(0)\n"," n_correct += (predicted == y_batch).sum().item()\n","\n"," acc = 100 * n_correct / n_samples\n"," custom_qcnn_accs.append(acc)\n"," \n"," elapsed_training_time = int(time.time() - start_time_training_seed)\n"," trainings_seed_excution_time.append(start_time_training_seed)\n","\n"," print(f'Finished training seed {training_seed + 1}, accuracy of the network: {acc}%, elapsed time: {elapsed_training_time} sec')\n","\n","print(f'Average accuracy over {amount_of_trainings}, {num_epochs} epochs each results in: {sum(custom_qcnn_accs) / amount_of_trainings}')"],"metadata":{"id":"IYnxL4TYG4CB","colab":{"base_uri":"https://localhost:8080/"},"outputId":"bec4c795-45fc-4684-ceb8-b95eafbe0368","executionInfo":{"status":"ok","timestamp":1681674450111,"user_tz":-120,"elapsed":5273963,"user":{"displayName":"joris weeda","userId":"16756172332098140791"}}},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Start training seed 1\n","Epoch [11/80], Last loss: 1.9361\n","Epoch [21/80], Last loss: 1.6766\n","Epoch [31/80], Last loss: 1.5703\n","Epoch [41/80], Last loss: 1.8227\n","Epoch [51/80], Last loss: 1.5918\n","Epoch [61/80], Last loss: 1.5288\n","Epoch [71/80], Last loss: 1.6498\n","Finished training seed 1, accuracy of the network: 76.81%, elapsed time: 1778 sec\n","Start training seed 2\n","Epoch [11/80], Last loss: 1.7154\n","Epoch [21/80], Last loss: 1.7754\n","Epoch [31/80], Last loss: 1.5692\n","Epoch [41/80], Last loss: 1.5782\n","Epoch [51/80], Last loss: 1.6479\n","Epoch [61/80], Last loss: 1.4617\n","Epoch [71/80], Last loss: 1.5288\n","Finished training seed 2, accuracy of the network: 78.19%, elapsed time: 1747 sec\n","Start training seed 3\n","Epoch [11/80], Last loss: 1.6538\n","Epoch [21/80], Last loss: 1.7058\n","Epoch [31/80], Last loss: 1.5524\n","Epoch [41/80], Last loss: 1.6071\n","Epoch [51/80], Last loss: 1.5236\n","Epoch [61/80], Last loss: 1.5637\n","Epoch [71/80], Last loss: 1.5399\n","Finished training seed 3, accuracy of the network: 78.02%, elapsed time: 1747 sec\n","Average accuracy over 3, 80 epochs each results in: 77.67333333333333\n","CPU times: user 57min 26s, sys: 2min 57s, total: 1h 24s\n","Wall time: 1h 27min 53s\n"]}]}]} -------------------------------------------------------------------------------- /results/cnn_results_table_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | }, 15 | "accelerator": "GPU", 16 | "gpuClass": "standard" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "source": [ 22 | "# Reproduction Table 1, CNN\n", 23 | "In this notebook the CNN result of table 1 is reproduced using PyTorch.\n", 24 | "\n", 25 | 
"\n" 26 | ], 27 | "metadata": { 28 | "id": "5NodS0rcy0_P" 29 | } 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": { 35 | "id": "A0bUOOkH8od2", 36 | "colab": { 37 | "base_uri": "https://localhost:8080/" 38 | }, 39 | "outputId": "19d0ac46-d892-49d9-e82b-d04bd51e91fb" 40 | }, 41 | "outputs": [ 42 | { 43 | "output_type": "stream", 44 | "name": "stdout", 45 | "text": [ 46 | "Python 3.9.16\n", 47 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 48 | "Requirement already satisfied: cifar10 in /usr/local/lib/python3.9/dist-packages (1.0.0)\n", 49 | "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from cifar10) (2.27.1)\n", 50 | "Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from cifar10) (1.22.4)\n", 51 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from cifar10) (4.65.0)\n", 52 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (3.4)\n", 53 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2022.12.7)\n", 54 | "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2.0.12)\n", 55 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (1.26.15)\n", 56 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 57 | "Requirement already satisfied: imageio in /usr/local/lib/python3.9/dist-packages (2.25.1)\n", 58 | "Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (1.22.4)\n", 59 | "Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (1.10.1)\n", 60 | "Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.9/dist-packages (from imageio) (8.4.0)\n", 61 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 62 | "Collecting git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks\n", 63 | " Cloning https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to /tmp/pip-req-build-f2d4i7a4\n", 64 | " Running command git clone --filter=blob:none --quiet https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks /tmp/pip-req-build-f2d4i7a4\n", 65 | " Resolved https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to commit 28caa7cde240e354fd7b87280450fd233cd494c3\n", 66 | " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 67 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 68 | "Requirement already satisfied: torchinfo in /usr/local/lib/python3.9/dist-packages (1.7.2)\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | " # run this cell to download the right packages (only needed once)\n", 74 | "!python --version\n", 75 | "\n", 76 | "!pip install cifar10\n", 77 | "!pip install imageio numpy scipy \n", 78 | "!pip install git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks\n", 79 | "!pip install torchinfo\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "source": [ 85 | "import torch\n", 86 | "import torchvision\n", 87 | "\n", 88 | "import numpy as np\n", 89 | "import torch.nn as nn\n", 90 | "import torch.nn.functional as F\n", 91 | "\n", 92 | "from torchsummary import summary\n", 93 | "from torch.utils.data import DataLoader\n", 94 | "from torchinfo import summary as summary_info\n", 95 | "from torchvision.transforms import transforms\n", 96 | "from torchvision import datasets\n", 97 | "\n", 98 | "device = torch.device('cuda' if torch.cuda.is_available else 'cpu')" 99 | ], 100 | "metadata": { 101 | "id": "FrEo-nMWxcun" 102 | }, 103 | "execution_count": null, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "source": [ 109 | "%%time\n", 110 | "\n", 111 | "# import and download the CIFAR10 dataset\n", 112 | "batch_size = 32\n", 113 | "\n", 114 | "transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((.5,.5,.5),(.5,.5,.5))]) \n", 115 | "\n", 116 | "transform_train = transforms.Compose([transforms.ToTensor()])\n", 117 | "transform_test = transforms.Compose([transforms.ToTensor()])\n", 118 | "\n", 119 | "train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)\n", 120 | "test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)\n", 121 | "\n", 122 | "train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)\n", 123 | "test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=2)\n", 124 | "\n", 125 | "\n", 126 | "classes = ('plane','car','bird','cat','deer','dog','frog','horse','ship','truck')" 127 | ], 128 | "metadata": { 129 | "colab": { 130 | "base_uri": "https://localhost:8080/" 131 | }, 132 | "id": "GVdrTE-vt45z", 133 | "outputId": "0c9ee8f0-71c4-4a80-8c18-271275c946ee" 134 | }, 135 | "execution_count": null, 136 | "outputs": [ 137 | { 138 | "output_type": "stream", 139 | "name": "stdout", 140 | "text": [ 141 | "Files already downloaded and verified\n", 142 | "Files already downloaded and verified\n", 143 | "CPU times: user 1.35 s, sys: 301 ms, total: 1.65 s\n", 144 | "Wall time: 1.69 s\n" 145 | ] 146 | } 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "source": [ 152 | "class ConvNet(nn.Module):\n", 153 | " def __init__(self, in_channels, hidden_channels, out_features, kernel_size):\n", 154 | " super(ConvNet, self).__init__()\n", 155 | " self.conv_1 = nn.Conv2d(in_channels, hidden_channels[0], kernel_size)\n", 156 | " self.conv_2 = nn.Conv2d(hidden_channels[0], hidden_channels[1], kernel_size)\n", 157 | "\n", 158 | " self.pool_1 = nn.MaxPool2d(2, 2)\n", 159 | " self.dropout_1 = nn.Dropout(0.25)\n", 160 | "\n", 161 | " self.conv_3 = nn.Conv2d(hidden_channels[1], hidden_channels[2], kernel_size)\n", 162 | " self.conv_4 = nn.Conv2d(hidden_channels[2], hidden_channels[3], 
kernel_size)\n", 163 | "\n", 164 | " self.pool_2 = nn.MaxPool2d(2, 2)\n", 165 | " self.dropout_2 = nn.Dropout(0.25)\n", 166 | "\n", 167 | " self.fc_1 = nn.Linear(4608, 512)\n", 168 | " self.fc_2 = nn.Linear(512, out_features)\n", 169 | "\n", 170 | " self.dropout_3 = nn.Dropout(0.5)\n", 171 | " self.sm = nn.Softmax(dim=1)\n", 172 | "\n", 173 | " def forward(self,x):\n", 174 | " x = F.relu(self.conv_1(x))\n", 175 | " x = F.relu(self.conv_2(x))\n", 176 | " x = self.pool_1(x)\n", 177 | " x = self.dropout_1(x)\n", 178 | "\n", 179 | " x = F.relu(self.conv_3(x))\n", 180 | " x = F.relu(self.conv_4(x))\n", 181 | " x = self.pool_2(x)\n", 182 | " x = self.dropout_2(x)\n", 183 | "\n", 184 | " x = torch.flatten(x, start_dim=1) \n", 185 | "\n", 186 | " x = F.relu(self.fc_1(x))\n", 187 | " x = self.dropout_3(x)\n", 188 | " x = self.fc_2(x)\n", 189 | " x = self.sm(x)\n", 190 | "\n", 191 | " return x\n", 192 | "\n", 193 | "\n", 194 | "# Model parameters\n", 195 | "in_channels = 3\n", 196 | "hidden_channels = [64, 128, 256, 512]\n", 197 | "out_features = 10\n", 198 | "kernel_size = 4\n", 199 | "\n", 200 | "cnn = ConvNet(in_channels, hidden_channels, out_features, kernel_size)\n", 201 | "cnn = cnn.cuda()\n", 202 | "print(\"Number of trainable parameters: \", sum(p.numel() for p in cnn.parameters() if p.requires_grad))\n", 203 | "\n", 204 | "# print model summary\n", 205 | "summary(cnn, input_size=(in_channels, 32, 32), batch_size=batch_size, device=device.type)\n" 206 | ], 207 | "metadata": { 208 | "colab": { 209 | "base_uri": "https://localhost:8080/" 210 | }, 211 | "id": "-7PDPS208mHL", 212 | "outputId": "43235b64-9aa0-4c35-c598-c3a780f3c0b4" 213 | }, 214 | "execution_count": null, 215 | "outputs": [ 216 | { 217 | "output_type": "stream", 218 | "name": "stdout", 219 | "text": [ 220 | "Number of trainable parameters: 5121482\n", 221 | "----------------------------------------------------------------\n", 222 | " Layer (type) Output Shape Param #\n", 223 | "================================================================\n", 224 | " Conv2d-1 [32, 64, 29, 29] 3,136\n", 225 | " Conv2d-2 [32, 128, 26, 26] 131,200\n", 226 | " MaxPool2d-3 [32, 128, 13, 13] 0\n", 227 | " Dropout-4 [32, 128, 13, 13] 0\n", 228 | " Conv2d-5 [32, 256, 10, 10] 524,544\n", 229 | " Conv2d-6 [32, 512, 7, 7] 2,097,664\n", 230 | " MaxPool2d-7 [32, 512, 3, 3] 0\n", 231 | " Dropout-8 [32, 512, 3, 3] 0\n", 232 | " Linear-9 [32, 512] 2,359,808\n", 233 | " Dropout-10 [32, 512] 0\n", 234 | " Linear-11 [32, 10] 5,130\n", 235 | " Softmax-12 [32, 10] 0\n", 236 | "================================================================\n", 237 | "Total params: 5,121,482\n", 238 | "Trainable params: 5,121,482\n", 239 | "Non-trainable params: 0\n", 240 | "----------------------------------------------------------------\n", 241 | "Input size (MB): 0.38\n", 242 | "Forward/backward pass size (MB): 59.71\n", 243 | "Params size (MB): 19.54\n", 244 | "Estimated Total Size (MB): 79.62\n", 245 | "----------------------------------------------------------------\n" 246 | ] 247 | } 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "source": [ 253 | "%%time\n", 254 | "num_epochs = 80\n", 255 | "learning_rate = 0.0001\n", 256 | "learning_rate_decay = 1e-6\n", 257 | "\n", 258 | "criterion = nn.CrossEntropyLoss()\n", 259 | "optimizer = torch.optim.RMSprop(cnn.parameters(),lr=learning_rate, weight_decay=learning_rate_decay)\n", 260 | "\n", 261 | "n_total_step = len(train_loader)\n", 262 | " \n", 263 | "for epoch in range(num_epochs):\n", 264 | " for i,(images, labels) 
in enumerate(train_loader):\n", 265 | " images = images.cuda()\n", 266 | " labels = labels.cuda()\n", 267 | " outputs = cnn(images)\n", 268 | " loss = criterion(outputs, labels)\n", 269 | " \n", 270 | " # Backpropagation\n", 271 | " optimizer.zero_grad()\n", 272 | " loss.backward()\n", 273 | " optimizer.step()\n", 274 | " \n", 275 | " print(f\"Epoch [{epoch+1}/{num_epochs}], Last loss: {loss.item():.4f}\")\n", 276 | " \n", 277 | "print(\"Finished training\") " 278 | ], 279 | "metadata": { 280 | "id": "KsEvT72jqaPO", 281 | "colab": { 282 | "base_uri": "https://localhost:8080/" 283 | }, 284 | "outputId": "77819cca-9b62-4e1a-e09e-7275fa6a8889" 285 | }, 286 | "execution_count": null, 287 | "outputs": [ 288 | { 289 | "output_type": "stream", 290 | "name": "stdout", 291 | "text": [ 292 | "Epoch [1/80], Last loss: 2.0785\n", 293 | "Epoch [2/80], Last loss: 2.2880\n", 294 | "Epoch [3/80], Last loss: 2.0752\n", 295 | "Epoch [4/80], Last loss: 1.8501\n", 296 | "Epoch [5/80], Last loss: 1.8796\n", 297 | "Epoch [6/80], Last loss: 1.8211\n", 298 | "Epoch [7/80], Last loss: 1.7624\n", 299 | "Epoch [8/80], Last loss: 1.8702\n", 300 | "Epoch [9/80], Last loss: 1.8104\n", 301 | "Epoch [10/80], Last loss: 1.7699\n", 302 | "Epoch [11/80], Last loss: 1.8345\n", 303 | "Epoch [12/80], Last loss: 1.8089\n", 304 | "Epoch [13/80], Last loss: 1.7506\n", 305 | "Epoch [14/80], Last loss: 1.8241\n", 306 | "Epoch [15/80], Last loss: 1.6014\n", 307 | "Epoch [16/80], Last loss: 1.8853\n", 308 | "Epoch [17/80], Last loss: 1.4983\n", 309 | "Epoch [18/80], Last loss: 1.6422\n", 310 | "Epoch [19/80], Last loss: 1.8272\n", 311 | "Epoch [20/80], Last loss: 1.5766\n", 312 | "Epoch [21/80], Last loss: 1.6398\n", 313 | "Epoch [22/80], Last loss: 1.9576\n", 314 | "Epoch [23/80], Last loss: 1.6632\n", 315 | "Epoch [24/80], Last loss: 1.6935\n", 316 | "Epoch [25/80], Last loss: 1.4854\n", 317 | "Epoch [26/80], Last loss: 1.6618\n", 318 | "Epoch [27/80], Last loss: 1.8221\n", 319 | "Epoch [28/80], Last loss: 1.8102\n", 320 | "Epoch [29/80], Last loss: 1.5862\n", 321 | "Epoch [30/80], Last loss: 1.6905\n", 322 | "Epoch [31/80], Last loss: 1.7193\n", 323 | "Epoch [32/80], Last loss: 1.5237\n", 324 | "Epoch [33/80], Last loss: 1.6457\n", 325 | "Epoch [34/80], Last loss: 1.4837\n", 326 | "Epoch [35/80], Last loss: 1.5088\n", 327 | "Epoch [36/80], Last loss: 1.5921\n", 328 | "Epoch [37/80], Last loss: 1.6996\n", 329 | "Epoch [38/80], Last loss: 1.5499\n", 330 | "Epoch [39/80], Last loss: 1.6473\n", 331 | "Epoch [40/80], Last loss: 1.5839\n", 332 | "Epoch [41/80], Last loss: 1.7715\n", 333 | "Epoch [42/80], Last loss: 1.5863\n", 334 | "Epoch [43/80], Last loss: 1.6933\n", 335 | "Epoch [44/80], Last loss: 1.5847\n", 336 | "Epoch [45/80], Last loss: 1.6158\n", 337 | "Epoch [46/80], Last loss: 1.5434\n", 338 | "Epoch [47/80], Last loss: 1.6774\n", 339 | "Epoch [48/80], Last loss: 1.5680\n", 340 | "Epoch [49/80], Last loss: 1.6495\n", 341 | "Epoch [50/80], Last loss: 1.6579\n", 342 | "Epoch [51/80], Last loss: 1.5342\n", 343 | "Epoch [52/80], Last loss: 1.5310\n", 344 | "Epoch [53/80], Last loss: 1.5272\n", 345 | "Epoch [54/80], Last loss: 1.5238\n", 346 | "Epoch [55/80], Last loss: 1.5245\n", 347 | "Epoch [56/80], Last loss: 1.4941\n", 348 | "Epoch [57/80], Last loss: 1.5251\n", 349 | "Epoch [58/80], Last loss: 1.4612\n", 350 | "Epoch [59/80], Last loss: 1.5229\n", 351 | "Epoch [60/80], Last loss: 1.5796\n", 352 | "Epoch [61/80], Last loss: 1.7513\n", 353 | "Epoch [62/80], Last loss: 1.5286\n", 354 | "Epoch [63/80], Last loss: 1.6094\n", 355 
| "Epoch [64/80], Last loss: 1.5238\n", 356 | "Epoch [65/80], Last loss: 1.5240\n", 357 | "Epoch [66/80], Last loss: 1.6485\n", 358 | "Epoch [67/80], Last loss: 1.5326\n", 359 | "Epoch [68/80], Last loss: 1.6010\n", 360 | "Epoch [69/80], Last loss: 1.5187\n", 361 | "Epoch [70/80], Last loss: 1.6429\n", 362 | "Epoch [71/80], Last loss: 1.4612\n", 363 | "Epoch [72/80], Last loss: 1.5237\n", 364 | "Epoch [73/80], Last loss: 1.4616\n", 365 | "Epoch [74/80], Last loss: 1.5267\n", 366 | "Epoch [75/80], Last loss: 1.5847\n", 367 | "Epoch [76/80], Last loss: 1.4612\n", 368 | "Epoch [77/80], Last loss: 1.6461\n", 369 | "Epoch [78/80], Last loss: 1.5237\n", 370 | "Epoch [79/80], Last loss: 1.4664\n", 371 | "Epoch [80/80], Last loss: 1.4641\n", 372 | "Finished training\n", 373 | "CPU times: user 34min 18s, sys: 1min 1s, total: 35min 19s\n", 374 | "Wall time: 39min 25s\n" 375 | ] 376 | } 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "source": [ 382 | "with torch.no_grad():\n", 383 | " n_correct = 0\n", 384 | " n_samples = 0\n", 385 | "\n", 386 | " n_class_correct = [0 for i in range(10)]\n", 387 | " n_class_sample = [0 for i in range(10)]\n", 388 | "\n", 389 | " for images, labels in test_loader:\n", 390 | " images = images.cuda()\n", 391 | " labels = labels.cuda()\n", 392 | " outputs = cnn(images)\n", 393 | " \n", 394 | "\n", 395 | " _, predicted = torch.max(outputs,1)\n", 396 | " n_samples += labels.size(0)\n", 397 | " n_correct += (predicted == labels).sum().item()\n", 398 | "\n", 399 | " for i in range(labels.size(0)):\n", 400 | " label = labels[i]\n", 401 | " pred = predicted[i]\n", 402 | " if(label==pred):\n", 403 | " n_class_correct[label] += 1\n", 404 | " n_class_sample[label] += 1\n", 405 | "\n", 406 | " acc = 100 * n_correct / n_samples\n", 407 | " print(f\"Accuracy of the network: {acc}%\")\n", 408 | "\n", 409 | " for i in range(10):\n", 410 | " acc = 100 * n_class_correct[i]/ n_class_sample[i]\n", 411 | " print(f\"Accuracy of {classes[i]}: {acc} %\")" 412 | ], 413 | "metadata": { 414 | "id": "lbSO3cZzJegS", 415 | "colab": { 416 | "base_uri": "https://localhost:8080/" 417 | }, 418 | "outputId": "e53d95c3-14d8-46ef-f643-6b6e46e2a10d" 419 | }, 420 | "execution_count": null, 421 | "outputs": [ 422 | { 423 | "output_type": "stream", 424 | "name": "stdout", 425 | "text": [ 426 | "Accuracy of the network: 75.83%\n", 427 | "Accuracy of plane: 81.0 %\n", 428 | "Accuracy of car: 86.8 %\n", 429 | "Accuracy of bird: 71.9 %\n", 430 | "Accuracy of cat: 58.7 %\n", 431 | "Accuracy of deer: 77.9 %\n", 432 | "Accuracy of dog: 55.1 %\n", 433 | "Accuracy of frog: 82.2 %\n", 434 | "Accuracy of horse: 77.6 %\n", 435 | "Accuracy of ship: 84.9 %\n", 436 | "Accuracy of truck: 82.2 %\n" 437 | ] 438 | } 439 | ] 440 | } 441 | ] 442 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reproduction of quaternion convolutional neural networks (QCNN) 2 | 3 | # Recommended sources 4 | 5 | Original paper: 6 | 7 | * Paper: https://arxiv.org/pdf/1903.00658.pdf 8 | * Data: https://www.cs.toronto.edu/~kriz/cifar.html 9 | * Github: https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks 10 | 11 | Our reproduction: 12 | * Github: https://github.com/JorisWeeda/Quaternion-Convolutional-Neural-Networks 13 | 14 | 15 | # Summary of our work 16 | The paper we have chosen is titled ”Quaternion Convolutional Neural Networks”. 
It provides a new method of processing images with neural networks by representing the image information in a richer way, namely in the quaternion domain. Nowadays, computer vision tasks are commonly solved using convolutional neural networks (CNN), a powerful feature representation method that relies heavily on convolution layers. These convolution layers efficiently extract features from high-dimensional structured data using a set of convolution kernels. In the case of multi-channel input (e.g. RGB or HSV images), the per-channel results are typically summed to produce a single output for every kernel. 17 | 18 | Although CNNs have been proven to achieve state-of-the-art results, this processing strategy inherently suffers from some drawbacks in color image processing [paper]. The summation of the channel results ignores possible interrelationships between the colors, and there is a significant risk of losing important structural information [paper]. This paper proposes an improved method of defining image information, which preserves the relationships between the channels and provides a structural representation of color images. The main objective of this code base is to provide a reproduction of this new method for handling images in convolutional neural networks (CNN), as described in [this paper](https://arxiv.org/pdf/1903.00658.pdf). The paper proposes an improved way of representing the images, using quaternions, before processing them through a CNN. Within this blog post, the classification results shown in Table 1 of the paper are reproduced and discussed using the original library accompanying the paper. Thereafter, an improved version of both the convolution and linear layer is implemented and benchmarked against the earlier gathered results. 19 | 20 | 21 | # Introduction 22 | Our objective was to reproduce the results reported in the paper using the CIFAR-10 dataset. At first this was done using the already existing code for the QCNN, provided in the paper's Git repository. To compare with the results from Table 1 in the paper, we also needed a benchmark CNN, which applies the regular CNN structure and engine. This also meant that the code and results needed to be validated, so besides reproducing the code, a hyperparameter check was needed. Finally, if time allowed, our aim was to replicate the code from scratch and with a different variant (linear-layer QCNN). 23 | 24 | # Reproduction of results 25 | In this blog post, we delve into the world of Convolutional Neural Networks (CNNs) and Quaternion Convolutional Neural Networks (QCNNs), comparing their approaches to color image processing. We further explore the findings of the paper "Quaternion Convolutional Neural Networks", which utilizes the CIFAR-10 dataset, a widely used benchmark for image classification tasks consisting of 60,000 32x32 color images across 10 classes. 26 | 27 | The paper compares the architectures of CNNs and QCNNs (as detailed in Table 1), focusing on the differences in handling color information. We examine the process of reproducing both the convolutional and linear layers of the networks and discuss the results obtained from these reproductions. 28 | 29 |
30 | 31 | ![plot](./resources/img_expirement_results_classification_task_paper.png) 32 | 33 | _Figure 1: Classification results as proposed in the paper._ 34 | 35 |
36 | 37 | By the end of this blog post, you will gain insight into the reproducibility of this paper, understand the respective architectures (CNN & QCNN), and learn about their performance in color image processing tasks. 38 | 39 | ## Dataset 40 | CIFAR-10 is a dataset consisting of 60,000 32x32 color images in 10 classes, with 6,000 images per class. The classes represent common real-world objects such as airplanes, dogs, cats, and cars. The dataset is divided into 50,000 training images and 10,000 testing images. CIFAR-10 is commonly used as a benchmark dataset for image classification tasks in computer vision and deep learning. It is relatively small compared to other popular datasets such as ImageNet, but it is still challenging due to the small image size and the variety of object classes. 41 | 42 | ## Convolutional based color image processing (CNN) 43 | Convolutional Neural Networks (CNNs) are a specialized type of deep learning model that has proven to be highly effective at handling image data. They stand out due to their architecture, which is designed to imitate the human visual system, enabling them to automatically learn and detect features from images with minimal human intervention. 44 | 45 | Concretely, CNNs learn features from images through a series of convolutional layers, pooling layers, activation functions, and fully connected layers. This stack efficiently detects and extracts patterns, structures, and meaningful information from images, which has made CNNs the go-to solution for tasks like image classification, object detection, and semantic segmentation. 46 | 47 | Because CNNs learn their features automatically, they are highly efficient and adaptable. As a result, they have found applications in diverse areas such as image classification, object detection, semantic segmentation, and facial recognition, to name just a few. 48 | 49 | ## Quaternion-based color image processing (QCNN) 50 | Quaternion Convolutional Neural Networks (QCNNs) are an extension of traditional CNNs that leverage quaternion algebra to process images more effectively, particularly when dealing with color information. While standard CNNs operate on real-valued tensors, QCNNs use quaternion-valued tensors, enabling them to capture inter-channel correlations and reduce the number of parameters in the network. By representing color pixels as quaternions (see the encoding sketch after Figure 2), QCNNs obtain a more compact and expressive representation, leading to improved performance in tasks like color image classification, object detection, and semantic segmentation, while keeping the architecture computationally efficient compared to traditional CNNs. 51 | 52 |
53 | 54 | ![plot](./resources/img_qcnn_vs_cnn.png) 55 | 56 | _Figure 2: Illustration of the difference between CNN and QCNN on convolution layers._ 57 | 58 |
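To make the quaternion representation concrete, the sketch below shows one way to pad CIFAR-10 RGB batches into the four-channel (pure quaternion) format that the QCNN layers expect: the real component is set to zero and the R, G, B channels become the three imaginary components. The helper name `rgb_to_quaternion` is ours and only illustrative; the notebooks achieve the same effect by adding a zero-padded fourth channel.

```
import torch

def rgb_to_quaternion(images: torch.Tensor) -> torch.Tensor:
    """Encode a batch of RGB images (N, 3, H, W) as pure quaternions (N, 4, H, W)."""
    zeros = torch.zeros_like(images[:, :1])   # (N, 1, H, W) real component, all zeros
    return torch.cat([zeros, images], dim=1)  # (N, 4, H, W) quaternion-valued image

# Example: a batch of two CIFAR-10 sized images
batch = torch.rand(2, 3, 32, 32)
print(rgb_to_quaternion(batch).shape)  # torch.Size([2, 4, 32, 32])
```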
59 | 60 | 61 | ## Weight initialisation and training phase 62 | Weight initialization is critical for the successful training of any network, including the QCNN. In the QCNN, the scaling factor $s$ controls the magnitude of the transformed vector, while the rotation angle $\theta$ is an additional parameter that only rotates the input vector. Since the projection onto the gray axis is unaffected by $\theta$, we follow the suggestion in [11] and perform normalized initialization to keep the variance of the gradients constant during training. The paper proposes initializing $s_j$ and $\theta$ for the $j$-th layer as two uniform random variables: 63 | 64 | $$ 65 | \begin{align*} 66 | s_j \sim U \left[-\sqrt{\frac{6}{n_j+n_{j+1}}}, \sqrt{\frac{6}{n_j+n_{j+1}}}\right], \quad \theta \sim U \left[-\frac{\pi}{2}, \frac{\pi}{2}\right]. 67 | \end{align*} 68 | $$ 69 | 70 | 71 | ## Reproduced table 1 72 | The CNN and QCNN are used for image classification on the 10 classes of the CIFAR-10 dataset. The CNN accepts RGB images with 3 channels, while the QCNN accepts 4 channels (including a padded zeros channel for the quaternion representation). Both networks are tested on their accuracy on CIFAR-10, a popular benchmark dataset for image classification tasks in machine learning. The dataset consists of 60,000 color images in 10 classes, with 50,000 training images and 10,000 test images. The images are 32x32 pixels and are labeled with one of the following classes: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck. 73 | 74 | As the paper lacks detailed information on the layers of the CNN and QCNN, the models are designed based on general knowledge and trial and error. The extended networks have four convolutional layers with 64, 128, 256, and 512 filters, respectively, followed by max pooling and dropout layers. Two fully connected layers are used to output the 10 classes. The kernel size for all convolutional layers is 3. During training, the model is run for 80 epochs with a learning rate of 0.0001, using an RMSprop optimizer with a weight decay of 1e-6. The cross-entropy loss function is used as the criterion. Results with this architecture and the corresponding training parameters are shown in the following table (a sketch of the CNN variant is given below the table): 75 | 76 |
77 | 78 | | **Model** | **Code base used** | **Trainable parameters** | **Training time** | **Dataset** | **Accuracy** | 79 | |---------------------------------------------------------|--------------------------|--------------------------|-------------------|-------------|--------------| 80 | | Convolutional neural network (CNN) | Pytorch | 5,121,482 | 39min 25s | CIFAR10 | 75.83% | 81 | | Quaternion convolutional neural network (QCNN) | Pytorch & Orkis-research | 2,032,650 | 32min 44s | CIFAR10 | 77.05% | 82 | 83 |
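For reference, the following is a minimal PyTorch sketch of the extended CNN benchmark described above (four convolutional layers with 64, 128, 256 and 512 filters, kernel size 3, max pooling and dropout, two fully connected layers). The strides, dropout rate and hidden width are not stated explicitly, so the values below are assumptions for illustration only and the parameter count of this sketch will not exactly match the 5,121,482 reported in the table; the full definition lives in `results/cnn_results_table_1.ipynb`.

```
import torch
import torch.nn as nn

class BenchmarkCNN(nn.Module):
    # Assumed layout: conv(3->64) -> conv(64->128) -> conv(128->256) -> conv(256->512),
    # each followed by ReLU, 2x2 max pooling and dropout, then two fully connected layers.
    def __init__(self, num_classes: int = 10, p_drop: float = 0.25):
        super().__init__()
        blocks = []
        channels = [3, 64, 128, 256, 512]
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            blocks += [nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                       nn.ReLU(),
                       nn.MaxPool2d(2),
                       nn.Dropout(p_drop)]
        self.features = nn.Sequential(*blocks)
        # 32x32 input halved four times -> 2x2 spatial resolution before the classifier
        self.classifier = nn.Sequential(nn.Flatten(),
                                        nn.Linear(512 * 2 * 2, 512),
                                        nn.ReLU(),
                                        nn.Linear(512, num_classes))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.classifier(self.features(x))

model = BenchmarkCNN()
print(sum(p.numel() for p in model.parameters() if p.requires_grad))  # trainable parameters
```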
84 | 85 | It is important to mention that the absence of detailed architectural information on both the CNN and the QCNN results in trainable parameter counts that differ from the paper. The paper explicitly mentions that the QCNN should have more parameters and that both the CNN and QCNN can be considered shallow networks. However, the given guidelines for the number of convolution blocks, convolutional layers and linear layers describe a deeper neural network, as shallow networks typically have only one or two hidden layers. As a result, our QCNN has fewer parameters than the CNN benchmark model. 86 | 87 | Reducing the complexity of the architecture to two convolution layers, one dropout layer, one pooling layer and a single linear layer with a softmax output gives a better representation of a shallow network. These changes bring the ratio of learnable parameters in line with the statement in the paper, but do not produce useful results, as the following table shows. 88 | 89 |
90 | 91 | | **Model** | **Code base used** | **Trainable parameters** | **Training time** | **Dataset** | **Accuracy** | 92 | |---------------------------------------------------------|--------------------------|--------------------------|-------------------|-------------|--------------| 93 | | Shallow convolutional neural network (CNN) | Pytorch | 289,226 | 16min 24s | CIFAR10 | 43.66% | 94 | | Shallow Quaternion convolutional neural network (QCNN) | Pytorch & Orkis-research | 2,788,608 | 21min 42s | CIFAR10 | 38.77% | 95 | 96 |
97 | 98 | # Reproduction of the layers 99 | This section covers the code pieces we have successfully replicated, namely the quaternion convolutional layer and the quaternion linear layer. We provide details on these layers and include the results obtained with our implementation. 100 | 101 | ## Quaternion convolutional layer 102 | The quaternion convolutional layer is the building block of a Quaternion Convolutional Neural Network (QCNN). Unlike traditional Convolutional Neural Networks (CNNs), QCNNs use quaternion-valued filters instead of real-valued filters. A quaternion is a four-dimensional extension of a complex number, composed of one real part and three imaginary parts. In a QCNN, the quaternion filter therefore contains four channels, representing the real part and the three imaginary parts. 103 | 104 | During the forward pass of a QCNN, the input tensor is convolved with the quaternion filter to produce the output tensor. This convolution differs from traditional CNNs in that it involves the multiplication of two quaternions (the Hamilton product), rather than the dot product of two real-valued vectors. This allows the QCNN to capture spatial orientation information, such as roll, pitch, and yaw, which cannot be represented using real-valued filters. The mathematical quaternion convolution introduced by the paper is summarized in the appendix. 105 | 106 | The backward pass in a quaternion convolutional layer is similar to that in a regular convolutional layer used in traditional CNNs. The gradient with respect to the input feature map is computed by convolving the gradient with respect to the output feature map with the rotated conjugate of the filter, and then summing across all filters. The gradients with respect to the weights are computed by convolving the gradient with respect to the output feature map with the conjugate of the input feature map, and then summing across all feature maps. 107 | 108 | The created layer is more efficient and readable than the layer available in the GitHub repository accompanying the paper. Our implementation uses the PyTorch methods in an improved manner, resulting in less code and enhanced readability. The created layer and the provided layer are compared after initialisation to verify that they are initialised in a similar fashion. Three individual training runs have been executed to verify the created layer in the architecture described earlier during the result reproduction; a minimal sketch of the underlying quaternion convolution is given after the table below. 109 | 110 |
111 | 112 | | **Training run** | **Epochs** | **Training time** | **Dataset** | **Accuracy** | 113 | |------------------|------------|-------------------|-------------|--------------| 114 | | 1 | 80 | 34min 43s | CIFAR10 | 78.45% | 115 | | 2 | 80 | 34min 37s | CIFAR10 | 77.72% | 116 | | 3 | 80 | 39min 34s | CIFAR10 | 78.14% | 117 | 118 |
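To make the forward pass concrete, the following is a minimal sketch (ours, not the repository's code) of how a quaternion convolution can be assembled from four real-valued weight tensors via the Hamilton product. Class and parameter names are illustrative; the actual reproduction lives in `reproduction/qcnn_convolutional_layer.ipynb` and additionally applies the weight initialisation discussed later.

```
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleQuaternionConv2d(nn.Module):
    """Quaternion convolution sketch: one real weight tensor per quaternion component,
    combined through the Hamilton product. Channel counts must be divisible by 4."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        shape = (out_channels // 4, in_channels // 4, kernel_size, kernel_size)
        # Placeholder random init; see the weight initialisation section for the paper's scheme.
        self.r = nn.Parameter(torch.randn(shape) * 0.01)  # real component weights
        self.i = nn.Parameter(torch.randn(shape) * 0.01)  # i component weights
        self.j = nn.Parameter(torch.randn(shape) * 0.01)  # j component weights
        self.k = nn.Parameter(torch.randn(shape) * 0.01)  # k component weights
        self.stride, self.padding = stride, padding

    def forward(self, x):
        r, i, j, k = self.r, self.i, self.j, self.k
        # Rows of the Hamilton product written as one large real-valued kernel,
        # so a single conv2d call produces all four output components at once.
        weight = torch.cat([
            torch.cat([r, -i, -j, -k], dim=1),
            torch.cat([i,  r, -k,  j], dim=1),
            torch.cat([j,  k,  r, -i], dim=1),
            torch.cat([k, -j,  i,  r], dim=1),
        ], dim=0)
        return F.conv2d(x, weight, stride=self.stride, padding=self.padding)

layer = SimpleQuaternionConv2d(4, 32, kernel_size=3, padding=1)
print(layer(torch.rand(1, 4, 32, 32)).shape)  # torch.Size([1, 32, 32, 32])
```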
119 | 120 | 121 | ## Quaternion linear layer 122 | Just like the quaternion convolutional layer, the quaternion-based linear layer is an essential element of a Quaternion Convolutional Neural Network (QCNN). It follows the same Hamilton-product construction sketched above, applied to a weight matrix instead of a convolution kernel. As before, the network is trained using the RMSprop optimizer and the cross-entropy loss function; the training loop performs forward and backward passes on batches of the data, updating the model's parameters with the gradients computed during backpropagation. 123 | 124 | During the forward pass, the input tensor is passed through each layer of the network, with each layer linearly transforming the tensor based on its weights and biases. This transformation diverges from traditional neural networks in that it involves the multiplication of quaternion values instead of the dot product of real-valued vectors. The final output tensor is passed through the softmax activation function to produce the class probabilities. 125 | 126 | During the backward pass, the loss gradient is computed with respect to each parameter in the network using the chain rule of calculus. The optimizer then updates each parameter by subtracting the product of its gradient and the learning rate, which determines the size of the steps taken in the direction of the gradient (see the short training-loop sketch after the table below). 127 | 128 |
129 | 130 | | **Training run** | **Epochs** | **Training time** | **Dataset** | **Accuracy** | 131 | |------------------|------------|-------------------|-------------|--------------| 132 | | 1 | 80 | 30min 58s | CIFAR10 | 76.81% | 133 | | 2 | 80 | 29min 47s | CIFAR10 | 78.19% | 134 | | 3 | 80 | 29min 47s | CIFAR10 | 78.02% | 135 | 136 |
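The parameter update described above corresponds to the standard PyTorch training loop used in the notebooks. A condensed sketch is shown below; the function signature is ours, the hyperparameters are the ones stated earlier (80 epochs, learning rate 0.0001, RMSprop with weight decay 1e-6, cross-entropy loss), and `model`, `train_loader` and `device` are assumed to be set up as in the notebooks.

```
import torch
import torch.nn as nn

def train(model, train_loader, device, num_epochs=80):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4, weight_decay=1e-6)
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            loss = criterion(model(images), labels)  # forward pass
            optimizer.zero_grad()
            loss.backward()                          # backward pass via the chain rule
            optimizer.step()                         # step against the (adapted) gradient
        print(f"Epoch [{epoch + 1}/{num_epochs}], last loss: {loss.item():.4f}")
```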
137 | 138 | ## Different weight initialisation 139 | The GitHub repository provides weight initialization code that differs from the initialization method proposed in the referenced paper. The repository initialises the weights of both the linear and convolutional layers using the following code snippet: 140 | ``` 141 | s = 1. / np.sqrt(2*fan_in) 142 | modulus = chi.rvs(4,loc=0,scale=s,size=kernel_shape) 143 | ``` 144 | As can be seen, this implementation does not match the initialisation proposed in the paper and discussed earlier. In both the quaternion linear and quaternion convolutional reproductions, the weight initialisation as stated by the paper is implemented using the following piece of code: 145 | ``` 146 | modulus = nn.Parameter(torch.Tensor(*weight_shape)) 147 | modulus = nn.init.xavier_uniform_(modulus, gain=1.0) 148 | ``` 149 | A fuller sketch of this initialisation, including the rotation angle $\theta$, is given after Figure 3. 150 |
151 | 152 | ![](./resources/img_initialisation_pytorch.png) 153 | 154 | _Figure 3: Weight initialisation following the paper and PyTorch utilities._ 155 | 156 |
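For completeness, a minimal sketch of the paper-style initialisation is shown below. It draws the scaling factor with `xavier_uniform_`, which implements exactly the $U\left[-\sqrt{6/(n_j+n_{j+1})}, \sqrt{6/(n_j+n_{j+1})}\right]$ distribution given earlier, and draws the rotation angle uniformly from $[-\pi/2, \pi/2]$. The function name and the way the angle is stored are our own assumptions, not the repository's API.

```
import math
import torch
import torch.nn as nn

def init_quaternion_weights(weight_shape):
    """Paper-style initialisation sketch: scaling factor s_j via normalized (Xavier/Glorot)
    uniform initialisation, rotation angle theta drawn uniformly from [-pi/2, pi/2]."""
    modulus = nn.Parameter(torch.empty(*weight_shape))
    nn.init.xavier_uniform_(modulus, gain=1.0)  # s_j ~ U[-sqrt(6/(n_j+n_{j+1})), +sqrt(6/(n_j+n_{j+1}))]
    theta = nn.Parameter(torch.empty(*weight_shape).uniform_(-math.pi / 2, math.pi / 2))
    return modulus, theta

# Example for a 3x3 quaternion convolution kernel with 16 input and 32 output quaternions
modulus, theta = init_quaternion_weights((32, 16, 3, 3))
print(modulus.shape, theta.shape)
```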
157 | 158 | # Conclusion 159 | In conclusion, the replication process of the paper on Quaternion Convolutional Neural Networks (QCNNs) has shown varied results. Based on the results in the tables above, the weight initialization technique proposed in the paper was successfully implemented and resulted in consistent gradient variance during training. 160 | 161 | Due to the absence of detailed architectural information for the CNN and QCNN models in the paper, the models were designed based on general knowledge and trial and error. In the first experiment, the QCNN model outperformed the CNN model, achieving an accuracy of 77.05% compared to the CNN's 75.83%. When the complexity of the architecture was reduced to create a shallow network representation, the results were not useful, as the shallow CNN achieved an accuracy of 43.66% while the shallow QCNN only reached 38.77%. 162 | 163 | The quaternion convolutional and linear layers were successfully replicated and improved. Three individual training runs for each layer confirmed their effectiveness when incorporated into the architecture. The weight initialization code provided in the GitHub repository did not match the paper's proposal. After adjusting the weight initialization to follow the paper's recommendations, consistent results were obtained. 164 | 165 | From these findings, we can infer that the paper's approach to QCNNs has potential, but the lack of detailed architectural information makes it challenging to directly replicate the results. The improved models with adapted architectures outperformed their counterparts, indicating that QCNNs may indeed offer advantages in color image processing tasks. However, the results of the shallow network experiments suggest that further investigation and optimization are needed to better understand the full potential of QCNNs. 166 | 167 | # Discussion 168 | In this discussion, we incorporate the findings from the experiments involving Quaternion Convolutional Neural Networks (QCNNs) and their shallow counterparts. The process included understanding the paper's methodology, replicating the code, and conducting experiments to compare the performance of QCNNs with traditional Convolutional Neural Networks (CNNs). We also discuss the challenges faced during the replication process and the implications of the experimental results. 169 | 170 | **Key Takeaways** 171 | 172 | 1. **Replicating the paper's results:** Despite some challenges due to the absence of detailed architectural information for both CNN and QCNN models in the paper, we managed to replicate the results. The QCNN model outperformed the CNN model in the first experiment, achieving an accuracy of 77.05% compared to the CNN's 75.83%. 173 | 174 | 2. **Shallow network experiments:** When the complexity of the architecture was reduced to create a shallow network representation, the results were not as promising. The shallow CNN achieved an accuracy of 43.66%, while the shallow QCNN only reached 38.77%. This suggests that the shallow architecture may not be suitable for QCNNs and that further investigation and optimization are needed to uncover their full potential. 175 | 176 | 3. **Reproduction of quaternion layers:** The quaternion convolutional and linear layers were successfully replicated and incorporated into the architecture.
Three individual training runs for each layer confirmed their effectiveness, achieving accuracies between 76.81% and 78.19% for the linear layer and between 77.72% and 78.45% for the convolutional layer. 177 | 178 | 4. **Weight initialization:** A discrepancy between the weight initialization code provided in the GitHub repository and the paper’s proposal was discovered. After adjusting the weight initialization to follow the paper’s recommendations, consistent results were obtained, emphasizing the importance of proper weight initialization for achieving optimal performance. 179 | 180 | 5. **Challenges and limitations:** The replication process was hindered by the lack of detailed architectural information, making it challenging to directly replicate the results. Moreover, the results of the shallow network experiments suggest that further investigation and optimization are needed to better understand the full potential of QCNNs. 181 | 182 | Our attempts to reproduce the results of the Quaternion Convolutional Neural Networks paper and explore their performance in shallow network settings demonstrated the potential of QCNNs in color image processing tasks. However, the lack of detailed architectural information and the less promising results of the shallow network experiments highlight the need for further investigation and optimization. 183 | 184 | Future research could focus on exploring different architectures and optimization techniques to enhance the performance of QCNNs further. Additionally, investigating the applicability of QCNNs in other domains, such as video processing or 3D object recognition, could provide additional insights into the benefits of quaternion-based representations. 185 | 186 | -------------------------------------------------------------------------------- /results/qcnn_results_table_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "atY2AF9J_bXU" 7 | }, 8 | "source": [ 9 | "# Reproduction Table 1, QCNN\n", 10 | "In this notebook the QCNN result of table 1 is reproduced using PyTorch." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "colab": { 18 | "base_uri": "https://localhost:8080/" 19 | }, 20 | "id": "A0bUOOkH8od2", 21 | "outputId": "2eecfb55-ce7d-4c8e-bb52-f63f1f0ccf9b" 22 | }, 23 | "outputs": [ 24 | { 25 | "output_type": "stream", 26 | "name": "stdout", 27 | "text": [ 28 | "Python 3.9.16\n", 29 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 30 | "Collecting cifar10\n", 31 | " Downloading cifar10-1.0.0-py3-none-any.whl (7.9 kB)\n", 32 | "Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (from cifar10) (1.22.4)\n", 33 | "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from cifar10) (2.27.1)\n", 34 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from cifar10) (4.65.0)\n", 35 | "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2.0.12)\n", 36 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (2022.12.7)\n", 37 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (1.26.15)\n", 38 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->cifar10) (3.4)\n", 39 | "Installing collected packages: cifar10\n", 40 | "Successfully installed cifar10-1.0.0\n", 41 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 42 | "Requirement already satisfied: imageio in /usr/local/lib/python3.9/dist-packages (2.25.1)\n", 43 | "Requirement already satisfied: numpy in /usr/local/lib/python3.9/dist-packages (1.22.4)\n", 44 | "Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (1.10.1)\n", 45 | "Requirement already satisfied: pillow>=8.3.2 in /usr/local/lib/python3.9/dist-packages (from imageio) (8.4.0)\n", 46 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 47 | "Collecting git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks\n", 48 | " Cloning https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to /tmp/pip-req-build-mnqemws1\n", 49 | " Running command git clone --filter=blob:none --quiet https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks /tmp/pip-req-build-mnqemws1\n", 50 | " Resolved https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks to commit 28caa7cde240e354fd7b87280450fd233cd494c3\n", 51 | " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", 52 | "Building wheels for collected packages: Pytorch-QNN\n", 53 | " Building wheel for Pytorch-QNN (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 54 | " Created wheel for Pytorch-QNN: filename=Pytorch_QNN-1-py3-none-any.whl size=21507 sha256=2806e9054a3dbad45c88447967f5d62db109914f0e3c1e0873a2b94404fb6476\n", 55 | " Stored in directory: /tmp/pip-ephem-wheel-cache-_uw_7aa0/wheels/4d/ef/23/2dab0a09f3d2ba797d554a613cf4d367a6da42f613ca046eed\n", 56 | "Successfully built Pytorch-QNN\n", 57 | "Installing collected packages: Pytorch-QNN\n", 58 | "Successfully installed Pytorch-QNN-1\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "# run this cell to download the right packages (only needed once)\n", 64 | "!python --version\n", 65 | "\n", 66 | "!pip install cifar10\n", 67 | "!pip install imageio numpy scipy \n", 68 | "!pip install git+https://github.com/Orkis-Research/Pytorch-Quaternion-Neural-Networks" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "id": "FrEo-nMWxcun" 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "import torch\n", 80 | "import numpy as np\n", 81 | "import tensorflow as tf\n", 82 | "\n", 83 | "import torch.nn as nn\n", 84 | "import torch.nn.functional as F\n", 85 | "import torch.optim as optim\n", 86 | "import torchvision\n", 87 | "\n", 88 | "from pathlib import Path\n", 89 | "from torch.utils.data import DataLoader\n", 90 | "from torchsummary import summary\n", 91 | "from torchvision import datasets, transforms\n", 92 | "\n", 93 | "from core_qnn.quaternion_layers import QuaternionConv, QuaternionLinear\n", 94 | "from core_qnn.quaternion_ops import check_input, q_normalize\n", 95 | "\n", 96 | "device = torch.device('cuda' if torch.cuda.is_available else 'cpu')" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": { 103 | "colab": { 104 | "base_uri": "https://localhost:8080/", 105 | "height": 136, 106 | "referenced_widgets": [ 107 | "1bc0d19b8dc24fd5b13ad6b5eb472527", 108 | "d7a110a484714ff1a09ec8e6454bc4f1", 109 | "f49d840b3c554ccf92304d10366d6b75", 110 | "ad57ea03414f42ce94cebf0b918fc28c", 111 | "14e9eaee62204fe6b4105598dad2b64f", 112 | "c670f45ce70b445fbb515b7b82a5fcf7", 113 | "a3d41b38f3ab4790b55369d79353810b", 114 | "335434ccab4e45ff97a3cae323e1b271", 115 | "cbf8dd37565a40959a4d3bc47d634e31", 116 | "91204cf7b84a4ee1b7c1032d4b7956b0", 117 | "a69c1f00c93e475abfd2fb2274b944dd" 118 | ] 119 | }, 120 | "id": "QGUIzf_j5q-Z", 121 | "outputId": "08dc8760-74b8-4ac9-fa20-910d85acc152" 122 | }, 123 | "outputs": [ 124 | { 125 | "output_type": "stream", 126 | "name": "stdout", 127 | "text": [ 128 | "Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz\n" 129 | ] 130 | }, 131 | { 132 | "output_type": "display_data", 133 | "data": { 134 | "text/plain": [ 135 | " 0%| | 0/170498071 [00:00