├── LICENSE ├── Numpy Fundamentals ├── 02 Intermediate NumPy Concepts │ ├── 03 Statistical Computations │ │ └── statistical_computations.py │ ├── 02 Tensor Operations │ │ └── tensor_operations.py │ ├── 01 Linear Algebra for ML │ │ └── linear_algebra_ml.py │ ├── 04 Implementing ML Algorithms │ │ └── implementing_ml_algorithms.py │ └── README.md ├── 03 Advanced NumPy Concepts │ ├── 04 Advanced Tensor Manipulations │ │ └── advanced_tensor_manipulations.py │ ├── 03 Integration with ML Frameworks │ │ └── integration_ml_frameworks.py │ ├── 02 Custom Functions and Ufuncs │ │ └── custom_functions_ufuncs.py │ ├── 01 Vectorization and Performance │ │ └── vectorization_performance.py │ └── README.md └── 01 Beginner NumPy Concepts │ ├── 03 Basic Operations │ └── basic_operations.py │ ├── 02 Indexing and Slicing │ └── indexing_slicing.py │ ├── 04 Data Preprocessing for ML │ └── data_preprocessing_ml.py │ ├── 01 Array Creation and Properties │ └── array_creation_properties.py │ └── README.md ├── README.md └── Numpy Interview Questions └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 rohanmistry231 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Numpy Fundamentals/02 Intermediate NumPy Concepts/03 Statistical Computations/statistical_computations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Statistical Computations] 9 | # Learn NumPy's statistical tools for ML analysis. 10 | # Covers descriptive statistics, correlation/covariance, and random sampling. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Descriptive Statistics] 15 | # Compute statistics on Iris data. 16 | if load_iris: 17 | iris = load_iris() 18 | X = iris.data 19 | else: 20 | X = np.random.rand(150, 4) # Synthetic data 21 | 22 | mean = np.mean(X, axis=0) 23 | median = np.median(X, axis=0) 24 | variance = np.var(X, axis=0) 25 | std = np.std(X, axis=0) 26 | print("\nDescriptive Statistics (Iris Features):") 27 | print("Mean:", mean) 28 | print("Median:", median) 29 | print("Variance:", variance) 30 | print("Standard Deviation:", std) 31 | 32 | # %% [3. Correlation and Covariance] 33 | # Compute correlation and covariance matrices. 34 | corr_matrix = np.corrcoef(X.T) 35 | cov_matrix = np.cov(X.T) 36 | print("\nCorrelation Matrix:\n", corr_matrix) 37 | print("\nCovariance Matrix:\n", cov_matrix) 38 | 39 | # %% [4. 
Random Sampling for Data Augmentation] 40 | # Generate augmented data with random sampling. 41 | np.random.seed(42) 42 | indices = np.random.choice(X.shape[0], size=50, replace=True) 43 | augmented_data = X[indices] 44 | print("\nAugmented Data Shape:", augmented_data.shape) 45 | print("First 3 Augmented Samples:\n", augmented_data[:3]) 46 | 47 | # %% [5. Visualizing Statistics] 48 | # Visualize correlation matrix. 49 | plt.figure(figsize=(6, 4)) 50 | plt.imshow(corr_matrix, cmap='coolwarm') 51 | plt.colorbar() 52 | plt.title('Correlation Matrix of Iris Features') 53 | plt.savefig('statistical_computations_corr.png') 54 | 55 | # Visualize augmented data distribution 56 | plt.figure(figsize=(8, 4)) 57 | plt.hist(augmented_data[:, 0], bins=20, color='purple', alpha=0.7) 58 | plt.title('Augmented Data: Feature 1 Distribution') 59 | plt.xlabel('Value') 60 | plt.ylabel('Frequency') 61 | plt.savefig('statistical_computations_hist.png') 62 | 63 | # %% [6. Practical ML Application] 64 | # Use statistics for feature selection. 65 | np.random.seed(42) 66 | X_synthetic = np.random.rand(100, 3) 67 | y_synthetic = np.random.randint(0, 2, 100) 68 | correlations = np.array([np.corrcoef(X_synthetic[:, i], y_synthetic)[0, 1] for i in range(3)]) 69 | print("\nFeature Selection:") 70 | print("Feature Correlations with Target:", correlations) 71 | print("Selected Feature (highest correlation):", np.argmax(np.abs(correlations))) 72 | 73 | # %% [7. Interview Scenario: Correlation Analysis] 74 | # Discuss correlation for feature selection. 
75 | print("\nInterview Scenario: Correlation Analysis") 76 | print("Q: How would you select features using NumPy?") 77 | print("A: Compute np.corrcoef to find feature-target correlations, select high values.") 78 | print("Key: High correlation indicates predictive power.") 79 | print("Example: np.corrcoef(X.T, y) for feature-target correlations.") -------------------------------------------------------------------------------- /Numpy Fundamentals/02 Intermediate NumPy Concepts/02 Tensor Operations/tensor_operations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # %% [1. Introduction to Tensor Operations] 5 | # Learn NumPy's tensor operations for ML tasks. 6 | # Covers multi-dimensional arrays, reshaping, transposing, and contractions. 7 | 8 | print("NumPy version:", np.__version__) 9 | 10 | # %% [2. Multi-dimensional Arrays] 11 | # Create 3D tensor (e.g., batch of images). 12 | np.random.seed(42) 13 | tensor_3d = np.random.rand(10, 32, 32) # 10 samples, 32x32 images 14 | print("\n3D Tensor Shape:", tensor_3d.shape) 15 | print("First Sample (first 3x3):\n", tensor_3d[0, :3, :3]) 16 | 17 | # 4D tensor (e.g., batch of RGB images) 18 | tensor_4d = np.random.rand(5, 3, 64, 64) # 5 samples, 3 channels, 64x64 19 | print("\n4D Tensor Shape:", tensor_4d.shape) 20 | 21 | # %% [3. Tensor Reshaping and Transposing] 22 | # Reshape tensor for ML input. 
23 | reshaped_tensor = np.reshape(tensor_3d, (10, 32 * 32)) # Flatten images 24 | print("\nReshaped Tensor Shape:", reshaped_tensor.shape) 25 | print("First Sample (first 10 elements):\n", reshaped_tensor[0, :10]) 26 | 27 | # Transpose tensor 28 | transposed_tensor = np.transpose(tensor_3d, (1, 2, 0)) # Swap axes 29 | print("\nTransposed Tensor Shape:", transposed_tensor.shape) 30 | 31 | # Moveaxis and swapaxes 32 | moved_tensor = np.moveaxis(tensor_4d, 1, 3) # Move channel axis 33 | print("\nMoved Tensor Shape:", moved_tensor.shape) 34 | 35 | # %% [4. Tensor Contractions and Reductions] 36 | # Tensor contraction with tensordot 37 | tensor_a = np.random.rand(5, 3, 4) 38 | tensor_b = np.random.rand(4, 2) 39 | contracted = np.tensordot(tensor_a, tensor_b, axes=([2], [0])) 40 | print("\nTensor Contraction Shape:", contracted.shape) 41 | 42 | # Reduction with sum 43 | sum_tensor = np.sum(tensor_3d, axis=(1, 2)) # Sum over image dimensions 44 | print("\nSum Reduction Shape:", sum_tensor.shape) 45 | print("Sum Values:", sum_tensor) 46 | 47 | # %% [5. Visualizing Tensors] 48 | # Visualize a 3D tensor slice. 49 | plt.figure(figsize=(6, 4)) 50 | plt.imshow(tensor_3d[0], cmap='gray') 51 | plt.title('3D Tensor: First Image Slice') 52 | plt.colorbar() 53 | plt.savefig('tensor_operations_image.png') 54 | 55 | # Visualize reduction results 56 | plt.figure(figsize=(8, 4)) 57 | plt.bar(range(len(sum_tensor)), sum_tensor) 58 | plt.title('Sum Reduction of 3D Tensor') 59 | plt.xlabel('Sample Index') 60 | plt.ylabel('Sum Value') 61 | plt.savefig('tensor_operations_reduction.png') 62 | 63 | # %% [6. Practical ML Application] 64 | # Prepare tensor for CNN input. 
65 | np.random.seed(42) 66 | images = np.random.rand(20, 1, 28, 28) # 20 grayscale images, 28x28 67 | images_reshaped = np.reshape(images, (20, 28 * 28)) # Flatten for dense layer 68 | print("\nCNN Input Preparation:") 69 | print("Original Tensor Shape:", images.shape) 70 | print("Reshaped Tensor Shape:", images_reshaped.shape) 71 | 72 | # %% [7. Interview Scenario: Tensor Reshaping] 73 | # Discuss reshaping for deep learning. 74 | print("\nInterview Scenario: Tensor Reshaping") 75 | print("Q: How would you prepare a tensor for a neural network?") 76 | print("A: Reshape to match input layer (e.g., flatten images with np.reshape).") 77 | print("Key: Ensure shape compatibility with model architecture.") 78 | print("Example: np.reshape(tensor, (n_samples, height * width)) for dense layers.") -------------------------------------------------------------------------------- /Numpy Fundamentals/03 Advanced NumPy Concepts/04 Advanced Tensor Manipulations/advanced_tensor_manipulations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from scipy import sparse 5 | import tensorly as tl 6 | except ImportError: 7 | sparse, tl = None, None # NOTE(review): both imports share one try block, so a missing tensorly also sets sparse to None even when SciPy is installed 8 | 9 | # %% [1. Introduction to Advanced Tensor Manipulations] 10 | # Learn advanced NumPy tensor operations for ML. 11 | # Covers batch processing, sparse arrays, and tensor decompositions. 12 | 13 | print("NumPy version:", np.__version__) 14 | 15 | # %% [2. Batch Processing for Deep Learning] 16 | # Process image batches for CNNs. 17 | np.random.seed(42) 18 | images = np.random.rand(32, 3, 64, 64) # 32 RGB images, 64x64 19 | batch_mean = np.mean(images, axis=(2, 3), keepdims=True) # Mean over the two spatial axes only -> shape (32, 3, 1, 1) 20 | images_normalized = images - batch_mean # Per-image, per-channel mean centering (not batch normalization: nothing is averaged over the batch axis and no variance scaling is applied) 21 | print("\nBatch Processed Images Shape:", images_normalized.shape) 22 | print("Batch Mean Shape:", batch_mean.shape) 23 | 24 | # %% [3. 
Sparse Arrays for Large-scale Data] 25 | # Use sparse arrays for memory efficiency. 26 | if sparse: 27 | sparse_matrix = sparse.csr_matrix(np.random.rand(1000, 1000) > 0.9) # 90% sparsity 28 | print("\nSparse Matrix Shape:", sparse_matrix.shape) 29 | print("Non-zero Elements:", sparse_matrix.nnz) 30 | else: 31 | print("\nScipy.sparse not available; skipping sparse matrix.") 32 | 33 | # %% [4. Tensor Decompositions] 34 | # Perform CP decomposition for compression. 35 | if tl: 36 | tensor = np.random.rand(10, 20, 30) 37 | factors = tl.decomposition.parafac(tensor, rank=5) 38 | reconstructed = tl.kruskal_to_tensor(factors) 39 | error = np.mean((tensor - reconstructed)**2) 40 | print("\nCP Decomposition Error:", error) 41 | else: 42 | print("\nTensorly not available; using simple sum reduction.") 43 | tensor = np.random.rand(10, 20, 30) 44 | reduced = np.sum(tensor, axis=2) 45 | print("Reduced Tensor Shape:", reduced.shape) 46 | 47 | # %% [5. Visualizing Tensor Manipulations] 48 | # Visualize normalized image slice. 49 | plt.figure(figsize=(6, 4)) 50 | plt.imshow(images_normalized[0, 0], cmap='gray') 51 | plt.title('Normalized Image Slice (Batch Processing)') 52 | plt.colorbar() 53 | plt.savefig('tensor_manipulations_image.png') 54 | 55 | # Visualize sparse matrix (if available) 56 | if sparse: 57 | plt.figure(figsize=(6, 4)) 58 | plt.spy(sparse_matrix, markersize=1) 59 | plt.title('Sparse Matrix Structure') 60 | plt.savefig('tensor_manipulations_sparse.png') 61 | 62 | # %% [6. Practical ML Application] 63 | # Prepare a large tensor for deep learning. 64 | np.random.seed(42) 65 | large_tensor = np.random.rand(100, 3, 128, 128) # 100 RGB images 66 | large_tensor_flattened = np.reshape(large_tensor, (100, -1)) # Flatten for dense layer 67 | print("\nDeep Learning Tensor Preparation:") 68 | print("Original Tensor Shape:", large_tensor.shape) 69 | print("Flattened Tensor Shape:", large_tensor_flattened.shape) 70 | 71 | # %% [7. 
Interview Scenario: Tensor Decomposition] 72 | # Discuss tensor decomposition for ML. 73 | print("\nInterview Scenario: Tensor Decomposition") 74 | print("Q: How would you compress a tensor for ML?") 75 | print("A: Use CP decomposition to reduce dimensionality with tensorly.") 76 | print("Key: Preserves structure while reducing memory.") 77 | print("Example: factors = tl.decomposition.parafac(tensor, rank=5).") -------------------------------------------------------------------------------- /Numpy Fundamentals/03 Advanced NumPy Concepts/03 Integration with ML Frameworks/integration_ml_frameworks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | import tensorflow as tf 5 | import torch 6 | from sklearn.preprocessing import StandardScaler 7 | except ImportError: 8 | tf, torch, StandardScaler = None, None, None 9 | 10 | # %% [1. Introduction to Integration with ML Frameworks] 11 | # Learn to integrate NumPy with TensorFlow, PyTorch, and scikit-learn. 12 | # Covers tensor conversion, data pipelines, and preprocessing. 13 | 14 | print("NumPy version:", np.__version__) 15 | 16 | # %% [2. Converting NumPy Arrays to Tensors] 17 | # Convert to TensorFlow and PyTorch tensors. 18 | np.random.seed(42) 19 | X_np = np.random.rand(100, 5) 20 | 21 | if tf: 22 | X_tf = tf.convert_to_tensor(X_np, dtype=tf.float32) 23 | print("\nTensorFlow Tensor Shape:", X_tf.shape) 24 | else: 25 | print("\nTensorFlow not available; skipping.") 26 | 27 | if torch: 28 | X_torch = torch.from_numpy(X_np).float() 29 | print("PyTorch Tensor Shape:", X_torch.shape) 30 | else: 31 | print("PyTorch not available; skipping.") 32 | 33 | # %% [3. NumPy as a Backend for Data Pipelines] 34 | # Create a TensorFlow data pipeline from NumPy arrays. 
35 | if tf: 36 | y_np = np.random.randint(0, 2, 100) 37 | dataset = tf.data.Dataset.from_tensor_slices((X_np, y_np)).batch(32).shuffle(100) 38 | for X_batch, y_batch in dataset.take(1): 39 | print("\nTensorFlow Dataset Batch Shapes:", X_batch.shape, y_batch.shape) 40 | else: 41 | print("\nTensorFlow not available; skipping pipeline.") 42 | 43 | # %% [4. Interfacing with scikit-learn] 44 | # Use NumPy with scikit-learn preprocessing. 45 | if StandardScaler: 46 | scaler = StandardScaler() 47 | X_scaled = scaler.fit_transform(X_np) 48 | print("\nScikit-learn Scaled Features Shape:", X_scaled.shape) 49 | print("First 3 Scaled Samples:\n", X_scaled[:3]) 50 | else: 51 | X_scaled = (X_np - np.mean(X_np, axis=0)) / np.std(X_np, axis=0) 52 | print("\nScikit-learn not available; using NumPy scaling.") 53 | print("First 3 Scaled Samples:\n", X_scaled[:3]) 54 | 55 | # %% [5. Visualizing Integration] 56 | # Visualize scaled features. 57 | plt.figure(figsize=(8, 4)) 58 | plt.scatter(X_np[:, 0], X_np[:, 1], c='blue', alpha=0.5, label='Original') 59 | plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c='red', alpha=0.5, label='Scaled') 60 | plt.title('Original vs. Scaled Features') 61 | plt.xlabel('Feature 1') 62 | plt.ylabel('Feature 2') 63 | plt.legend() 64 | plt.savefig('integration_ml_scaled.png') 65 | 66 | # %% [6. Practical ML Application] 67 | # Prepare a NumPy dataset for a deep learning model. 68 | np.random.seed(42) 69 | X_ml = np.random.rand(1000, 10) 70 | y_ml = np.random.randint(0, 2, 1000) 71 | if tf: 72 | dataset_ml = tf.data.Dataset.from_tensor_slices((X_ml, y_ml)).batch(64) 73 | print("\nDeep Learning Dataset:") 74 | print("Batch Size: 64") 75 | else: 76 | print("\nTensorFlow not available; skipping deep learning dataset.") 77 | 78 | # %% [7. Interview Scenario: Framework Integration] 79 | # Discuss NumPy integration with ML frameworks. 
80 | print("\nInterview Scenario: Framework Integration") 81 | print("Q: How do you prepare NumPy data for TensorFlow?") 82 | print("A: Convert to tensors with tf.convert_to_tensor, use tf.data.Dataset.") 83 | print("Key: Ensures compatibility with ML framework APIs.") 84 | print("Example: dataset = tf.data.Dataset.from_tensor_slices((X_np, y_np)).batch(32).") -------------------------------------------------------------------------------- /Numpy Fundamentals/02 Intermediate NumPy Concepts/01 Linear Algebra for ML/linear_algebra_ml.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Linear Algebra for ML] 9 | # Learn NumPy's linear algebra tools for ML tasks. 10 | # Covers matrix operations, solving linear systems, eigenvalues, and SVD. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Matrix Operations] 15 | # Perform matrix operations using Iris data. 16 | if load_iris: 17 | iris = load_iris() 18 | X = iris.data[:100] # First 100 samples for simplicity 19 | else: 20 | X = np.random.rand(100, 4) # Synthetic data 21 | 22 | # Matrix multiplication (dot product) 23 | X_T = np.transpose(X) # Transpose 24 | X_TX = np.dot(X_T, X) # X^T * X 25 | print("\nX^T * X Matrix (4x4):\n", X_TX) 26 | 27 | # Matrix multiplication with matmul 28 | X_matmul = np.matmul(X_T, X) 29 | print("\nX^T * X with matmul:\n", X_matmul) 30 | 31 | # %% [3. Solving Linear Systems] 32 | # Solve a linear system: Ax = b 33 | A = np.array([[3, 1], [1, 2]]) # Coefficient matrix 34 | b = np.array([9, 8]) # Constants 35 | x = np.linalg.solve(A, b) # Solve for x 36 | print("\nLinear System Solution (Ax = b):") 37 | print("A:\n", A) 38 | print("b:", b) 39 | print("x:", x) 40 | 41 | # Verify solution 42 | print("Verification (Ax):\n", np.dot(A, x)) 43 | 44 | # %% [4. 
Eigenvalues and Eigenvectors] 45 | # Compute eigenvalues/vectors for covariance matrix. 46 | cov_matrix = np.cov(X.T) # Covariance of Iris features 47 | eigenvalues, eigenvectors = np.linalg.eig(cov_matrix) 48 | print("\nCovariance Matrix Eigenvalues:", eigenvalues) 49 | print("Eigenvectors:\n", eigenvectors) 50 | 51 | # %% [5. Singular Value Decomposition (SVD)] 52 | # Apply SVD for dimensionality reduction. 53 | U, S, Vt = np.linalg.svd(X, full_matrices=False) 54 | X_reduced = np.dot(U[:, :2], np.diag(S[:2])) # Reduce to 2 dimensions 55 | print("\nSVD Reduced Data Shape:", X_reduced.shape) 56 | print("First 3 Reduced Samples:\n", X_reduced[:3]) 57 | 58 | # %% [6. Visualizing Linear Algebra] 59 | # Visualize SVD-reduced data. 60 | plt.figure(figsize=(8, 4)) 61 | plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c='blue', alpha=0.5) 62 | plt.title('SVD: Iris Data in 2D') 63 | plt.xlabel('Component 1') 64 | plt.ylabel('Component 2') 65 | plt.savefig('linear_algebra_svd.png') 66 | 67 | # Visualize covariance matrix 68 | plt.figure(figsize=(6, 4)) 69 | plt.imshow(cov_matrix, cmap='viridis') 70 | plt.colorbar() 71 | plt.title('Covariance Matrix Heatmap') 72 | plt.savefig('linear_algebra_cov_matrix.png') 73 | 74 | # %% [7. Practical ML Application] 75 | # Use matrix operations for feature transformation. 76 | np.random.seed(42) 77 | X_synthetic = np.random.rand(100, 3) # Synthetic data 78 | W = np.random.rand(3, 2) # Transformation matrix 79 | X_transformed = np.dot(X_synthetic, W) # Linear transformation 80 | print("\nSynthetic ML Dataset:") 81 | print("Transformed Features Shape:", X_transformed.shape) 82 | print("First 3 Transformed Samples:\n", X_transformed[:3]) 83 | 84 | # %% [8. Interview Scenario: SVD for PCA] 85 | # Discuss SVD for dimensionality reduction. 
86 | print("\nInterview Scenario: SVD for PCA") 87 | print("Q: How would you implement PCA with NumPy?") 88 | print("A: Use np.linalg.svd to decompose data, select top components.") 89 | print("Key: SVD reduces dimensionality while preserving variance.") 90 | print("Example: U, S, Vt = np.linalg.svd(X); X_reduced = U[:, :k] @ np.diag(S[:k]).") -------------------------------------------------------------------------------- /Numpy Fundamentals/01 Beginner NumPy Concepts/03 Basic Operations/basic_operations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Basic Operations] 9 | # Learn NumPy’s element-wise operations, broadcasting, and universal functions (ufuncs). 10 | # Essential for ML computations like feature scaling and loss calculations. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Element-wise Operations] 15 | # Perform arithmetic operations on arrays. 16 | if load_iris: 17 | iris = load_iris() 18 | data = iris.data 19 | else: 20 | data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]]) 21 | 22 | scaled_data = data * 2 # Multiply all elements by 2 23 | print("\nScaled Data (first 3 rows):\n", scaled_data[:3]) 24 | 25 | added_data = data + 10 # Add 10 to all elements 26 | print("\nAdded Data (first 3 rows):\n", added_data[:3]) 27 | 28 | # Combine arrays 29 | combined = data[:, 0] + data[:, 1] # Sum of sepal length and width 30 | print("\nSum of Sepal Length and Width (first 5):\n", combined[:5]) 31 | 32 | # %% [3. Broadcasting] 33 | # Apply operations across arrays of different shapes. 
34 | bias = np.array([1, -1, 0, 0.5]) # Bias for each feature 35 | biased_data = data + bias # Broadcasting bias to all rows 36 | print("\nBroadcasted Bias Data (first 3 rows):\n", biased_data[:3]) 37 | 38 | # Broadcasting a per-column vector: np.max(data, axis=0) has one entry per column and is stretched across all rows 39 | normalized = data / np.max(data, axis=0) # Normalize by column max 40 | print("\nNormalized Data (first 3 rows):\n", normalized[:3]) 41 | 42 | # %% [4. Universal Functions (ufuncs)] 43 | # Apply mathematical functions element-wise. 44 | sin_data = np.sin(data) # Sine of all elements 45 | print("\nSine of Data (first 3 rows):\n", sin_data[:3]) 46 | 47 | exp_data = np.exp(data[:, 0]) # Exponential of sepal length 48 | print("\nExponential of Sepal Length (first 5):\n", exp_data[:5]) 49 | 50 | mean_data = np.mean(data, axis=0) # Mean of each feature (a reduction along axis 0, not an element-wise ufunc) 51 | print("\nMean of Each Feature:", mean_data) 52 | 53 | # %% [5. Visualizing Operations] 54 | # Visualize normalized data distribution. 55 | if load_iris: 56 | plt.figure(figsize=(8, 4)) 57 | plt.hist(normalized[:, 0], bins=20, color='green', alpha=0.7) 58 | plt.title('Normalized Sepal Length Distribution') 59 | plt.xlabel('Normalized Value') 60 | plt.ylabel('Frequency') 61 | plt.savefig('operations_histogram.png') 62 | 63 | # %% [6. Practical ML Application] 64 | # Compute a loss function for ML. 65 | np.random.seed(42) 66 | y_true = np.random.randint(0, 2, 100) # True binary labels 67 | y_pred = np.random.rand(100) # Predicted probabilities 68 | mse = np.mean((y_true - y_pred) ** 2) # Mean squared error 69 | print("\nML Loss Calculation:") 70 | print("Mean Squared Error:", mse) 71 | 72 | # Visualize predictions vs. true labels 73 | plt.figure(figsize=(8, 4)) 74 | plt.scatter(range(100), y_true, c='blue', label='True Labels', alpha=0.5) 75 | plt.scatter(range(100), y_pred, c='red', label='Predictions', alpha=0.5) 76 | plt.title('True vs. Predicted Labels') 77 | plt.legend() 78 | plt.savefig('operations_loss.png') 79 | 80 | # %% [7. 
Interview Scenario: Broadcasting] 81 | # Discuss broadcasting for ML computations. 82 | print("\nInterview Scenario: Broadcasting") 83 | print("Q: How does broadcasting simplify ML feature scaling?") 84 | print("A: Broadcasting applies operations (e.g., normalization) to arrays without loops.") 85 | print("Key: Ensures shape compatibility for efficient computations.") 86 | print("Example: arr / np.max(arr, axis=0) normalizes columns without explicit iteration.") -------------------------------------------------------------------------------- /Numpy Fundamentals/01 Beginner NumPy Concepts/02 Indexing and Slicing/indexing_slicing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Indexing and Slicing] 9 | # Learn how to access and manipulate NumPy arrays using indexing and slicing. 10 | # Covers basic indexing, boolean indexing, fancy indexing, and slicing for ML. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Basic Indexing] 15 | # Access elements and subarrays using indices. 16 | if load_iris: 17 | iris = load_iris() 18 | data = iris.data 19 | else: 20 | data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]]) 21 | 22 | print("\nIris Array (first 3 rows):\n", data[:3]) 23 | print("Single Element (row 0, col 0):", data[0, 0]) # Sepal length of first sample 24 | print("First Row:", data[0]) # All features of first sample 25 | print("First Column:", data[:, 0]) # Sepal length for all samples 26 | 27 | # %% [3. Slicing] 28 | # Extract subarrays using slices. 
29 | subset = data[:5, 1:3] # First 5 rows, columns 1 and 2 30 | print("\nSliced Subarray (first 5 rows, cols 1-2):\n", subset) 31 | 32 | # Step slicing for downsampling 33 | downsampled = data[::2, :] # Every other row 34 | print("\nDownsampled Array (every other row):\n", downsampled[:3]) 35 | 36 | # %% [4. Boolean Indexing] 37 | # Filter arrays based on conditions. 38 | sepal_length = data[:, 0] 39 | long_sepal = data[sepal_length > 6.0] # Samples with sepal length > 6.0 40 | print("\nSamples with Sepal Length > 6.0:\n", long_sepal[:3]) 41 | 42 | # Combine conditions 43 | mask = (sepal_length > 5.0) & (data[:, 2] < 2.0) # Sepal length > 5.0 and petal length < 2.0 44 | filtered = data[mask] 45 | print("\nFiltered Samples (sepal > 5.0, petal < 2.0):\n", filtered) 46 | 47 | # %% [5. Fancy Indexing] 48 | # Use arrays of indices to select elements. 49 | rows = np.array([0, 2, 4]) 50 | cols = np.array([1, 3]) 51 | selected = data[rows, cols] # Elements at (0,1), (2,3), (4,3) 52 | print("\nFancy Indexing (selected elements):\n", selected) 53 | 54 | # Select specific rows 55 | selected_rows = data[[0, 10, 20]] 56 | print("\nSelected Rows (0, 10, 20):\n", selected_rows) 57 | 58 | # %% [6. Visualizing Indexing] 59 | # Visualize filtered data. 60 | if load_iris: 61 | plt.figure(figsize=(8, 4)) 62 | plt.scatter(data[:, 0], data[:, 2], c='blue', alpha=0.5, label='All Samples') 63 | plt.scatter(long_sepal[:, 0], long_sepal[:, 2], c='red', label='Sepal Length > 6.0') 64 | plt.xlabel('Sepal Length (cm)') 65 | plt.ylabel('Petal Length (cm)') 66 | plt.title('Iris Dataset: Boolean Indexing') 67 | plt.legend() 68 | plt.savefig('indexing_scatter.png') 69 | 70 | # %% [7. Practical ML Application] 71 | # Use indexing to prepare ML features. 
72 | np.random.seed(42) 73 | X = np.random.rand(100, 3) # 100 samples, 3 features 74 | y = np.random.randint(0, 2, 100) # Binary labels 75 | positive_samples = X[y == 1] # Select samples with label 1 76 | print("\nML Dataset: Positive Samples Shape:", positive_samples.shape) 77 | print("First 3 Positive Samples:\n", positive_samples[:3]) 78 | 79 | # %% [8. Interview Scenario: Indexing] 80 | # Discuss indexing for ML data selection. 81 | print("\nInterview Scenario: Indexing") 82 | print("Q: How would you filter a dataset for ML preprocessing?") 83 | print("A: Use boolean indexing for conditions (e.g., arr[arr[:, 0] > 5]) and slicing for subsets.") 84 | print("Key: Boolean indexing is efficient for outlier removal and feature selection.") 85 | print("Example: arr[arr[:, 0] > np.mean(arr[:, 0])] for above-average values.") -------------------------------------------------------------------------------- /Numpy Fundamentals/02 Intermediate NumPy Concepts/04 Implementing ML Algorithms/implementing_ml_algorithms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import make_blobs 5 | except ImportError: 6 | make_blobs = None 7 | 8 | # %% [1. Introduction to Implementing ML Algorithms] 9 | # Learn to implement ML algorithms with NumPy. 10 | # Covers linear regression, logistic regression, and K-means clustering. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Linear Regression with Normal Equations] 15 | # Implement linear regression on synthetic data. 
16 | np.random.seed(42) 17 | if make_blobs: 18 | X, _ = make_blobs(n_samples=100, centers=1, n_features=2) 19 | else: 20 | X = np.random.rand(100, 2) 21 | y = 2 * X[:, 0] + 3 * X[:, 1] + np.random.randn(100) * 0.1 # Linear relationship 22 | X_b = np.c_[np.ones((100, 1)), X] # Add bias term 23 | theta = np.linalg.solve(np.dot(X_b.T, X_b), np.dot(X_b.T, y)) # Normal equations 24 | print("\nLinear Regression Coefficients:", theta) 25 | 26 | # Predict 27 | y_pred = np.dot(X_b, theta) 28 | 29 | # %% [3. Logistic Regression with Gradient Descent] 30 | # Implement logistic regression. 31 | def sigmoid(z): 32 | return 1 / (1 + np.exp(-z)) 33 | 34 | X_log = X 35 | y_log = (y > np.median(y)).astype(int) # Binary labels 36 | X_log_b = np.c_[np.ones((100, 1)), X_log] 37 | theta_log = np.zeros(3) 38 | lr = 0.1 39 | for _ in range(1000): 40 | z = np.dot(X_log_b, theta_log) 41 | h = sigmoid(z) 42 | gradient = np.dot(X_log_b.T, (h - y_log)) / 100 43 | theta_log -= lr * gradient 44 | print("\nLogistic Regression Coefficients:", theta_log) 45 | 46 | # Predict 47 | y_pred_log = sigmoid(np.dot(X_log_b, theta_log)) > 0.5 48 | 49 | # %% [4. K-means Clustering] 50 | # Implement K-means clustering. 51 | if make_blobs: 52 | X_cluster, _ = make_blobs(n_samples=100, centers=3, n_features=2) 53 | else: 54 | X_cluster = np.random.rand(100, 2) * 10 55 | K = 3 56 | centroids = X_cluster[np.random.choice(100, K, replace=False)] 57 | for _ in range(10): 58 | distances = np.sqrt(((X_cluster - centroids[:, np.newaxis])**2).sum(axis=2)) 59 | labels = np.argmin(distances, axis=0) 60 | centroids = np.array([X_cluster[labels == k].mean(axis=0) for k in range(K)]) 61 | print("\nK-means Centroids:\n", centroids) 62 | 63 | # %% [5. Visualizing ML Algorithms] 64 | # Visualize linear regression predictions. 
65 | plt.figure(figsize=(8, 4)) 66 | plt.scatter(X[:, 0], y, c='blue', alpha=0.5, label='Data') 67 | plt.plot(X[:, 0], y_pred, c='red', label='Linear Regression') 68 | plt.xlabel('Feature 1') 69 | plt.ylabel('Target') 70 | plt.title('Linear Regression Fit') 71 | plt.legend() 72 | plt.savefig('ml_algorithms_linear.png') 73 | 74 | # Visualize K-means clusters 75 | plt.figure(figsize=(8, 4)) 76 | plt.scatter(X_cluster[:, 0], X_cluster[:, 1], c=labels, cmap='viridis', alpha=0.5) 77 | plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='x', s=200, label='Centroids') 78 | plt.title('K-means Clustering') 79 | plt.legend() 80 | plt.savefig('ml_algorithms_kmeans.png') 81 | 82 | # %% [6. Practical ML Application] 83 | # Evaluate linear regression performance. 84 | mse = np.mean((y_pred - y)**2) 85 | print("\nLinear Regression Performance:") 86 | print("Mean Squared Error:", mse) 87 | 88 | # %% [7. Interview Scenario: Gradient Descent] 89 | # Discuss implementing gradient descent. 90 | print("\nInterview Scenario: Gradient Descent") 91 | print("Q: How would you implement logistic regression with NumPy?") 92 | print("A: Use gradient descent to minimize loss, compute gradients with np.dot.") 93 | print("Key: Sigmoid function and iterative updates are critical.") 94 | print("Example: theta -= lr * np.dot(X.T, (sigmoid(X @ theta) - y)) / n.") -------------------------------------------------------------------------------- /Numpy Fundamentals/03 Advanced NumPy Concepts/02 Custom Functions and Ufuncs/custom_functions_ufuncs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from numba import jit 5 | except ImportError: 6 | jit = lambda x: x 7 | 8 | # %% [1. Introduction to Custom Functions and Ufuncs] 9 | # Learn to create custom NumPy functions for ML tasks. 10 | # Covers np.frompyfunc, np.vectorize, numba, and gradient computations. 
print("NumPy version:", np.__version__)

# %% [2. Writing Custom Ufuncs with np.frompyfunc]
# Create a custom activation function.
def custom_activation(x):
    """Clip *x* to the closed interval [-1, 1]."""
    return np.clip(x, -1, 1)


# frompyfunc yields object arrays, hence the explicit cast back to float.
ufunc_activation = np.frompyfunc(custom_activation, 1, 1)
X = np.linspace(-5, 5, 100)
y_ufunc = ufunc_activation(X).astype(float)
print("\nCustom Ufunc Output (first 5):", y_ufunc[:5])

# %% [3. Vectorizing Complex Operations]
# Vectorize a non-trivial function.
def complex_function(x, threshold=0.5):
    """Return x**2 when x exceeds *threshold*, otherwise sin(x)."""
    return x**2 if x > threshold else np.sin(x)


vectorized_func = np.vectorize(complex_function)
X_complex = np.linspace(-2, 2, 100)
y_vectorized = vectorized_func(X_complex)
print("\nVectorized Function Output (first 5):", y_vectorized[:5])

# %% [4. Numba for Performance]
# Optimize a gradient computation with numba.
def _nopython_jit(func):
    """Compile *func* with numba in nopython mode, or return it unchanged.

    The file's ImportError fallback binds ``jit`` to a one-argument identity
    lambda, so the factory form ``jit(nopython=True)`` raises ``TypeError``
    when numba is missing. Resolving numba here keeps both paths working.
    """
    try:
        from numba import jit as numba_jit
    except ImportError:
        return func
    return numba_jit(nopython=True)(func)


@_nopython_jit
def compute_gradient(X, y, theta):
    """Return X.T @ (X @ theta - y) / len(y)."""
    return np.dot(X.T, (np.dot(X, theta) - y)) / len(y)


np.random.seed(42)
X_grad = np.random.rand(1000, 5)
y_grad = np.random.rand(1000)
theta = np.random.rand(5)
gradient = compute_gradient(X_grad, y_grad, theta)
print("\nNumba Gradient Shape:", gradient.shape)
print("Gradient Values:", gradient)

# %% [5. Gradient Computations for ML]
# Compute gradients for a custom loss.
def custom_loss(y_true, y_pred):
    """Return the mean squared difference between *y_true* and *y_pred*."""
    return np.mean((y_true - y_pred)**2)


def loss_gradient(X, y, theta):
    """Return the gradient of ``custom_loss(y, X @ theta)`` with respect to theta."""
    y_pred = np.dot(X, theta)
    return -2 * np.dot(X.T, (y - y_pred)) / len(y)


X_ml = np.random.rand(500, 3)
y_ml = np.random.rand(500)
theta_ml = np.random.rand(3)
grad = loss_gradient(X_ml, y_ml, theta_ml)
print("\nCustom Loss Gradient Shape:", grad.shape)
print("Gradient Values:", grad)

# %% [6. Visualizing Custom Functions]
# Plot custom activation function.
66 | plt.figure(figsize=(8, 4)) 67 | plt.plot(X, y_ufunc, label='Custom Activation (clipped)') 68 | plt.plot(X, X, '--', label='Input') 69 | plt.title('Custom Ufunc: Clipped Activation') 70 | plt.xlabel('Input') 71 | plt.ylabel('Output') 72 | plt.legend() 73 | plt.savefig('custom_functions_ufunc.png') 74 | 75 | # Plot vectorized function 76 | plt.figure(figsize=(8, 4)) 77 | plt.plot(X_complex, y_vectorized, label='Vectorized Function') 78 | plt.title('Vectorized Complex Function') 79 | plt.xlabel('Input') 80 | plt.ylabel('Output') 81 | plt.legend() 82 | plt.savefig('custom_functions_vectorized.png') 83 | 84 | # %% [7. Practical ML Application] 85 | # Apply custom ufunc to preprocess ML features. 86 | np.random.seed(42) 87 | X_features = np.random.rand(1000, 10) * 10 - 5 88 | X_processed = ufunc_activation(X_features).astype(float) 89 | print("\nML Feature Preprocessing:") 90 | print("Processed Features Shape:", X_processed.shape) 91 | print("First 3 Processed Samples:\n", X_processed[:3]) 92 | 93 | # %% [8. Interview Scenario: Numba Optimization] 94 | # Discuss numba for ML performance. 95 | print("\nInterview Scenario: Numba Optimization") 96 | print("Q: How would you optimize a gradient computation in NumPy?") 97 | print("A: Use numba's @jit to compile Python code to machine code.") 98 | print("Key: Numba accelerates loops and numerical operations.") 99 | print("Example: @jit def compute_gradient(X, y, theta): ...") -------------------------------------------------------------------------------- /Numpy Fundamentals/03 Advanced NumPy Concepts/01 Vectorization and Performance/vectorization_performance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import time 4 | try: 5 | from scipy import sparse 6 | except ImportError: 7 | sparse = None 8 | 9 | # %% [1. 
# Introduction to Vectorization and Performance]
# Learn advanced NumPy techniques for performance optimization.
# Covers vectorization, memory-efficient computations, and profiling.

print("NumPy version:", np.__version__)

# %% [2. Replacing Loops with Vectorized Operations]
# Compare loop vs. vectorized operations.
np.random.seed(42)
X = np.random.rand(10000, 100)  # Large dataset
y = np.random.rand(10000)

# Loop-based dot product
# perf_counter is a high-resolution clock meant for interval timing; time.time
# can report a zero-length interval for an operation this short.
start_time = time.perf_counter()
result_loop = np.zeros(100)
for i in range(100):
    result_loop[i] = np.sum(X[:, i] * y)
loop_time = time.perf_counter() - start_time

# Vectorized dot product
start_time = time.perf_counter()
result_vectorized = np.dot(X.T, y)
vectorized_time = time.perf_counter() - start_time
print("\nLoop Time:", loop_time, "seconds")
print("Vectorized Time:", vectorized_time, "seconds")
# Guard the ratio so a zero measurement cannot raise ZeroDivisionError.
print("Speedup:", loop_time / vectorized_time if vectorized_time > 0 else float("inf"))

# Verify results
print("Results Match:", np.allclose(result_loop, result_vectorized))

# %% [3. Memory-efficient Computations]
# Use np.memmap for large datasets.
large_array = np.memmap('large_array.dat', dtype='float32', mode='w+', shape=(10000, 1000))
large_array[:] = np.random.rand(10000, 1000)
large_array.flush()  # Write the in-memory changes through to large_array.dat
print("\nMemory-mapped Array Shape:", large_array.shape)

# Stride tricks for sliding windows
from numpy.lib.stride_tricks import as_strided
X_small = np.random.rand(100, 10)
window_size = 3
# Window w is rows w .. w + window_size - 1 of X_small. The windows overlap in
# memory, so the view is made read-only to stop a write from silently
# changing several windows at once.
strided = as_strided(
    X_small,
    shape=(X_small.shape[0] - window_size + 1, window_size, X_small.shape[1]),
    strides=(X_small.strides[0], X_small.strides[0], X_small.strides[1]),
    writeable=False,
)
print("\nStrided Array Shape (sliding windows):", strided.shape)

# %% [4. Profiling and Optimization]
# Profile a computation-heavy operation.
55 | def compute_distances(X): 56 | return np.sqrt(((X[:, np.newaxis] - X)**2).sum(axis=2)) 57 | 58 | X_profile = np.random.rand(1000, 5) 59 | start_time = time.time() 60 | distances = compute_distances(X_profile) 61 | profile_time = time.time() - start_time 62 | print("\nDistance Computation Time:", profile_time, "seconds") 63 | 64 | # %% [5. Visualizing Performance] 65 | # Plot loop vs. vectorized times. 66 | plt.figure(figsize=(8, 4)) 67 | plt.bar(['Loop', 'Vectorized'], [loop_time, vectorized_time], color=['red', 'green']) 68 | plt.title('Loop vs. Vectorized Performance') 69 | plt.ylabel('Time (seconds)') 70 | plt.savefig('vectorization_performance_bar.png') 71 | 72 | # Visualize distance matrix 73 | plt.figure(figsize=(6, 4)) 74 | plt.imshow(distances, cmap='viridis') 75 | plt.colorbar() 76 | plt.title('Distance Matrix') 77 | plt.savefig('vectorization_performance_distances.png') 78 | 79 | # %% [6. Practical ML Application] 80 | # Optimize a feature scaling operation. 81 | np.random.seed(42) 82 | X_ml = np.random.rand(5000, 50) 83 | start_time = time.time() 84 | X_scaled = (X_ml - np.mean(X_ml, axis=0)) / np.std(X_ml, axis=0) 85 | scaling_time = time.time() - start_time 86 | print("\nML Feature Scaling Time:", scaling_time, "seconds") 87 | print("Scaled Features Shape:", X_scaled.shape) 88 | 89 | # %% [7. Interview Scenario: Vectorization] 90 | # Discuss vectorization benefits. 91 | print("\nInterview Scenario: Vectorization") 92 | print("Q: Why use vectorized operations in ML?") 93 | print("A: Vectorization replaces slow loops with optimized C-based operations.") 94 | print("Key: Improves performance for large datasets.") 95 | print("Example: np.dot(X.T, y) vs. 
loop-based summation.") -------------------------------------------------------------------------------- /Numpy Fundamentals/01 Beginner NumPy Concepts/04 Data Preprocessing for ML/data_preprocessing_ml.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Data Preprocessing for ML] 9 | # Learn how to preprocess ML datasets with NumPy. 10 | # Covers loading datasets, normalization, standardization, and train/test splitting. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Loading Datasets] 15 | # Load Iris dataset or use synthetic CSV data. 16 | if load_iris: 17 | iris = load_iris() 18 | X = iris.data 19 | y = iris.target 20 | else: 21 | # Synthetic CSV-like data 22 | data = np.array([[5.1, 3.5, 1.4, 0.2, 0], [4.9, 3.0, 1.4, 0.2, 0], 23 | [7.0, 3.2, 4.7, 1.4, 1], [6.4, 3.2, 4.5, 1.5, 1]]) 24 | X = data[:, :-1] # Features 25 | y = data[:, -1].astype(int) # Labels 26 | 27 | print("\nLoaded Dataset:") 28 | print("Features (X) Shape:", X.shape) 29 | print("Labels (y) Shape:", y.shape) 30 | print("First 3 Samples:\n", np.hstack((X[:3], y[:3].reshape(-1, 1)))) 31 | 32 | # %% [3. Normalization] 33 | # Scale features to [0, 1] using min-max normalization. 34 | X_min = np.min(X, axis=0) 35 | X_max = np.max(X, axis=0) 36 | X_normalized = (X - X_min) / (X_max - X_min) 37 | print("\nNormalized Features (first 3 rows):\n", X_normalized[:3]) 38 | 39 | # %% [4. Standardization] 40 | # Scale features to mean=0, std=1. 41 | X_mean = np.mean(X, axis=0) 42 | X_std = np.std(X, axis=0) 43 | X_standardized = (X - X_mean) / X_std 44 | print("\nStandardized Features (first 3 rows):\n", X_standardized[:3]) 45 | 46 | # %% [5. Train/Test Splitting] 47 | # Split dataset into training and testing sets. 
# Shuffle the row indices reproducibly, then cut them 80/20 into train and test.
np.random.seed(42)
indices = np.random.permutation(X.shape[0])
train_size = int(0.8 * X.shape[0])
train_idx = indices[:train_size]
test_idx = indices[train_size:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]
print("\nTrain/Test Split:")
for split_name, split_array in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(split_name + " Shape:", split_array.shape)

# %% [6. Visualizing Preprocessing]
# Visualize standardized vs. original features.
if load_iris:
    plt.figure(figsize=(8, 4))
    # Columns 0 and 2 are plotted for both the raw and the standardized data.
    for features, colour, legend_label in (
        (X, 'blue', 'Original'),
        (X_standardized, 'red', 'Standardized'),
    ):
        plt.scatter(features[:, 0], features[:, 2], c=colour, alpha=0.5, label=legend_label)
    plt.xlabel('Sepal Length')
    plt.ylabel('Petal Length')
    plt.title('Original vs. Standardized Iris Features')
    plt.legend()
    plt.savefig('preprocessing_scatter.png')

# %% [7. Practical ML Application]
# Prepare a synthetic dataset for ML classification.
np.random.seed(42)
X_synthetic = np.random.rand(100, 2)  # 100 samples, 2 features
# Label is 1 when the two features sum to more than 1.
y_synthetic = (X_synthetic[:, 0] + X_synthetic[:, 1] > 1).astype(int)
synthetic_mean = np.mean(X_synthetic, axis=0)
synthetic_std = np.std(X_synthetic, axis=0)
X_synthetic_std = (X_synthetic - synthetic_mean) / synthetic_std
# Draw 80 distinct training rows; the remaining 20 form the test set.
train_idx = np.random.choice(100, 80, replace=False)
test_idx = np.setdiff1d(np.arange(100), train_idx)
X_train_synthetic, X_test_synthetic = X_synthetic_std[train_idx], X_synthetic_std[test_idx]
print("\nSynthetic ML Dataset:")
print("Standardized X_train Shape:", X_train_synthetic.shape)
print("X_test Shape:", X_test_synthetic.shape)

# %% [8. Interview Scenario: Preprocessing]
# Discuss preprocessing for ML pipelines.
88 | print("\nInterview Scenario: Preprocessing") 89 | print("Q: How would you preprocess a dataset for ML with NumPy?") 90 | print("A: Normalize or standardize features, split into train/test sets.") 91 | print("Key: Standardization (mean=0, std=1) is common for ML algorithms.") 92 | print("Example: (X - np.mean(X, axis=0)) / np.std(X, axis=0) for standardization.") -------------------------------------------------------------------------------- /Numpy Fundamentals/01 Beginner NumPy Concepts/01 Array Creation and Properties/array_creation_properties.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | try: 4 | from sklearn.datasets import load_iris 5 | except ImportError: 6 | load_iris = None 7 | 8 | # %% [1. Introduction to Array Creation and Properties] 9 | # Learn how to create NumPy arrays and explore their properties for ML data handling. 10 | # Covers np.array, np.zeros, np.ones, np.random, array attributes, and reshaping. 11 | 12 | print("NumPy version:", np.__version__) 13 | 14 | # %% [2. Creating Arrays] 15 | # Create arrays using different methods. 16 | # From a Python list (e.g., Iris features). 17 | if load_iris: 18 | iris = load_iris() 19 | data = iris.data # Shape: (150, 4) 20 | else: 21 | # Fallback synthetic data 22 | data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]]) 23 | 24 | array_from_list = np.array(data) 25 | print("\nArray from List (Iris features):\n", array_from_list[:3]) 26 | 27 | # Zeros, ones, and random arrays 28 | zeros_array = np.zeros((3, 4)) # 3x4 array of zeros 29 | ones_array = np.ones((2, 5)) # 2x5 array of ones 30 | random_array = np.random.rand(3, 3) # 3x3 array of random values [0, 1) 31 | print("\nZeros Array:\n", zeros_array) 32 | print("\nOnes Array:\n", ones_array) 33 | print("\nRandom Array:\n", random_array) 34 | 35 | # %% [3. 
# Array Attributes]
# Explore array properties: shape, dtype, ndim.
print("\nArray Attributes (Iris array):")
print("Shape:", array_from_list.shape)  # (n_samples, n_features)
print("Data Type:", array_from_list.dtype)
print("Dimensions:", array_from_list.ndim)

# Example with random integer array
int_array = np.random.randint(0, 10, size=(4, 3))
print("\nInteger Array:\n", int_array)
print("Shape:", int_array.shape)
print("Data Type:", int_array.dtype)
print("Dimensions:", int_array.ndim)

# %% [4. Reshaping and Flattening]
# Reshape arrays for ML tasks (e.g., flattening features).
# Let NumPy infer the row count: the 150x4 Iris array still becomes 50x12,
# and the 3x4 synthetic fallback becomes 1x12 instead of raising ValueError.
reshaped_array = np.reshape(array_from_list, (-1, 12))
print(
    f"\nReshaped Array ({reshaped_array.shape[0]}x{reshaped_array.shape[1]}):\n",
    reshaped_array[:2],
)

flattened_array = np.ravel(array_from_list)  # Flatten to 1D
print("\nFlattened Array (first 10 elements):\n", flattened_array[:10])

# Example: Reshape for image-like data
image_array = np.random.rand(16, 16)  # Simulate 16x16 grayscale image
reshaped_image = np.reshape(image_array, (4, 64))  # Reshape to 4x64
print("\nImage Array Shape:", image_array.shape)
print("Reshaped Image Shape:", reshaped_image.shape)

# %% [5. Visualizing Arrays]
# Visualize a random 2D array as a heatmap.
plt.figure(figsize=(6, 4))
plt.imshow(random_array, cmap='viridis')
plt.colorbar()
plt.title('Random 2D Array Heatmap')
plt.savefig('array_creation_heatmap.png')

# Visualize Iris feature distribution
# Skipped when scikit-learn is unavailable, since the fallback data is not Iris.
if load_iris:
    plt.figure(figsize=(8, 4))
    plt.hist(array_from_list[:, 0], bins=20, color='blue', alpha=0.7)
    plt.title('Iris Sepal Length Distribution')
    plt.xlabel('Sepal Length (cm)')
    plt.ylabel('Frequency')
    plt.savefig('iris_sepal_histogram.png')

# %% [6. Practical ML Application]
# Create a synthetic dataset for ML classification.
82 | np.random.seed(42) 83 | X = np.random.rand(100, 2) # 100 samples, 2 features 84 | y = (X[:, 0] + X[:, 1] > 1).astype(int) # Binary labels 85 | print("\nSynthetic ML Dataset:") 86 | print("Features (X) Shape:", X.shape) 87 | print("Labels (y) Shape:", y.shape) 88 | print("First 5 samples:\n", np.hstack((X[:5], y[:5].reshape(-1, 1)))) 89 | 90 | # %% [7. Interview Scenario: Array Creation] 91 | # Discuss creating arrays for ML tasks. 92 | print("\nInterview Scenario: Array Creation") 93 | print("Q: How would you create a dataset for ML with NumPy?") 94 | print("A: Use np.random for features, np.array for structured data, and np.reshape for correct shapes.") 95 | print("Key: Ensure correct shape and dtype for ML model compatibility.") 96 | print("Example: np.random.rand(100, 2) for 100 samples with 2 features.") -------------------------------------------------------------------------------- /Numpy Fundamentals/03 Advanced NumPy Concepts/README.md: -------------------------------------------------------------------------------- 1 | # 🌐 Advanced NumPy Concepts (`numpy`) 2 | 3 | ## 📖 Introduction 4 | NumPy’s advanced concepts focus on performance optimization, custom functions, integration with ML frameworks, and complex tensor manipulations for AI and machine learning. This section covers **Vectorization and Performance**, **Custom Functions and Ufuncs**, **Integration with ML Frameworks**, and **Advanced Tensor Manipulations**, with practical examples and interview insights to elevate your NumPy expertise. 5 | 6 | ## 🎯 Learning Objectives 7 | - Optimize NumPy code with vectorization and memory-efficient techniques. 8 | - Create custom functions and ufuncs for ML tasks. 9 | - Integrate NumPy with TensorFlow, PyTorch, and scikit-learn. 10 | - Manipulate tensors for deep learning and large-scale data. 11 | 12 | ## 🔑 Key Concepts 13 | - **Vectorization and Performance**: 14 | - Replacing loops with vectorized operations. 
15 | - Memory-efficient computations (`np.memmap`, `np.lib.stride_tricks`). 16 | - Profiling and optimizing code. 17 | - **Custom Functions and Ufuncs**: 18 | - Writing ufuncs with `np.frompyfunc` or `numba`. 19 | - Vectorizing operations (`np.vectorize`). 20 | - Gradient computations for ML. 21 | - **Integration with ML Frameworks**: 22 | - Converting arrays to tensors (`tf.convert_to_tensor`, `torch.from_numpy`). 23 | - NumPy-based data pipelines. 24 | - Interfacing with scikit-learn. 25 | - **Advanced Tensor Manipulations**: 26 | - Batch processing for deep learning. 27 | - Sparse arrays (`scipy.sparse`). 28 | - Tensor decompositions (Tucker, CP). 29 | 30 | ## 📝 Example Walkthroughs 31 | The following Python files demonstrate each subsection: 32 | 33 | 1. **`vectorization_performance.py`**: 34 | - Compares loop vs. vectorized operations (`np.dot`). 35 | - Uses `np.memmap` and `np.lib.stride_tricks` for memory efficiency. 36 | - Visualizes performance (bar plot) and distance matrix (heatmap). 37 | 38 | Example code: 39 | ```python 40 | import numpy as np 41 | X = np.random.rand(10000, 100) 42 | result = np.dot(X.T, y) # Vectorized 43 | ``` 44 | 45 | 2. **`custom_functions_ufuncs.py`**: 46 | - Creates custom ufuncs (`np.frompyfunc`) and vectorized functions (`np.vectorize`). 47 | - Optimizes gradients with `numba`. 48 | - Visualizes custom activation and vectorized function outputs. 49 | 50 | Example code: 51 | ```python 52 | import numpy as np 53 | ufunc = np.frompyfunc(lambda x: np.clip(x, -1, 1), 1, 1) 54 | y = ufunc(X).astype(float) 55 | ``` 56 | 57 | 3. **`integration_ml_frameworks.py`**: 58 | - Converts NumPy arrays to TensorFlow/PyTorch tensors. 59 | - Builds a TensorFlow data pipeline and uses scikit-learn preprocessing. 60 | - Visualizes original vs. scaled features. 61 | 62 | Example code: 63 | ```python 64 | import tensorflow as tf 65 | X_np = np.random.rand(100, 5) 66 | X_tf = tf.convert_to_tensor(X_np, dtype=tf.float32) 67 | ``` 68 | 69 | 4. 
**`advanced_tensor_manipulations.py`**: 70 | - Normalizes image batches and processes sparse arrays. 71 | - Performs CP decomposition with `tensorly`. 72 | - Visualizes normalized images and sparse matrix structure. 73 | 74 | Example code: 75 | ```python 76 | import numpy as np 77 | images = np.random.rand(32, 3, 64, 64) 78 | images_normalized = images - np.mean(images, axis=(2, 3), keepdims=True) 79 | ``` 80 | 81 | ## 🛠️ Practical Tasks 82 | 1. **Vectorization**: 83 | - Optimize a loop-based computation (e.g., dot product) with vectorization. 84 | - Process a large dataset with `np.memmap`. 85 | 2. **Custom Functions**: 86 | - Write a custom ufunc for a non-standard activation function. 87 | - Optimize a gradient computation with `numba`. 88 | 3. **Framework Integration**: 89 | - Convert a NumPy dataset to a TensorFlow `tf.data.Dataset`. 90 | - Preprocess features with scikit-learn and NumPy. 91 | 4. **Tensor Manipulations**: 92 | - Normalize a batch of images for a CNN. 93 | - Apply CP decomposition to compress a tensor. 94 | 95 | ## 💡 Interview Tips 96 | - **Common Questions**: 97 | - Why is vectorization faster than loops in NumPy? 98 | - How do you optimize a NumPy computation with `numba`? 99 | - How do you integrate NumPy with TensorFlow/PyTorch? 100 | - What are the benefits of tensor decomposition in ML? 101 | - **Tips**: 102 | - Explain vectorization’s use of C-based operations for speed. 103 | - Highlight `numba`’s JIT compilation for ML optimizations. 104 | - Be ready to code a data pipeline or tensor decomposition. 105 | - **Coding Tasks**: 106 | - Vectorize a loop-based computation. 107 | - Convert a NumPy array to a TensorFlow dataset. 108 | - Implement a sparse matrix operation. 
109 | 110 | ## 📚 Resources 111 | - [NumPy Performance Tips](https://numpy.org/doc/stable/user/performance.html) 112 | - [NumPy and Numba](https://numba.pydata.org/) 113 | - [TensorFlow Data Pipeline](https://www.tensorflow.org/guide/data) 114 | - [Scikit-learn Preprocessing](https://scikit-learn.org/stable/modules/preprocessing.html) 115 | - [Tensorly Documentation](http://tensorly.org/stable/) 116 | - [SciPy Sparse Arrays](https://docs.scipy.org/doc/scipy/reference/sparse.html) -------------------------------------------------------------------------------- /Numpy Fundamentals/01 Beginner NumPy Concepts/README.md: -------------------------------------------------------------------------------- 1 | # 🌱 Beginner NumPy Concepts (`numpy`) 2 | 3 | ## 📖 Introduction 4 | NumPy is the cornerstone of numerical computing in Python, essential for AI and machine learning (ML) data manipulation and preprocessing. This section introduces the fundamentals of NumPy, focusing on array creation, indexing, operations, and ML preprocessing. It covers **Array Creation and Properties**, **Indexing and Slicing**, **Basic Operations**, and **Data Preprocessing for ML**, with practical examples and interview insights tailored to beginners. 5 | 6 | ## 🎯 Learning Objectives 7 | - Create and manipulate NumPy arrays for ML datasets. 8 | - Access and filter data using indexing and slicing. 9 | - Perform element-wise operations, broadcasting, and universal functions (ufuncs). 10 | - Preprocess ML datasets with normalization, standardization, and train/test splitting. 11 | 12 | ## 🔑 Key Concepts 13 | - **Array Creation and Properties**: 14 | - Create arrays with `np.array`, `np.zeros`, `np.ones`, `np.random`. 15 | - Understand attributes (`shape`, `dtype`, `ndim`) and reshaping (`np.reshape`, `np.ravel`). 16 | - **Indexing and Slicing**: 17 | - Use basic indexing (`arr[0]`), slicing (`arr[:5, 1:3]`), boolean, and fancy indexing. 
18 | - **Basic Operations**: 19 | - Perform element-wise operations (e.g., `arr + 1`), broadcasting, and ufuncs (`np.sin`, `np.mean`). 20 | - **Data Preprocessing for ML**: 21 | - Load datasets (`np.loadtxt`), normalize/standardize features, and split train/test sets. 22 | 23 | ## 📝 Example Walkthroughs 24 | The following Python files demonstrate each subsection: 25 | 26 | 1. **`array_creation_properties.py`**: 27 | - Creates arrays from Iris data and synthetic datasets (`np.array`, `np.random.rand`). 28 | - Explores attributes (`shape`, `dtype`) and reshaping for ML tasks. 29 | - Visualizes a random array as a heatmap and Iris feature distribution. 30 | 31 | Example code: 32 | ```python 33 | import numpy as np 34 | data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]]) 35 | print("Shape:", data.shape) # (2, 4) 36 | reshaped = np.reshape(data, (4, 2)) 37 | ``` 38 | 39 | 2. **`indexing_slicing.py`**: 40 | - Demonstrates basic indexing, slicing, boolean, and fancy indexing on Iris data. 41 | - Filters samples (e.g., sepal length > 6.0) and selects specific rows/columns. 42 | - Visualizes filtered data with a scatter plot. 43 | 44 | Example code: 45 | ```python 46 | import numpy as np 47 | data = np.array([[5.1, 3.5, 1.4], [4.9, 3.0, 1.4]]) 48 | long_sepal = data[data[:, 0] > 5.0] 49 | ``` 50 | 51 | 3. **`basic_operations.py`**: 52 | - Performs element-wise operations (e.g., scaling), broadcasting (e.g., bias addition), and ufuncs (e.g., `np.sin`). 53 | - Computes a mean squared error for ML. 54 | - Visualizes normalized feature distribution and true vs. predicted labels. 55 | 56 | Example code: 57 | ```python 58 | import numpy as np 59 | data = np.array([[5.1, 3.5], [4.9, 3.0]]) 60 | normalized = data / np.max(data, axis=0) 61 | ``` 62 | 63 | 4. **`data_preprocessing_ml.py`**: 64 | - Loads Iris or synthetic data, normalizes/standardizes features, and splits train/test sets. 65 | - Prepares a synthetic ML dataset with standardization. 
66 | - Visualizes original vs. standardized features. 67 | 68 | Example code: 69 | ```python 70 | import numpy as np 71 | X = np.array([[5.1, 3.5], [4.9, 3.0]]) 72 | X_standardized = (X - np.mean(X, axis=0)) / np.std(X, axis=0) 73 | ``` 74 | 75 | ## 🛠️ Practical Tasks 76 | 1. **Array Creation**: 77 | - Create a 3x4 array of random values and print its shape and dtype. 78 | - Reshape a 1D array into a 2D matrix for ML input. 79 | 2. **Indexing and Slicing**: 80 | - Filter a dataset to select samples with a feature value above the mean. 81 | - Extract the first and last columns of a 2D array using slicing. 82 | 3. **Basic Operations**: 83 | - Normalize a dataset’s features to [0, 1] using broadcasting. 84 | - Compute the mean and standard deviation of each feature in a dataset. 85 | 4. **Data Preprocessing**: 86 | - Load a CSV dataset with `np.loadtxt` and standardize its features. 87 | - Split a dataset into 80% training and 20% testing sets. 88 | 89 | ## 💡 Interview Tips 90 | - **Common Questions**: 91 | - How do you create a NumPy array for an ML dataset? 92 | - What is broadcasting, and how is it used in ML preprocessing? 93 | - How would you filter outliers using boolean indexing? 94 | - Why standardize features before training an ML model? 95 | - **Tips**: 96 | - Explain broadcasting’s efficiency for feature scaling (e.g., `arr / np.max(arr)`). 97 | - Highlight boolean indexing for data cleaning (e.g., removing outliers). 98 | - Be ready to code normalization or train/test splitting with NumPy. 99 | - **Coding Tasks**: 100 | - Create a 2D array and normalize its columns. 101 | - Filter a dataset using a boolean condition. 102 | - Split a NumPy array into train/test sets. 
103 | 104 | ## 📚 Resources 105 | - [NumPy Quickstart](https://numpy.org/doc/stable/user/quickstart.html) 106 | - [NumPy Basics](https://numpy.org/doc/stable/user/absolute_beginners.html) 107 | - [NumPy Array Creation](https://numpy.org/doc/stable/reference/routines.array-creation.html) 108 | - [NumPy Indexing](https://numpy.org/doc/stable/user/basics.indexing.html) 109 | - [NumPy Broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html) 110 | - [SciPy Lecture Notes: NumPy](https://scipy-lectures.org/intro/numpy/index.html) 111 | - [Kaggle: Python and NumPy](https://www.kaggle.com/learn/python) -------------------------------------------------------------------------------- /Numpy Fundamentals/02 Intermediate NumPy Concepts/README.md: -------------------------------------------------------------------------------- 1 | # 🏋️ Intermediate NumPy Concepts (`numpy`) 2 | 3 | ## 📖 Introduction 4 | NumPy’s intermediate concepts build on beginner skills, focusing on linear algebra, tensor operations, statistical computations, and ML algorithm implementation for AI and machine learning. This section covers **Linear Algebra for ML**, **Tensor Operations**, **Statistical Computations**, and **Implementing ML Algorithms**, with practical examples and interview insights to deepen your NumPy proficiency. 5 | 6 | ## 🎯 Learning Objectives 7 | - Perform matrix operations and linear algebra for ML tasks. 8 | - Manipulate multi-dimensional tensors for deep learning workflows. 9 | - Compute statistical metrics and augment data for ML analysis. 10 | - Implement ML algorithms (e.g., linear regression, K-means) from scratch. 11 | 12 | ## 🔑 Key Concepts 13 | - **Linear Algebra for ML**: 14 | - Matrix operations (`np.dot`, `np.matmul`, `np.transpose`). 15 | - Solving linear systems (`np.linalg.solve`). 16 | - Eigenvalues/vectors (`np.linalg.eig`) and SVD (`np.linalg.svd`). 17 | - **Tensor Operations**: 18 | - Multi-dimensional arrays (3D+ tensors). 
19 | - Reshaping/transposing (`np.reshape`, `np.moveaxis`, `np.swapaxes`). 20 | - Contractions/reductions (`np.tensordot`, `np.sum`). 21 | - **Statistical Computations**: 22 | - Descriptive statistics (`np.mean`, `np.median`, `np.var`). 23 | - Correlation/covariance (`np.corrcoef`, `np.cov`). 24 | - Random sampling (`np.random.choice`). 25 | - **Implementing ML Algorithms**: 26 | - Linear regression with normal equations. 27 | - Logistic regression with gradient descent. 28 | - K-means clustering from scratch. 29 | 30 | ## 📝 Example Walkthroughs 31 | The following Python files demonstrate each subsection: 32 | 33 | 1. **`linear_algebra_ml.py`**: 34 | - Performs matrix operations (`np.dot`, `np.matmul`) on Iris data. 35 | - Solves linear systems and computes eigenvalues/SVD for dimensionality reduction. 36 | - Visualizes SVD-reduced data and covariance matrix heatmap. 37 | 38 | Example code: 39 | ```python 40 | import numpy as np 41 | X = np.random.rand(100, 4) 42 | U, S, Vt = np.linalg.svd(X, full_matrices=False) 43 | X_reduced = np.dot(U[:, :2], np.diag(S[:2])) 44 | ``` 45 | 46 | 2. **`tensor_operations.py`**: 47 | - Creates 3D/4D tensors for image data and reshapes/transposes them. 48 | - Performs tensor contractions (`np.tensordot`) and reductions (`np.sum`). 49 | - Visualizes tensor slices and reduction results. 50 | 51 | Example code: 52 | ```python 53 | import numpy as np 54 | tensor = np.random.rand(10, 32, 32) 55 | reshaped = np.reshape(tensor, (10, 32 * 32)) 56 | ``` 57 | 58 | 3. **`statistical_computations.py`**: 59 | - Computes descriptive statistics (`np.mean`, `np.var`) and correlation/covariance on Iris data. 60 | - Augments data with random sampling (`np.random.choice`). 61 | - Visualizes correlation matrix and augmented data distribution. 62 | 63 | Example code: 64 | ```python 65 | import numpy as np 66 | X = np.random.rand(150, 4) 67 | corr_matrix = np.corrcoef(X.T) 68 | ``` 69 | 70 | 4. 
**`implementing_ml_algorithms.py`**: 71 | - Implements linear regression (normal equations), logistic regression (gradient descent), and K-means clustering. 72 | - Uses synthetic or blob data for simplicity. 73 | - Visualizes linear regression fit and K-means clusters. 74 | 75 | Example code: 76 | ```python 77 | import numpy as np 78 | X = np.random.rand(100, 2) 79 | X_b = np.c_[np.ones((100, 1)), X] 80 | theta = np.linalg.solve(np.dot(X_b.T, X_b), np.dot(X_b.T, y)) 81 | ``` 82 | 83 | ## 🛠️ Practical Tasks 84 | 1. **Linear Algebra**: 85 | - Compute the covariance matrix of a dataset and find its eigenvalues. 86 | - Apply SVD to reduce a dataset to 2 dimensions. 87 | 2. **Tensor Operations**: 88 | - Reshape a 3D tensor (e.g., image batch) for a dense layer. 89 | - Perform a tensor contraction between two tensors. 90 | 3. **Statistical Computations**: 91 | - Calculate feature correlations and select the most predictive feature. 92 | - Augment a dataset with random sampling. 93 | 4. **ML Algorithms**: 94 | - Implement linear regression for a synthetic dataset. 95 | - Code K-means clustering and visualize the results. 96 | 97 | ## 💡 Interview Tips 98 | - **Common Questions**: 99 | - How does SVD enable PCA in ML? 100 | - What’s the difference between `np.dot` and `np.matmul`? 101 | - How would you implement logistic regression with NumPy? 102 | - Why use random sampling for data augmentation? 103 | - **Tips**: 104 | - Explain SVD’s role in dimensionality reduction (e.g., `U @ np.diag(S)` for PCA). 105 | - Highlight gradient descent’s iterative nature for logistic regression. 106 | - Be ready to code linear regression or K-means from scratch. 107 | - **Coding Tasks**: 108 | - Implement PCA using SVD. 109 | - Code gradient descent for logistic regression. 110 | - Compute a correlation matrix for feature selection. 
111 | 112 | ## 📚 Resources 113 | - [NumPy Linear Algebra](https://numpy.org/doc/stable/reference/routines.linalg.html) 114 | - [NumPy Random Sampling](https://numpy.org/doc/stable/reference/random/index.html) 115 | - [SciPy Lecture Notes: NumPy for ML](https://scipy-lectures.org/intro/numpy/index.html) 116 | - [NumPy Array Manipulation](https://numpy.org/doc/stable/reference/routines.array-manipulation.html) 117 | - [Kaggle: Machine Learning with NumPy](https://www.kaggle.com/learn/intro-to-machine-learning) 118 | - [Python for Data Analysis](https://www.oreilly.com/library/view/python-for-data/9781491957653/) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🚀 NumPy for AI/ML Roadmap 2 | 3 | ## 📖 Introduction 4 | NumPy is the foundational library for numerical computing in Python, powering data manipulation, tensor operations, and mathematical computations in AI and machine learning (ML). It underpins ML frameworks like TensorFlow, PyTorch, and scikit-learn, making it essential for preprocessing data, implementing algorithms, and optimizing performance. This roadmap provides a structured path to master NumPy for AI/ML, from basic array operations to advanced tensor manipulations and ML algorithm implementation, with a focus on practical applications and interview preparation. 5 | 6 | ## 🎯 Learning Objectives 7 | - **Master NumPy Basics**: Understand array creation, indexing, and operations for ML data handling. 8 | - **Apply Linear Algebra**: Use NumPy for matrix operations critical to ML algorithms. 9 | - **Handle Tensors**: Perform tensor manipulations for deep learning workflows. 10 | - **Implement ML Algorithms**: Code ML models (e.g., linear regression, PCA) using NumPy. 11 | - **Optimize Performance**: Leverage NumPy’s vectorization and integration with ML frameworks. 
12 | - **Prepare for Interviews**: Gain hands-on experience and insights for AI/ML job interviews. 13 | 14 | ## 🛠️ Prerequisites 15 | - **Python**: Familiarity with Python programming (lists, loops, functions). 16 | - **Basic Math**: Understanding of linear algebra (matrices, vectors) and statistics. 17 | - **Machine Learning Basics**: Optional knowledge of supervised learning, neural networks, and gradient descent. 18 | - **Development Environment**: Install NumPy (`pip install numpy`), Matplotlib (`pip install matplotlib`), and optional ML libraries (e.g., scikit-learn, TensorFlow). 19 | 20 | ## 📈 NumPy for AI/ML Learning Roadmap 21 | 22 | ### 🌱 Beginner NumPy Concepts 23 | Start with the fundamentals of NumPy for data manipulation and preprocessing in ML. 24 | 25 | - **Array Creation and Properties** 26 | - Creating arrays (`np.array`, `np.zeros`, `np.ones`, `np.random`) 27 | - Array attributes (shape, dtype, ndim) 28 | - Reshaping and flattening arrays (`np.reshape`, `np.ravel`) 29 | - **Indexing and Slicing** 30 | - Basic indexing (`arr[0]`, `arr[:, 1]`) 31 | - Boolean and fancy indexing 32 | - Slicing for data subsetting 33 | - **Basic Operations** 34 | - Element-wise operations (addition, multiplication, etc.) 35 | - Broadcasting for shape compatibility 36 | - Universal functions (ufuncs: `np.sin`, `np.exp`) and aggregation functions (`np.mean`) 37 | - **Data Preprocessing for ML** 38 | - Loading datasets (e.g., CSV with `np.loadtxt`, `np.genfromtxt`) 39 | - Normalization and standardization (`np.mean`, `np.std`) 40 | - Splitting data into train/test sets 41 | 42 | **Practical Tasks**: 43 | - Create a 2D array from a dataset (e.g., Iris) and compute mean/std per feature. 44 | - Use boolean indexing to filter outliers in a dataset. 45 | - Normalize a dataset using broadcasting. 46 | - Split a NumPy array into train/test sets for ML. 
47 | 48 | **Resources**: 49 | - [NumPy Quickstart](https://numpy.org/doc/stable/user/quickstart.html) 50 | - [NumPy Basics](https://numpy.org/doc/stable/user/absolute_beginners.html) 51 | - [NumPy Array Creation](https://numpy.org/doc/stable/reference/routines.array-creation.html) 52 | 53 | ### 🏋️ Intermediate NumPy Concepts 54 | Deepen your skills with linear algebra, tensor operations, and ML algorithm foundations. 55 | 56 | - **Linear Algebra for ML** 57 | - Matrix operations (`np.dot`, `np.matmul`, `np.transpose`) 58 | - Solving linear systems (`np.linalg.solve`) 59 | - Eigenvalues/vectors (`np.linalg.eig`) 60 | - Singular Value Decomposition (SVD) for dimensionality reduction 61 | - **Tensor Operations** 62 | - Multi-dimensional arrays (3D+ tensors for images, sequences) 63 | - Tensor reshaping and transposing (`np.moveaxis`, `np.swapaxes`) 64 | - Tensor contractions and reductions (`np.tensordot`, `np.sum`) 65 | - **Statistical Computations** 66 | - Descriptive statistics (`np.mean`, `np.median`, `np.var`) 67 | - Correlation and covariance (`np.corrcoef`, `np.cov`) 68 | - Random sampling for data augmentation (`np.random.choice`) 69 | - **Implementing ML Algorithms** 70 | - Linear regression with normal equations 71 | - Logistic regression with gradient descent 72 | - K-means clustering from scratch 73 | 74 | **Practical Tasks**: 75 | - Implement linear regression using `np.dot` and `np.linalg.solve`. 76 | - Compute PCA using SVD on a dataset (e.g., MNIST). 77 | - Reshape a 3D tensor (e.g., image batch) for neural network input. 78 | - Code K-means clustering with NumPy for a synthetic dataset. 
79 | 80 | **Resources**: 81 | - [NumPy Linear Algebra](https://numpy.org/doc/stable/reference/routines.linalg.html) 82 | - [NumPy Random Sampling](https://numpy.org/doc/stable/reference/random/index.html) 83 | - [SciPy Lecture Notes: NumPy for ML](https://scipy-lectures.org/intro/numpy/index.html) 84 | 85 | ### 🌐 Advanced NumPy Concepts 86 | Tackle advanced techniques for performance optimization and integration with ML frameworks. 87 | 88 | - **Vectorization and Performance** 89 | - Replacing loops with vectorized operations 90 | - Memory-efficient computations (`np.memmap`, `np.lib.stride_tricks`) 91 | - Profiling and optimizing NumPy code 92 | - **Custom Functions and Ufuncs** 93 | - Writing custom ufuncs with `np.frompyfunc` or `numba` 94 | - Vectorizing complex operations (`np.vectorize`) 95 | - Gradient computations for ML optimization 96 | - **Integration with ML Frameworks** 97 | - Converting NumPy arrays to TensorFlow/PyTorch tensors (`tf.convert_to_tensor`, `torch.from_numpy`) 98 | - NumPy as a backend for data pipelines 99 | - Interfacing with scikit-learn for preprocessing 100 | - **Advanced Tensor Manipulations** 101 | - Batch processing for deep learning (e.g., image batches) 102 | - Sparse arrays for large-scale data (`scipy.sparse`) 103 | - Tensor decompositions (e.g., Tucker, CP) for compression 104 | 105 | **Practical Tasks**: 106 | - Optimize a matrix multiplication loop with vectorization. 107 | - Write a custom ufunc for a non-standard activation function. 108 | - Convert a NumPy dataset to a TensorFlow `tf.data.Dataset`. 109 | - Implement a tensor decomposition for a 4D image tensor. 110 | 111 | **Resources**: 112 | - [NumPy Performance Tips](https://numpy.org/doc/stable/user/performance.html) 113 | - [NumPy and Numba](https://numba.pydata.org/) 114 | - [TensorFlow Data Pipeline](https://www.tensorflow.org/guide/data) 115 | 116 | ### 🧬 NumPy in AI/ML Applications 117 | Apply NumPy to real-world AI/ML tasks and frameworks. 
118 | 119 | - **Data Preprocessing** 120 | - Handling missing data (`np.isnan`, `np.where`) 121 | - Feature engineering (e.g., polynomial features) 122 | - Image preprocessing (e.g., resizing, augmentation) 123 | - **ML Algorithm Implementation** 124 | - Neural network forward/backward pass from scratch 125 | - Gradient descent optimization 126 | - Principal Component Analysis (PCA) for dimensionality reduction 127 | - **Deep Learning Support** 128 | - Preparing tensor inputs for CNNs/RNNs 129 | - Computing loss functions (e.g., cross-entropy) 130 | - Simulating batch normalization 131 | - **Evaluation Metrics** 132 | - Accuracy, precision, recall, F1-score 133 | - Confusion matrix and ROC curves 134 | - Mean squared error and R² for regression 135 | 136 | **Practical Tasks**: 137 | - Preprocess an image dataset (e.g., CIFAR-10) with NumPy. 138 | - Implement a neural network forward pass for MNIST. 139 | - Compute a confusion matrix for a classification model. 140 | - Apply PCA to reduce dimensionality of a high-dimensional dataset. 141 | 142 | **Resources**: 143 | - [NumPy for Data Science](https://numpy.org/doc/stable/user/absolute_beginners.html#data-science) 144 | - [Scikit-Learn with NumPy](https://scikit-learn.org/stable/modules/preprocessing.html) 145 | - [Kaggle: NumPy Tutorials](https://www.kaggle.com/learn/python) 146 | 147 | ### 📦 Optimization and Best Practices 148 | Optimize NumPy for large-scale ML workflows and production. 
149 | 150 | - **Memory Management** 151 | - Using `np.memmap` for large datasets 152 | - Avoiding unnecessary copies (`np.copy`, views) 153 | - Sparse matrices for memory efficiency 154 | - **Parallel Computing** 155 | - Leveraging `numba` for JIT compilation 156 | - Using `multiprocessing` with NumPy arrays 157 | - Integrating with Dask for big data 158 | - **Debugging and Testing** 159 | - Handling numerical stability (e.g., overflow, underflow) 160 | - Unit testing NumPy code with `pytest` 161 | - Validating tensor shapes and dtypes 162 | - **Production Integration** 163 | - Exporting NumPy arrays to ML frameworks 164 | - Saving/loading arrays (`np.save`, `np.load`) 165 | - Interfacing with pandas for data analysis 166 | 167 | **Practical Tasks**: 168 | - Process a large dataset with `np.memmap` and Dask. 169 | - Optimize a gradient descent loop with `numba`. 170 | - Write unit tests for a custom NumPy ML function. 171 | - Save a preprocessed dataset as `.npy` for a TensorFlow pipeline. 172 | 173 | **Resources**: 174 | - [NumPy Memory Management](https://numpy.org/doc/stable/reference/arrays.ndarray.html#memory-layout) 175 | - [Dask with NumPy](https://docs.dask.org/en/stable/array.html) 176 | - [NumPy Testing](https://numpy.org/doc/stable/reference/routines.testing.html) 177 | 178 | ## 💡 Learning Tips 179 | - **Hands-On Practice**: Code each section’s tasks in a Jupyter notebook. Use datasets like MNIST, CIFAR-10, or synthetic data from `np.random`. 180 | - **Visualize Results**: Plot arrays, matrices, and ML outputs (e.g., decision boundaries, PCA results) using Matplotlib. 181 | - **Experiment**: Modify array shapes, operations, or algorithms (e.g., change learning rates in gradient descent) and analyze performance. 182 | - **Portfolio Projects**: Build projects like a NumPy-based linear regression model, PCA pipeline, or neural network to showcase skills. 183 | - **Community**: Engage with NumPy forums, Stack Overflow, and Kaggle for examples and support. 
184 | 185 | ## 🛠️ Practical Tasks 186 | 1. **Beginner**: Load a CSV dataset with NumPy and normalize features. 187 | 2. **Intermediate**: Implement logistic regression with gradient descent. 188 | 3. **Advanced**: Optimize a neural network forward pass with vectorization. 189 | 4. **AI/ML Applications**: Code PCA for dimensionality reduction on MNIST. 190 | 5. **Optimization**: Process a large dataset with `np.memmap` and save as `.npy`. 191 | 192 | ## 💼 Interview Preparation 193 | - **Common Questions**: 194 | - How does NumPy’s broadcasting work for ML computations? 195 | - How would you implement linear regression with NumPy? 196 | - What are the benefits of vectorization over loops? 197 | - How do you handle large datasets with NumPy? 198 | - **Coding Tasks**: 199 | - Implement matrix multiplication or SVD for PCA. 200 | - Code a neural network forward pass with NumPy. 201 | - Preprocess a dataset (e.g., normalize, split) using NumPy. 202 | - **Tips**: 203 | - Explain broadcasting’s role in efficient ML computations. 204 | - Highlight NumPy’s integration with TensorFlow/PyTorch. 205 | - Practice debugging numerical issues (e.g., NaN values). 
206 | 207 | ## 📚 Resources 208 | - **Official Documentation**: 209 | - [NumPy Official Site](https://numpy.org/) 210 | - [NumPy User Guide](https://numpy.org/doc/stable/user/) 211 | - [NumPy API Reference](https://numpy.org/doc/stable/reference/) 212 | - **Tutorials**: 213 | - [NumPy Quickstart Tutorial](https://numpy.org/doc/stable/user/quickstart.html) 214 | - [SciPy Lecture Notes](https://scipy-lectures.org/intro/numpy/index.html) 215 | - [Kaggle: Python and NumPy](https://www.kaggle.com/learn/python) 216 | - **Books**: 217 | - *Python for Data Analysis* by Wes McKinney 218 | - *Numerical Python* by Robert Johansson 219 | - *Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow* by Aurélien Géron 220 | - **Community**: 221 | - [NumPy GitHub](https://github.com/numpy/numpy) 222 | - [Stack Overflow: NumPy Tag](https://stackoverflow.com/questions/tagged/numpy) 223 | - [NumPy Mailing List](https://mail.python.org/mailman3/lists/numpy-discussion.python.org/) 224 | 225 | ## 📅 Suggested Timeline 226 | - **Week 1**: Beginner Concepts (Arrays, Indexing, Operations) 227 | - **Week 2**: Intermediate Concepts (Linear Algebra, Tensors, ML Algorithms) 228 | - **Week 3**: Advanced Concepts (Vectorization, Framework Integration) 229 | - **Week 4**: AI/ML Applications and Optimization 230 | - **Week 5**: Portfolio project and interview prep 231 | 232 | ## 🚀 Get Started 233 | Clone this repository and start with the Beginner Concepts section. Run the example code in a Jupyter notebook, experiment with tasks, and build a portfolio project (e.g., a NumPy-based ML pipeline) to showcase your skills. Happy learning, and good luck with your AI/ML journey! 
-------------------------------------------------------------------------------- /Numpy Interview Questions/README.md: -------------------------------------------------------------------------------- 1 | # NumPy Interview Questions for AI/ML Roles 2 | 3 | This README provides 170 NumPy interview questions tailored for AI/ML roles, focusing on numerical computing with NumPy in Python. The questions cover **core NumPy concepts** (e.g., array creation, operations, indexing, broadcasting, linear algebra) and their applications in AI/ML tasks like data preprocessing, feature engineering, and model input preparation. Questions are categorized by topic and divided into **Basic**, **Intermediate**, and **Advanced** levels to support candidates preparing for roles requiring NumPy in AI/ML workflows. 4 | 5 | ## Array Creation and Manipulation 6 | 7 | ### Basic 8 | 1. **What is NumPy, and why is it important in AI/ML?** 9 | NumPy provides efficient array operations for numerical computing in AI/ML. 10 | ```python 11 | import numpy as np 12 | array = np.array([1, 2, 3]) 13 | ``` 14 | 15 | 2. **How do you create a NumPy array from a Python list?** 16 | Converts lists to arrays for fast computation. 17 | ```python 18 | import numpy as np 19 | list_data = [1, 2, 3] 20 | array = np.array(list_data) 21 | ``` 22 | 23 | 3. **How do you create a NumPy array with zeros or ones?** 24 | Initializes arrays for placeholders. 25 | ```python 26 | zeros = np.zeros((2, 3)) 27 | ones = np.ones((2, 3)) 28 | ``` 29 | 30 | 4. **What is the role of `np.arange` in NumPy?** 31 | Creates arrays with a range of values. 32 | ```python 33 | array = np.arange(0, 10, 2) 34 | ``` 35 | 36 | 5. **How do you create a NumPy array with random values?** 37 | Generates random data for testing. 38 | ```python 39 | random_array = np.random.rand(2, 3) 40 | ``` 41 | 42 | 6. **How do you reshape a NumPy array?** 43 | Changes array dimensions for ML inputs. 
44 | ```python 45 | array = np.array([1, 2, 3, 4, 5, 6]) 46 | reshaped = array.reshape(2, 3) 47 | ``` 48 | 49 | #### Intermediate 50 | 7. **Write a function to create a 2D NumPy array with a given shape.** 51 | Initializes arrays dynamically. 52 | ```python 53 | def create_2d_array(rows, cols, fill=0): 54 | return np.full((rows, cols), fill) 55 | ``` 56 | 57 | 8. **How do you create a NumPy array with evenly spaced values?** 58 | Uses `linspace` for uniform intervals. 59 | ```python 60 | array = np.linspace(0, 10, 5) 61 | ``` 62 | 63 | 9. **Write a function to initialize a NumPy array with random integers.** 64 | Generates integer arrays for simulations. 65 | ```python 66 | def random_int_array(shape, low, high): 67 | return np.random.randint(low, high, shape) 68 | ``` 69 | 70 | 10. **How do you create a diagonal matrix in NumPy?** 71 | Initializes matrices for linear algebra. 72 | ```python 73 | diag_matrix = np.diag([1, 2, 3]) 74 | ``` 75 | 76 | 11. **Write a function to visualize a NumPy array as a heatmap.** 77 | Displays array values graphically. 78 | ```python 79 | import matplotlib.pyplot as plt 80 | def plot_heatmap(array): 81 | plt.imshow(array, cmap='viridis') 82 | plt.colorbar() 83 | plt.savefig('heatmap.png') 84 | ``` 85 | 86 | 12. **How do you concatenate two NumPy arrays?** 87 | Combines arrays for data aggregation. 88 | ```python 89 | array1 = np.array([[1, 2], [3, 4]]) 90 | array2 = np.array([[5, 6]]) 91 | concatenated = np.concatenate((array1, array2), axis=0) 92 | ``` 93 | 94 | #### Advanced 95 | 13. **Write a function to create a NumPy array with a custom pattern.** 96 | Generates structured arrays. 97 | ```python 98 | def custom_pattern(shape, pattern='checkerboard'): 99 | array = np.zeros(shape) 100 | if pattern == 'checkerboard': 101 | array[::2, ::2] = 1 102 | array[1::2, 1::2] = 1 103 | return array 104 | ``` 105 | 106 | 14. **How do you optimize array creation for large datasets?** 107 | Uses efficient initialization methods. 
108 | ```python 109 | large_array = np.empty((10000, 10000)) 110 | ``` 111 | 112 | 15. **Write a function to create a block matrix in NumPy.** 113 | Constructs matrices from subarrays. 114 | ```python 115 | def block_matrix(blocks): 116 | return np.block(blocks) 117 | ``` 118 | 119 | 16. **How do you handle memory-efficient array creation?** 120 | Uses sparse arrays or generators. 121 | ```python 122 | from scipy.sparse import csr_matrix 123 | sparse_array = csr_matrix((1000, 1000)) 124 | ``` 125 | 126 | 17. **Write a function to create a NumPy array with padded borders.** 127 | Adds padding for convolutional tasks. 128 | ```python 129 | def pad_array(array, pad_width): 130 | return np.pad(array, pad_width, mode='constant') 131 | ``` 132 | 133 | 18. **How do you create a NumPy array with a specific memory layout?** 134 | Controls C or Fortran order for performance. 135 | ```python 136 | array = np.array([[1, 2], [3, 4]], order='F') 137 | ``` 138 | 139 | ## Array Operations 140 | 141 | ### Basic 142 | 19. **How do you perform element-wise addition in NumPy?** 143 | Adds arrays for data transformations. 144 | ```python 145 | array1 = np.array([1, 2, 3]) 146 | array2 = np.array([4, 5, 6]) 147 | result = array1 + array2 148 | ``` 149 | 150 | 20. **What is broadcasting in NumPy, and how does it work?** 151 | Virtually stretches the smaller operand (here a scalar) to the larger array's shape so element-wise operations work without copying data. 152 | ```python 153 | array = np.array([[1, 2], [3, 4]]) 154 | scalar = 2 155 | result = array * scalar 156 | ``` 157 | 158 | 21. **How do you compute the dot product of two NumPy arrays?** 159 | Computes the inner product of 1D arrays (and the matrix product for 2D arrays). 160 | ```python 161 | array1 = np.array([1, 2]) 162 | array2 = np.array([3, 4]) 163 | dot_product = np.dot(array1, array2) 164 | ``` 165 | 166 | 22. **How do you calculate the mean of a NumPy array?** 167 | Computes statistics for analysis. 168 | ```python 169 | array = np.array([1, 2, 3, 4]) 170 | mean = np.mean(array) 171 | ``` 172 | 173 | 23. 
**How do you perform matrix transposition in NumPy?** 174 | Flips rows and columns. 175 | ```python 176 | array = np.array([[1, 2], [3, 4]]) 177 | transposed = array.T 178 | ``` 179 | 180 | 24. **How do you visualize array operations in NumPy?** 181 | Plots operation results. 182 | ```python 183 | import matplotlib.pyplot as plt 184 | array = np.array([1, 2, 3, 4]) 185 | plt.plot(array, array**2) 186 | plt.savefig('array_operation.png') 187 | ``` 188 | 189 | #### Intermediate 190 | 25. **Write a function to perform element-wise operations on NumPy arrays.** 191 | Applies custom operations. 192 | ```python 193 | def element_wise_op(array1, array2, op='add'): 194 | if op == 'add': 195 | return array1 + array2 196 | elif op == 'multiply': 197 | return array1 * array2 198 | ``` 199 | 200 | 26. **How do you implement broadcasting for custom operations?** 201 | Aligns shapes dynamically. 202 | ```python 203 | array = np.array([[1, 2], [3, 4]]) 204 | vector = np.array([1, 2]) 205 | result = array + vector 206 | ``` 207 | 208 | 27. **Write a function to compute the outer product of two NumPy arrays.** 209 | Generates matrix from vectors. 210 | ```python 211 | def outer_product(vec1, vec2): 212 | return np.outer(vec1, vec2) 213 | ``` 214 | 215 | 28. **How do you perform batch operations on NumPy arrays?** 216 | Processes multiple arrays efficiently. 217 | ```python 218 | arrays = [np.array([1, 2]), np.array([3, 4])] 219 | results = [arr * 2 for arr in arrays] 220 | ``` 221 | 222 | 29. **Write a function to normalize a NumPy array.** 223 | Standardizes values to zero mean and unit variance (z-score) for ML preprocessing. 224 | ```python 225 | def normalize_array(array): 226 | return (array - np.mean(array)) / np.std(array) 227 | ``` 228 | 229 | 30. **How do you handle numerical stability in NumPy operations?** 230 | Works on a log scale to keep large magnitudes manageable; `np.log1p` computes log(1 + x) and is most valuable for precision when x is close to zero. 231 | ```python 232 | array = np.array([1e10, 2e10]) 233 | result = np.log1p(array) 234 | ``` 235 | 236 | 31. 
**Write a function to implement matrix factorization in NumPy.** 238 | Decomposes matrices for dimensionality reduction. 239 | ```python 240 | def matrix_factorization(matrix, k): 241 | U, S, Vt = np.linalg.svd(matrix) 242 | return U[:, :k], np.diag(S[:k]), Vt[:k, :] 243 | ``` 244 | 245 | 32. **How do you optimize NumPy operations for performance?** 246 | Uses vectorized operations. 247 | ```python 248 | array = np.random.rand(1000, 1000) 249 | result = np.einsum('ij,ij->i', array, array) 250 | ``` 251 | 252 | 33. **Write a function to perform sliding window operations in NumPy.** 253 | Applies operations over windows. 254 | ```python 255 | def sliding_window(array, window_size): 256 | return np.lib.stride_tricks.sliding_window_view(array, window_size) 257 | ``` 258 | 259 | 34. **How do you implement custom reductions in NumPy?** 260 | Defines specialized aggregations. 261 | ```python 262 | def custom_reduction(array, op='sum'): 263 | if op == 'sum': 264 | return np.sum(array, axis=0) 265 | elif op == 'prod': 266 | return np.prod(array, axis=0) 267 | ``` 268 | 269 | 35. **Write a function to handle sparse array operations in NumPy.** 270 | Optimizes for sparse data. 271 | ```python 272 | from scipy.sparse import csr_matrix 273 | def sparse_operation(array): 274 | sparse = csr_matrix(array) 275 | return sparse.dot(sparse.T) 276 | ``` 277 | 278 | 36. **How do you parallelize NumPy operations?** 279 | Uses libraries like Numba or Dask. 280 | ```python 281 | from numba import jit 282 | @jit 283 | def fast_operation(array): 284 | return array * 2 285 | ``` 286 | 287 | ## Indexing and Slicing 288 | 289 | ### Basic 290 | 37. **How do you access elements in a NumPy array?** 291 | Uses indices for data retrieval. 292 | ```python 293 | array = np.array([[1, 2], [3, 4]]) 294 | element = array[0, 1] 295 | ``` 296 | 297 | 38. **What is array slicing in NumPy?** 298 | Extracts subarrays with ranges. 
299 | ```python 300 | array = np.array([1, 2, 3, 4]) 301 | slice = array[1:3] 302 | ``` 303 | 304 | 39. **How do you use boolean indexing in NumPy?** 305 | Filters arrays with conditions. 306 | ```python 307 | array = np.array([1, 2, 3, 4]) 308 | filtered = array[array > 2] 309 | ``` 310 | 311 | 40. **How do you access rows and columns in a 2D NumPy array?** 312 | Uses slicing for matrix operations. 313 | ```python 314 | array = np.array([[1, 2], [3, 4]]) 315 | row = array[0, :] 316 | ``` 317 | 318 | 41. **What is fancy indexing in NumPy?** 319 | Uses arrays as indices. 320 | ```python 321 | array = np.array([10, 20, 30, 40]) 322 | indices = [0, 2] 323 | selected = array[indices] 324 | ``` 325 | 326 | 42. **How do you visualize sliced NumPy arrays?** 327 | Plots subarray data. 328 | ```python 329 | import matplotlib.pyplot as plt 330 | array = np.random.rand(5, 5) 331 | plt.imshow(array[:3, :3], cmap='Blues') 332 | plt.savefig('sliced_array.png') 333 | ``` 334 | 335 | #### Intermediate 336 | 43. **Write a function to extract a subarray using NumPy slicing.** 337 | Retrieves specific regions. 338 | ```python 339 | def extract_subarray(array, rows, cols): 340 | return array[rows[0]:rows[1], cols[0]:cols[1]] 341 | ``` 342 | 343 | 44. **How do you use advanced indexing with NumPy?** 344 | Combines integer and boolean indexing. 345 | ```python 346 | array = np.array([[1, 2], [3, 4]]) 347 | rows = np.array([0, 1]) 348 | cols = np.array([1, 0]) 349 | selected = array[rows, cols] 350 | ``` 351 | 352 | 45. **Write a function to filter a NumPy array with conditions.** 353 | Selects elements dynamically. 354 | ```python 355 | def filter_array(array, threshold): 356 | return array[array > threshold] 357 | ``` 358 | 359 | 46. **How do you modify array elements using indexing?** 360 | Updates values conditionally. 361 | ```python 362 | array = np.array([1, 2, 3, 4]) 363 | array[array < 3] = 0 364 | ``` 365 | 366 | 47. 
**Write a function to extract diagonal elements in NumPy.** 367 | Retrieves matrix diagonals. 368 | ```python 369 | def get_diagonal(array): 370 | return np.diagonal(array) 371 | ``` 372 | 373 | 48. **How do you handle out-of-bounds indexing in NumPy?** 374 | Uses safe indexing techniques. 375 | ```python 376 | def safe_index(array, index): 377 | return array[index] if 0 <= index < len(array) else None 378 | ``` 379 | 380 | #### Advanced 381 | 49. **Write a function to implement multi-dimensional indexing in NumPy.** 382 | Accesses complex array structures. 383 | ```python 384 | def multi_dim_index(array, indices): 385 | return array[tuple(indices)] 386 | ``` 387 | 388 | 50. **How do you optimize indexing for large NumPy arrays?** 389 | Uses strides or views. 390 | ```python 391 | array = np.random.rand(1000, 1000) 392 | view = array[::2, ::2] 393 | ``` 394 | 395 | 51. **Write a function to perform conditional indexing with multiple criteria.** 396 | Filters with complex logic. 397 | ```python 398 | def multi_condition_index(array, cond1, cond2): 399 | return array[np.logical_and(array > cond1, array < cond2)] 400 | ``` 401 | 402 | 52. **How do you implement custom indexing for NumPy arrays?** 403 | Defines specialized access patterns. 404 | ```python 405 | def custom_index(array, pattern='even'): 406 | if pattern == 'even': 407 | return array[::2] 408 | return array[1::2] 409 | ``` 410 | 411 | 53. **Write a function to reorder NumPy array elements.** 412 | Rearranges based on indices. 413 | ```python 414 | def reorder_array(array, order): 415 | return array[np.argsort(order)] 416 | ``` 417 | 418 | 54. **How do you handle sparse array indexing in NumPy?** 419 | Uses sparse formats for efficiency. 420 | ```python 421 | from scipy.sparse import csr_matrix 422 | def sparse_index(sparse_array, row, col): 423 | return sparse_array[row, col] 424 | ``` 425 | 426 | ## Broadcasting and Vectorization 427 | 428 | ### Basic 429 | 55. 
**What is vectorization in NumPy, and why is it important?** 430 | Replaces loops with array operations for speed. 431 | ```python 432 | array = np.array([1, 2, 3]) 433 | result = array * 2 434 | ``` 435 | 436 | 56. **How do you perform broadcasting with mismatched shapes?** 437 | Aligns arrays automatically. 438 | ```python 439 | array = np.array([[1, 2], [3, 4]]) 440 | vector = np.array([1, 2]) 441 | result = array + vector 442 | ``` 443 | 444 | 57. **How do you compute element-wise operations without loops?** 445 | Uses vectorized functions. 446 | ```python 447 | array = np.array([1, 2, 3]) 448 | squared = np.square(array) 449 | ``` 450 | 451 | 58. **What is the role of `np.vectorize` in NumPy?** 452 | Applies scalar functions to arrays. 453 | ```python 454 | def my_func(x): 455 | return x * 2 456 | vectorized = np.vectorize(my_func) 457 | result = vectorized(np.array([1, 2, 3])) 458 | ``` 459 | 460 | 59. **How do you visualize broadcasting results?** 461 | Plots operation outputs. 462 | ```python 463 | import matplotlib.pyplot as plt 464 | array = np.ones((3, 3)) + np.array([1, 2, 3]) 465 | plt.imshow(array, cmap='Greys') 466 | plt.savefig('broadcasting_result.png') 467 | ``` 468 | 469 | 60. **How do you check broadcasting compatibility in NumPy?** 470 | Verifies shape alignment. 471 | ```python 472 | def check_broadcasting(shape1, shape2): 473 | try: 474 | np.broadcast_arrays(np.empty(shape1), np.empty(shape2)) 475 | return True 476 | except ValueError: 477 | return False 478 | ``` 479 | 480 | #### Intermediate 481 | 61. **Write a function to perform broadcasting with custom arrays.** 482 | Applies operations across shapes. 483 | ```python 484 | def broadcast_operation(array, vector): 485 | return array + vector 486 | ``` 487 | 488 | 62. **How do you optimize vectorized operations in NumPy?** 489 | Minimizes memory overhead. 490 | ```python 491 | array = np.random.rand(1000) 492 | result = np.sin(array, out=np.empty_like(array)) 493 | ``` 494 | 495 | 63. 
**Write a function to apply vectorized operations conditionally.** 496 | Uses masks for selective computation. 497 | ```python 498 | def conditional_vectorize(array, threshold): 499 | return np.where(array > threshold, array * 2, array) 500 | ``` 501 | 502 | 64. **How do you handle broadcasting with higher-dimensional arrays?** 503 | Adds an axis with `np.newaxis` so the (4,) vector becomes (4, 1) and broadcasts along the middle axis of the (3, 4, 5) array. 504 | ```python 505 | array = np.ones((3, 4, 5)) 506 | vector = np.array([1, 2, 3, 4]) 507 | result = array + vector[:, np.newaxis] 508 | ``` 509 | 510 | 65. **Write a function to vectorize a custom computation.** 511 | Applies scalar logic to arrays. 512 | ```python 513 | def vectorized_custom(array): 514 | return np.vectorize(lambda x: x**2 if x > 0 else 0)(array) 515 | ``` 516 | 517 | 66. **How do you visualize vectorized operation performance?** 518 | Times a vectorized operation across array sizes and plots the result; time an equivalent Python loop the same way to compare loop vs. vectorized performance. 519 | ```python 520 | import matplotlib.pyplot as plt 521 | import time 522 | sizes = [100, 1000, 10000] 523 | times = [] 524 | for n in sizes: 525 | array = np.random.rand(n) 526 | start = time.time() 527 | np.sin(array) 528 | times.append(time.time() - start) 529 | plt.plot(sizes, times) 530 | plt.savefig('vectorized_performance.png') 531 | ``` 532 | 533 | #### Advanced 534 | 67. **Write a function to implement complex broadcasting rules.** 535 | Handles intricate shape alignments. 536 | ```python 537 | def complex_broadcast(array, shape): 538 | return array + np.ones(shape) 539 | ``` 540 | 541 | 68. **How do you optimize broadcasting for memory efficiency?** 542 | Uses in-place operations. 543 | ```python 544 | array = np.random.rand(1000, 1000) 545 | array += 1 546 | ``` 547 | 548 | 69. **Write a function to vectorize matrix operations.** 549 | Applies matrix computations efficiently. 550 | ```python 551 | def vectorized_matrix_op(matrix1, matrix2): 552 | return np.einsum('ij,jk->ik', matrix1, matrix2) 553 | ``` 554 | 555 | 70. 
**How do you handle broadcasting with sparse arrays?** 556 | Uses sparse formats for efficiency. 557 | ```python 558 | from scipy.sparse import csr_matrix 559 | def sparse_broadcast(sparse, dense): 560 | return sparse + dense 561 | ``` 562 | 563 | 71. **Write a function to debug broadcasting issues.** 564 | Logs shape mismatches. 565 | ```python 566 | import logging 567 | def debug_broadcast(array1, array2): 568 | logging.basicConfig(filename='numpy.log', level=logging.INFO) 569 | try: 570 | return array1 + array2 571 | except ValueError as e: 572 | logging.error(f"Broadcasting error: {e}") 573 | raise 574 | ``` 575 | 576 | 72. **How do you implement broadcasting with custom dtypes?** 577 | Handles specialized data types. 578 | ```python 579 | array = np.array([1, 2], dtype=np.float32) 580 | result = array + np.array([1, 2], dtype=np.int16) 581 | ``` 582 | 583 | ## Linear Algebra 584 | 585 | ### Basic 586 | 73. **How do you compute the matrix inverse in NumPy?** 587 | Inverts matrices for solving systems. 588 | ```python 589 | matrix = np.array([[1, 2], [3, 4]]) 590 | inverse = np.linalg.inv(matrix) 591 | ``` 592 | 593 | 74. **What is the determinant of a matrix in NumPy?** 594 | Measures matrix properties. 595 | ```python 596 | matrix = np.array([[1, 2], [3, 4]]) 597 | det = np.linalg.det(matrix) 598 | ``` 599 | 600 | 75. **How do you solve a linear system in NumPy?** 601 | Finds solutions to Ax = b. 602 | ```python 603 | A = np.array([[1, 2], [3, 4]]) 604 | b = np.array([5, 6]) 605 | x = np.linalg.solve(A, b) 606 | ``` 607 | 608 | 76. **How do you compute eigenvalues in NumPy?** 609 | Analyzes matrix properties. 610 | ```python 611 | matrix = np.array([[1, 2], [3, 4]]) 612 | eigenvalues = np.linalg.eigvals(matrix) 613 | ``` 614 | 615 | 77. **How do you perform singular value decomposition (SVD) in NumPy?** 616 | Decomposes matrices for ML. 617 | ```python 618 | matrix = np.array([[1, 2], [3, 4]]) 619 | U, S, Vt = np.linalg.svd(matrix) 620 | ``` 621 | 622 | 78. 
**How do you visualize matrix operations in NumPy?**
   Plots matrix transformations.
   ```python
   import matplotlib.pyplot as plt
   matrix = np.random.rand(5, 5)
   plt.imshow(matrix, cmap='hot')
   plt.savefig('matrix_plot.png')
   ```

#### Intermediate
79. **Write a function to solve a batch of linear systems in NumPy.**
   Handles multiple systems efficiently.
   ```python
   def batch_solve(A_batch, b_batch):
       A_batch = np.asarray(A_batch)
       b_batch = np.asarray(b_batch)
       # NumPy >= 2.0 only treats `b` as a vector when it is exactly 1-D, so a
       # stack of right-hand-side vectors needs an explicit trailing axis.
       if b_batch.ndim == A_batch.ndim - 1:
           return np.linalg.solve(A_batch, b_batch[..., np.newaxis])[..., 0]
       return np.linalg.solve(A_batch, b_batch)
   ```

80. **How do you compute the matrix rank in NumPy?**
   Determines linear independence.
   ```python
   matrix = np.array([[1, 2], [2, 4]])
   rank = np.linalg.matrix_rank(matrix)
   ```

81. **Write a function to perform QR decomposition in NumPy.**
   Decomposes matrices for stability.
   ```python
   def qr_decomposition(matrix):
       Q, R = np.linalg.qr(matrix)
       return Q, R
   ```

82. **How do you compute the condition number of a matrix?**
   Assesses numerical stability.
   ```python
   matrix = np.array([[1, 2], [3, 4]])
   cond = np.linalg.cond(matrix)
   ```

83. **Write a function to compute the Cholesky decomposition.**
   Factorizes symmetric positive-definite matrices.
   ```python
   def cholesky_decomp(matrix):
       return np.linalg.cholesky(matrix)
   ```

84. **How do you visualize eigenvalues of a matrix?**
   Plots eigenvalue distributions.
   ```python
   import matplotlib.pyplot as plt
   matrix = np.random.rand(5, 5)
   eigvals = np.linalg.eigvals(matrix)
   plt.scatter(eigvals.real, eigvals.imag)
   plt.savefig('eigenvalues_plot.png')
   ```

#### Advanced
85. **Write a function to implement iterative linear solvers in NumPy.**
   Solves large systems efficiently.
```python
from scipy.sparse.linalg import cg
def iterative_solve(A, b):
    x, info = cg(A, b)
    # cg reports failure through `info` instead of raising:
    # 0 means converged, anything else means the result is not a solution.
    if info != 0:
        raise RuntimeError(f"Conjugate gradient did not converge (info={info})")
    return x
```

86. **How do you optimize linear algebra operations in NumPy?**
   Uses BLAS/LAPACK for speed.
   ```python
   matrix = np.random.rand(1000, 1000)
   result = np.linalg.inv(matrix)
   ```

87. **Write a function to compute the pseudo-inverse in NumPy.**
   Handles non-square matrices.
   ```python
   def pseudo_inverse(matrix):
       return np.linalg.pinv(matrix)
   ```

88. **How do you implement tensor operations in NumPy?**
   Extends linear algebra to tensors.
   ```python
   tensor = np.random.rand(3, 3, 3)
   result = np.tensordot(tensor, tensor, axes=([2], [2]))
   ```

89. **Write a function to handle ill-conditioned matrices.**
   Stabilizes computations.
   ```python
   def safe_inverse(matrix, tol=1e-10):
       if np.linalg.cond(matrix) < 1/tol:
           return np.linalg.inv(matrix)
       return np.linalg.pinv(matrix)
   ```

90. **How do you parallelize linear algebra operations?**
   Uses multi-core processing.
   ```python
   from joblib import Parallel, delayed
   def parallel_matrix_inv(matrices):
       return Parallel(n_jobs=-1)(delayed(np.linalg.inv)(m) for m in matrices)
   ```

## Integration with AI/ML Workflows

### Basic
91. **How do you preprocess data with NumPy for AI/ML?**
   Normalizes and reshapes inputs.
   ```python
   data = np.random.rand(100, 10)
   normalized = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
   ```

92. **How do you create feature matrices in NumPy?**
   Structures data for ML models.
   ```python
   features = np.array([[1, 2], [3, 4], [5, 6]])
   ```

93. **How do you split data into train/test sets in NumPy?**
   Prepares data for evaluation.
```python
data = np.random.rand(100, 5)
train = data[:80]
test = data[80:]
```

94. **How do you compute pairwise distances in NumPy?**
   Used in clustering algorithms.
   ```python
   from scipy.spatial.distance import cdist
   points = np.random.rand(10, 2)
   distances = cdist(points, points)
   ```

95. **How do you one-hot encode labels in NumPy?**
   Prepares categorical data.
   ```python
   labels = np.array([0, 1, 2])
   one_hot = np.eye(3)[labels]
   ```

96. **How do you visualize data distributions in NumPy?**
   Plots histograms for analysis.
   ```python
   import matplotlib.pyplot as plt
   data = np.random.randn(1000)
   plt.hist(data, bins=30)
   plt.savefig('data_distribution.png')
   ```

#### Intermediate
97. **Write a function to preprocess images with NumPy for ML.**
   Normalizes and reshapes images.
   ```python
   def preprocess_image(image):
       return (image / 255.0).reshape(-1)
   ```

98. **How do you implement data augmentation with NumPy?**
   Generates synthetic data.
   ```python
   def augment_data(array):
       return array + np.random.normal(0, 0.1, array.shape)
   ```

99. **Write a function to compute feature correlations in NumPy.**
   Analyzes feature relationships.
   ```python
   def feature_correlation(features):
       return np.corrcoef(features, rowvar=False)
   ```

100. **How do you handle missing data in NumPy for ML?**
   Imputes NaNs with the column mean.
   ```python
   def handle_missing(array):
       # np.mean propagates NaN, which would "impute" NaN with NaN;
       # np.nanmean averages only the observed values in each column.
       return np.where(np.isnan(array), np.nanmean(array, axis=0), array)
   ```

101. **Write a function to standardize features in NumPy.**
   Scales features for ML models.
```python
def standardize_features(features):
    return (features - np.mean(features, axis=0)) / np.std(features, axis=0)
```

102. **How do you integrate NumPy with Scikit-learn?**
   Prepares data for ML pipelines.
   ```python
   from sklearn.linear_model import LogisticRegression
   X = np.random.rand(100, 5)
   y = np.random.randint(0, 2, 100)
   model = LogisticRegression().fit(X, y)
   ```

#### Advanced
103. **Write a function to implement PCA with NumPy.**
   Reduces dimensionality for ML.
   ```python
   def pca_transform(data, n_components):
       # PCA projects mean-centered data; projecting the raw data would leave
       # the feature means in the scores.
       centered = data - np.mean(data, axis=0)
       cov = np.cov(centered, rowvar=False)
       eigvals, eigvecs = np.linalg.eigh(cov)
       # eigh returns eigenvalues in ascending order; reverse the columns so
       # the highest-variance component comes first.
       top_k = eigvecs[:, ::-1][:, :n_components]
       return centered @ top_k
   ```

104. **How do you optimize NumPy for large-scale ML datasets?**
   Uses chunked processing.
   ```python
   def process_chunks(data, chunk_size=1000):
       for i in range(0, len(data), chunk_size):
           yield standardize_features(data[i:i + chunk_size])
   ```

105. **Write a function to compute gradients in NumPy.**
   Supports optimization in ML.
   ```python
   def compute_gradient(X, y, w):
       return X.T @ (X @ w - y) / len(y)
   ```

106. **How do you implement k-means clustering with NumPy?**
   Groups data points.
   ```python
   def kmeans(X, k, max_iters=100):
       # Sample without replacement so the initial centroids are distinct rows.
       centroids = X[np.random.choice(len(X), k, replace=False)]
       for _ in range(max_iters):
           # Pairwise Euclidean distances via broadcasting (no SciPy needed).
           distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
           labels = np.argmin(distances, axis=1)
           # Keep the previous centroid when a cluster ends up empty; the mean
           # of an empty selection would be NaN.
           new_centroids = np.array([
               X[labels == i].mean(axis=0) if np.any(labels == i) else centroids[i]
               for i in range(k)
           ])
           if np.allclose(new_centroids, centroids):
               break
           centroids = new_centroids
       return labels, centroids
   ```

107. **Write a function to handle imbalanced datasets in NumPy.**
   Resamples data for balance.
859 | ```python 860 | def balance_data(X, y, minority_class): 861 | minority = X[y == minority_class] 862 | majority = X[y != minority_class] 863 | minority_upsampled = minority[np.random.choice(len(minority), len(majority))] 864 | return np.vstack([majority, minority_upsampled]), np.hstack([y[y != minority_class], np.full(len(majority), minority_class)]) 865 | ``` 866 | 867 | 108. **How do you integrate NumPy with deep learning frameworks?** 868 | Converts data for TensorFlow/PyTorch. 869 | ```python 870 | import tensorflow as tf 871 | array = np.random.rand(100, 10) 872 | tensor = tf.convert_to_tensor(array) 873 | ``` 874 | 875 | ## Debugging and Error Handling 876 | 877 | ### Basic 878 | 109. **How do you debug NumPy array shapes?** 879 | Logs shape information. 880 | ```python 881 | def debug_shape(array): 882 | print(f"Shape: {array.shape}") 883 | return array 884 | ``` 885 | 886 | 110. **What is a try-except block in NumPy applications?** 887 | Handles numerical errors. 888 | ```python 889 | try: 890 | result = np.linalg.inv(np.array([[1, 2], [2, 4]])) 891 | except np.linalg.LinAlgError as e: 892 | print(f"Error: {e}") 893 | ``` 894 | 895 | 111. **How do you validate NumPy array inputs?** 896 | Ensures correct shapes and types. 897 | ```python 898 | def validate_array(array, expected_shape): 899 | if array.shape != expected_shape: 900 | raise ValueError(f"Expected shape {expected_shape}, got {array.shape}") 901 | return array 902 | ``` 903 | 904 | 112. **How do you handle NaN values in NumPy?** 905 | Detects and replaces NaNs. 906 | ```python 907 | array = np.array([1, np.nan, 3]) 908 | cleaned = np.nan_to_num(array, nan=0) 909 | ``` 910 | 911 | 113. **What is the role of logging in NumPy debugging?** 912 | Tracks errors and operations. 913 | ```python 914 | import logging 915 | logging.basicConfig(filename='numpy.log', level=logging.INFO) 916 | logging.info("Starting NumPy operation") 917 | ``` 918 | 919 | 114. 
**How do you handle overflow errors in NumPy?** 920 | Uses safe numerical ranges. 921 | ```python 922 | array = np.array([1e308], dtype=np.float64) 923 | result = np.clip(array, -1e308, 1e308) 924 | ``` 925 | 926 | #### Intermediate 927 | 115. **Write a function to retry NumPy operations on failure.** 928 | Handles transient errors. 929 | ```python 930 | def retry_operation(func, array, max_attempts=3): 931 | for attempt in range(max_attempts): 932 | try: 933 | return func(array) 934 | except Exception as e: 935 | if attempt == max_attempts - 1: 936 | raise 937 | print(f"Attempt {attempt+1} failed: {e}") 938 | ``` 939 | 940 | 116. **How do you debug NumPy operation outputs?** 941 | Inspects intermediate results. 942 | ```python 943 | def debug_operation(array): 944 | result = array * 2 945 | print(f"Input: {array[:5]}, Output: {result[:5]}") 946 | return result 947 | ``` 948 | 949 | 117. **Write a function to validate NumPy array dtypes.** 950 | Ensures correct data types. 951 | ```python 952 | def validate_dtype(array, expected_dtype): 953 | if array.dtype != expected_dtype: 954 | raise ValueError(f"Expected dtype {expected_dtype}, got {array.dtype}") 955 | return array 956 | ``` 957 | 958 | 118. **How do you profile NumPy operation performance?** 959 | Measures execution time. 960 | ```python 961 | import time 962 | def profile_operation(array): 963 | start = time.time() 964 | result = np.sin(array) 965 | print(f"Operation took {time.time() - start}s") 966 | return result 967 | ``` 968 | 969 | 119. **Write a function to handle memory errors in NumPy.** 970 | Manages large arrays. 971 | ```python 972 | def safe_operation(array, max_size=1e6): 973 | if array.size > max_size: 974 | raise MemoryError("Array too large") 975 | return array * 2 976 | ``` 977 | 978 | 120. **How do you debug broadcasting errors in NumPy?** 979 | Logs shape mismatches. 
980 | ```python 981 | def debug_broadcasting(array1, array2): 982 | try: 983 | return array1 + array2 984 | except ValueError as e: 985 | print(f"Broadcasting error: {e}, Shapes: {array1.shape}, {array2.shape}") 986 | raise 987 | ``` 988 | 989 | #### Advanced 990 | 121. **Write a function to implement a custom NumPy error handler.** 991 | Logs specific errors. 992 | ```python 993 | import logging 994 | def custom_error_handler(array, operation): 995 | logging.basicConfig(filename='numpy.log', level=logging.ERROR) 996 | try: 997 | return operation(array) 998 | except Exception as e: 999 | logging.error(f"Operation error: {e}") 1000 | raise 1001 | ``` 1002 | 1003 | 122. **How do you implement circuit breakers in NumPy applications?** 1004 | Prevents cascading failures. 1005 | ```python 1006 | from pybreaker import CircuitBreaker 1007 | breaker = CircuitBreaker(fail_max=3, reset_timeout=60) 1008 | @breaker 1009 | def safe_operation(array): 1010 | return np.linalg.inv(array) 1011 | ``` 1012 | 1013 | 123. **Write a function to detect numerical instability in NumPy.** 1014 | Checks for large condition numbers. 1015 | ```python 1016 | def detect_instability(matrix): 1017 | cond = np.linalg.cond(matrix) 1018 | if cond > 1e10: 1019 | print("Warning: Matrix may be ill-conditioned") 1020 | return matrix 1021 | ``` 1022 | 1023 | 124. **How do you implement logging for distributed NumPy operations?** 1024 | Centralizes logs for debugging. 1025 | ```python 1026 | import logging.handlers 1027 | def setup_distributed_logging(): 1028 | handler = logging.handlers.SocketHandler('log-server', 9090) 1029 | logging.getLogger().addHandler(handler) 1030 | logging.info("NumPy operation started") 1031 | ``` 1032 | 1033 | 125. **Write a function to handle version compatibility in NumPy.** 1034 | Checks library versions. 
```python
import numpy as np
def check_numpy_version():
    # Compare numerically: as strings, '1.9.0' sorts after '1.20', so an old
    # release would slip past a lexicographic check.
    major, minor = (int(part) for part in np.__version__.split('.')[:2])
    if (major, minor) < (1, 20):
        raise ValueError("Unsupported NumPy version")
```

126. **How do you debug NumPy performance bottlenecks?**
   Profiles operation stages.
   ```python
   import time
   def debug_bottlenecks(array):
       start = time.time()
       result = np.dot(array, array.T)
       print(f"Matrix multiplication: {time.time() - start}s")
       return result
   ```

## Visualization and Interpretation

### Basic
127. **How do you visualize NumPy array distributions?**
   Plots histograms for data analysis.
   ```python
   import matplotlib.pyplot as plt
   array = np.random.randn(1000)
   plt.hist(array, bins=30)
   plt.savefig('array_distribution.png')
   ```

128. **How do you create a scatter plot with NumPy data?**
   Visualizes relationships in data.
   ```python
   import matplotlib.pyplot as plt
   x = np.random.rand(100)
   y = np.random.rand(100)
   plt.scatter(x, y)
   plt.savefig('scatter_plot.png')
   ```

129. **How do you visualize matrix data in NumPy?**
   Uses heatmaps for matrices.
   ```python
   import matplotlib.pyplot as plt
   matrix = np.random.rand(5, 5)
   plt.imshow(matrix, cmap='coolwarm')
   plt.colorbar()
   plt.savefig('matrix_heatmap.png')
   ```

130. **How do you plot NumPy array operations?**
   Visualizes transformed data.
   ```python
   import matplotlib.pyplot as plt
   array = np.linspace(0, 10, 100)
   plt.plot(array, np.sin(array))
   plt.savefig('sin_plot.png')
   ```

131. **How do you create a 3D plot with NumPy data?**
   Visualizes multi-dimensional arrays.
1096 | ```python 1097 | from mpl_toolkits.mplot3d import Axes3D 1098 | import matplotlib.pyplot as plt 1099 | x = np.linspace(-5, 5, 100) 1100 | y = np.linspace(-5, 5, 100) 1101 | X, Y = np.meshgrid(x, y) 1102 | Z = np.sin(np.sqrt(X**2 + Y**2)) 1103 | fig = plt.figure() 1104 | ax = fig.add_subplot(111, projection='3d') 1105 | ax.plot_surface(X, Y, Z) 1106 | plt.savefig('3d_plot.png') 1107 | ``` 1108 | 1109 | 132. **How do you visualize NumPy array statistics?** 1110 | Plots mean, std, etc. 1111 | ```python 1112 | import matplotlib.pyplot as plt 1113 | arrays = [np.random.randn(100) for _ in range(5)] 1114 | means = [np.mean(arr) for arr in arrays] 1115 | plt.bar(range(len(means)), means) 1116 | plt.savefig('array_stats.png') 1117 | ``` 1118 | 1119 | #### Intermediate 1120 | 133. **Write a function to visualize NumPy array comparisons.** 1121 | Plots multiple arrays. 1122 | ```python 1123 | import matplotlib.pyplot as plt 1124 | def compare_arrays(arrays, labels): 1125 | for arr, label in zip(arrays, labels): 1126 | plt.plot(arr, label=label) 1127 | plt.legend() 1128 | plt.savefig('array_comparison.png') 1129 | ``` 1130 | 1131 | 134. **How do you visualize NumPy clustering results?** 1132 | Plots clustered data points. 1133 | ```python 1134 | import matplotlib.pyplot as plt 1135 | def plot_clusters(X, labels): 1136 | plt.scatter(X[:, 0], X[:, 1], c=labels) 1137 | plt.savefig('cluster_plot.png') 1138 | ``` 1139 | 1140 | 135. **Write a function to visualize NumPy feature importance.** 1141 | Plots feature weights. 1142 | ```python 1143 | import matplotlib.pyplot as plt 1144 | def plot_feature_importance(features, importances): 1145 | plt.bar(features, importances) 1146 | plt.xticks(rotation=45) 1147 | plt.savefig('feature_importance.png') 1148 | ``` 1149 | 1150 | 136. **How do you visualize NumPy matrix transformations?** 1151 | Shows before/after effects. 
1152 | ```python 1153 | import matplotlib.pyplot as plt 1154 | def plot_transformation(matrix, transformed): 1155 | plt.subplot(1, 2, 1) 1156 | plt.imshow(matrix, cmap='Blues') 1157 | plt.subplot(1, 2, 2) 1158 | plt.imshow(transformed, cmap='Blues') 1159 | plt.savefig('transformation_plot.png') 1160 | ``` 1161 | 1162 | 137. **Write a function to visualize NumPy error distributions.** 1163 | Plots operation errors. 1164 | ```python 1165 | import matplotlib.pyplot as plt 1166 | def plot_errors(errors): 1167 | plt.hist(errors, bins=20) 1168 | plt.savefig('error_distribution.png') 1169 | ``` 1170 | 1171 | 138. **How do you visualize NumPy data trends?** 1172 | Plots time series or trends. 1173 | ```python 1174 | import matplotlib.pyplot as plt 1175 | data = np.cumsum(np.random.randn(100)) 1176 | plt.plot(data) 1177 | plt.savefig('data_trend.png') 1178 | ``` 1179 | 1180 | #### Advanced 1181 | 139. **Write a function to visualize NumPy high-dimensional data.** 1182 | Uses PCA for projection. 1183 | ```python 1184 | from sklearn.decomposition import PCA 1185 | import matplotlib.pyplot as plt 1186 | def plot_high_dim_data(data): 1187 | pca = PCA(n_components=2) 1188 | reduced = pca.fit_transform(data) 1189 | plt.scatter(reduced[:, 0], reduced[:, 1]) 1190 | plt.savefig('high_dim_plot.png') 1191 | ``` 1192 | 1193 | 140. **How do you implement a dashboard for NumPy metrics?** 1194 | Displays real-time stats. 1195 | ```python 1196 | from fastapi import FastAPI 1197 | app = FastAPI() 1198 | metrics = [] 1199 | @app.get('/metrics') 1200 | async def get_metrics(): 1201 | return {'metrics': metrics} 1202 | ``` 1203 | 1204 | 141. **Write a function to visualize NumPy operation performance.** 1205 | Plots execution times. 1206 | ```python 1207 | import matplotlib.pyplot as plt 1208 | def plot_performance(sizes, times): 1209 | plt.plot(sizes, times, marker='o') 1210 | plt.savefig('performance_plot.png') 1211 | ``` 1212 | 1213 | 142. 
**How do you visualize NumPy data drift?**
   Tracks data changes over time.
   ```python
   import matplotlib.pyplot as plt
   def plot_data_drift(metrics):
       plt.plot(metrics, marker='o')
       plt.savefig('data_drift.png')
   ```

143. **Write a function to visualize NumPy uncertainty.**
   Plots confidence intervals.
   ```python
   import matplotlib.pyplot as plt
   def plot_uncertainty(data, std):
       plt.plot(data)
       plt.fill_between(range(len(data)), data - std, data + std, alpha=0.2)
       plt.savefig('uncertainty_plot.png')
   ```

144. **How do you visualize NumPy model errors by category?**
   Analyzes error patterns.
   ```python
   import matplotlib.pyplot as plt
   def plot_error_by_category(categories, errors):
       plt.bar(categories, errors)
       plt.savefig('error_by_category.png')
   ```

## Best Practices and Optimization

### Basic
145. **What are best practices for NumPy code organization?**
   Modularizes array operations.
   ```python
   def preprocess_data(data):
       return standardize_features(data)
   def compute_features(data):
       return np.dot(data, data.T)
   ```

146. **How do you ensure reproducibility in NumPy?**
   Sets random seeds.
   ```python
   np.random.seed(42)
   ```

147. **What is caching in NumPy pipelines?**
   Stores intermediate results.
   ```python
   from functools import lru_cache

   @lru_cache(maxsize=1000)
   def _compute_matrix_cached(buffer, shape, dtype):
       # Rebuild the array from its immutable, hashable description.
       array = np.frombuffer(buffer, dtype=dtype).reshape(shape)
       return np.dot(array, array.T)

   def compute_matrix(array):
       # ndarrays are unhashable, so decorating this function with lru_cache
       # directly raises TypeError; cache on a (bytes, shape, dtype) key instead.
       # The cached result is shared between calls, so treat it as read-only.
       array = np.ascontiguousarray(array)
       return _compute_matrix_cached(array.tobytes(), array.shape, array.dtype.str)
   ```

148. **How do you handle large-scale NumPy arrays?**
   Uses chunked processing.
   ```python
   def process_large_array(array, chunk_size=1000):
       for i in range(0, len(array), chunk_size):
           yield array[i:i + chunk_size]
   ```

149. 
**What is the role of environment configuration in NumPy?** 1277 | Manages settings securely. 1278 | ```python 1279 | import os 1280 | os.environ['NUMPY_DATA_PATH'] = 'data.npy' 1281 | ``` 1282 | 1283 | 150. **How do you document NumPy code?** 1284 | Uses docstrings for clarity. 1285 | ```python 1286 | def normalize_array(array): 1287 | """Normalizes array to zero mean and unit variance.""" 1288 | return (array - np.mean(array)) / np.std(array) 1289 | ``` 1290 | 1291 | #### Intermediate 1292 | 151. **Write a function to optimize NumPy memory usage.** 1293 | Limits memory allocation. 1294 | ```python 1295 | def optimize_memory(array, max_size=1e6): 1296 | if array.size > max_size: 1297 | return array[:int(max_size)] 1298 | return array 1299 | ``` 1300 | 1301 | 152. **How do you implement unit tests for NumPy code?** 1302 | Validates array operations. 1303 | ```python 1304 | import unittest 1305 | class TestNumPy(unittest.TestCase): 1306 | def test_normalize(self): 1307 | array = np.array([1, 2, 3]) 1308 | result = normalize_array(array) 1309 | self.assertAlmostEqual(np.mean(result), 0) 1310 | ``` 1311 | 1312 | 153. **Write a function to create reusable NumPy templates.** 1313 | Standardizes array processing. 1314 | ```python 1315 | def array_template(array, operation='normalize'): 1316 | if operation == 'normalize': 1317 | return normalize_array(array) 1318 | return array 1319 | ``` 1320 | 1321 | 154. **How do you optimize NumPy for batch processing?** 1322 | Processes arrays in chunks. 1323 | ```python 1324 | def batch_process(arrays, batch_size=100): 1325 | for i in range(0, len(arrays), batch_size): 1326 | yield [normalize_array(arr) for arr in arrays[i:i + batch_size]] 1327 | ``` 1328 | 1329 | 155. **Write a function to handle NumPy configuration.** 1330 | Centralizes settings. 1331 | ```python 1332 | def configure_numpy(): 1333 | return {'dtype': np.float32, 'order': 'C'} 1334 | ``` 1335 | 1336 | 156. 
**How do you ensure NumPy pipeline consistency?**
   Standardizes versions and settings.
   ```python
   import numpy as np
   def check_numpy_env():
       print(f"NumPy version: {np.__version__}")
   ```

#### Advanced
157. **Write a function to implement NumPy pipeline caching.**
   Reuses processed arrays.
   ```python
   import os  # needed for os.path.exists; the snippet never used joblib
   def cache_array(array, cache_file='cache.npy'):
       if os.path.exists(cache_file):
           return np.load(cache_file)
       result = normalize_array(array)
       np.save(cache_file, result)
       return result
   ```

158. **How do you optimize NumPy for high-throughput processing?**
   Uses parallel execution.
   ```python
   from joblib import Parallel, delayed
   def high_throughput_process(arrays):
       return Parallel(n_jobs=-1)(delayed(normalize_array)(arr) for arr in arrays)
   ```

159. **Write a function to implement NumPy pipeline versioning.**
   Tracks changes in workflows.
   ```python
   import json
   def version_pipeline(config, version):
       with open(f'numpy_pipeline_v{version}.json', 'w') as f:
           json.dump(config, f)
   ```

160. **How do you implement NumPy pipeline monitoring?**
   Logs performance metrics.
   ```python
   import logging
   import time
   def monitored_process(array):
       logging.basicConfig(filename='numpy.log', level=logging.INFO)
       start = time.time()
       result = normalize_array(array)
       logging.info(f"Processed array in {time.time() - start}s")
       return result
   ```

161. **Write a function to handle NumPy scalability.**
   Processes large datasets efficiently.
   ```python
   def scalable_process(array, chunk_size=1000):
       for i in range(0, len(array), chunk_size):
           yield normalize_array(array[i:i + chunk_size])
   ```

162. 
**How do you implement NumPy pipeline automation?** 1394 | Scripts end-to-end workflows. 1395 | ```python 1396 | def automate_pipeline(data): 1397 | processed = normalize_array(data) 1398 | np.save('processed_data.npy', processed) 1399 | return processed 1400 | ``` 1401 | 1402 | ## Ethical Considerations in NumPy 1403 | 1404 | ### Basic 1405 | 163. **What are ethical concerns in NumPy applications?** 1406 | Includes bias in data processing and resource usage. 1407 | ```python 1408 | def check_data_bias(data, labels): 1409 | return np.mean(data[labels == 0]) - np.mean(data[labels == 1]) 1410 | ``` 1411 | 1412 | 164. **How do you detect bias in NumPy data processing?** 1413 | Analyzes statistical disparities. 1414 | ```python 1415 | def detect_bias(data, groups): 1416 | return {g: np.mean(data[groups == g]) for g in np.unique(groups)} 1417 | ``` 1418 | 1419 | 165. **What is data privacy in NumPy, and how is it ensured?** 1420 | Protects sensitive data. 1421 | ```python 1422 | def anonymize_data(data): 1423 | return data + np.random.normal(0, 0.1, data.shape) 1424 | ``` 1425 | 1426 | 166. **How do you ensure fairness in NumPy data processing?** 1427 | Balances data across groups. 1428 | ```python 1429 | def fair_processing(data, labels): 1430 | return balance_data(data, labels, minority_class=1) 1431 | ``` 1432 | 1433 | 167. **What is explainability in NumPy applications?** 1434 | Clarifies data transformations. 1435 | ```python 1436 | def explain_transformation(data, transformed): 1437 | print(f"Mean before: {np.mean(data)}, Mean after: {np.mean(transformed)}") 1438 | return transformed 1439 | ``` 1440 | 1441 | 168. **How do you visualize NumPy data bias?** 1442 | Plots group-wise statistics. 1443 | ```python 1444 | import matplotlib.pyplot as plt 1445 | def plot_bias(groups, means): 1446 | plt.bar(groups, means) 1447 | plt.savefig('bias_plot.png') 1448 | ``` 1449 | 1450 | #### Intermediate 1451 | 169. 
**Write a function to mitigate bias in NumPy data.**
   Reweights or resamples data.
   ```python
   def mitigate_bias(data, labels, minority_class):
       return balance_data(data, labels, minority_class)
   ```

170. **How do you implement differential privacy in NumPy?**
   Adds noise to protect data.
   ```python
   def private_processing(data, epsilon=1.0, sensitivity=1.0):
       if epsilon <= 0:
           raise ValueError("epsilon must be positive")
       # Laplace mechanism: the noise scale is sensitivity / epsilon. The
       # default sensitivity of 1.0 keeps the original 1/epsilon behaviour.
       noise = np.random.laplace(0, sensitivity / epsilon, data.shape)
       return data + noise
   ```
--------------------------------------------------------------------------------