├── LICENSE
├── Numpy Fundamentals
│   ├── 02 Intermediate NumPy Concepts
│   │   ├── 03 Statistical Computations
│   │   │   └── statistical_computations.py
│   │   ├── 02 Tensor Operations
│   │   │   └── tensor_operations.py
│   │   ├── 01 Linear Algebra for ML
│   │   │   └── linear_algebra_ml.py
│   │   ├── 04 Implementing ML Algorithms
│   │   │   └── implementing_ml_algorithms.py
│   │   └── README.md
│   ├── 03 Advanced NumPy Concepts
│   │   ├── 04 Advanced Tensor Manipulations
│   │   │   └── advanced_tensor_manipulations.py
│   │   ├── 03 Integration with ML Frameworks
│   │   │   └── integration_ml_frameworks.py
│   │   ├── 02 Custom Functions and Ufuncs
│   │   │   └── custom_functions_ufuncs.py
│   │   ├── 01 Vectorization and Performance
│   │   │   └── vectorization_performance.py
│   │   └── README.md
│   └── 01 Beginner NumPy Concepts
│       ├── 03 Basic Operations
│       │   └── basic_operations.py
│       ├── 02 Indexing and Slicing
│       │   └── indexing_slicing.py
│       ├── 04 Data Preprocessing for ML
│       │   └── data_preprocessing_ml.py
│       ├── 01 Array Creation and Properties
│       │   └── array_creation_properties.py
│       └── README.md
├── README.md
└── Numpy Interview Questions
    └── README.md


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License  
 2 | 
 3 | Copyright (c) 2025 rohanmistry231
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy  
 6 | of this software and associated documentation files (the "Software"), to deal  
 7 | in the Software without restriction, including without limitation the rights  
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
 9 | copies of the Software, and to permit persons to whom the Software is  
10 | furnished to do so, subject to the following conditions:  
11 | 
12 | The above copyright notice and this permission notice shall be included in all  
13 | copies or substantial portions of the Software.  
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  
21 | SOFTWARE.


--------------------------------------------------------------------------------
/Numpy Fundamentals/02 Intermediate NumPy Concepts/03 Statistical Computations/statistical_computations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Statistical Computations]
 9 | # Learn NumPy's statistical tools for ML analysis.
10 | # Covers descriptive statistics, correlation/covariance, and random sampling.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Descriptive Statistics]
15 | # Compute statistics on Iris data.
16 | if load_iris:
17 |     iris = load_iris()
18 |     X = iris.data
19 | else:
20 |     X = np.random.rand(150, 4)  # Synthetic data
21 | 
22 | mean = np.mean(X, axis=0)
23 | median = np.median(X, axis=0)
24 | variance = np.var(X, axis=0)
25 | std = np.std(X, axis=0)
26 | print("\nDescriptive Statistics (Iris Features):")
27 | print("Mean:", mean)
28 | print("Median:", median)
29 | print("Variance:", variance)
30 | print("Standard Deviation:", std)
31 | 
32 | # %% [3. Correlation and Covariance]
33 | # Compute correlation and covariance matrices.
34 | corr_matrix = np.corrcoef(X.T)
35 | cov_matrix = np.cov(X.T)
36 | print("\nCorrelation Matrix:\n", corr_matrix)
37 | print("\nCovariance Matrix:\n", cov_matrix)
38 | 
39 | # %% [4. Random Sampling for Data Augmentation]
40 | # Generate augmented data with random sampling.
41 | np.random.seed(42)
42 | indices = np.random.choice(X.shape[0], size=50, replace=True)
43 | augmented_data = X[indices]
44 | print("\nAugmented Data Shape:", augmented_data.shape)
45 | print("First 3 Augmented Samples:\n", augmented_data[:3])
46 | 
47 | # %% [5. Visualizing Statistics]
48 | # Visualize correlation matrix.
49 | plt.figure(figsize=(6, 4))
50 | plt.imshow(corr_matrix, cmap='coolwarm')
51 | plt.colorbar()
52 | plt.title('Correlation Matrix of Iris Features')
53 | plt.savefig('statistical_computations_corr.png')
54 | 
55 | # Visualize augmented data distribution
56 | plt.figure(figsize=(8, 4))
57 | plt.hist(augmented_data[:, 0], bins=20, color='purple', alpha=0.7)
58 | plt.title('Augmented Data: Feature 1 Distribution')
59 | plt.xlabel('Value')
60 | plt.ylabel('Frequency')
61 | plt.savefig('statistical_computations_hist.png')
62 | 
63 | # %% [6. Practical ML Application]
64 | # Use statistics for feature selection.
65 | np.random.seed(42)
66 | X_synthetic = np.random.rand(100, 3)
67 | y_synthetic = np.random.randint(0, 2, 100)
68 | correlations = np.array([np.corrcoef(X_synthetic[:, i], y_synthetic)[0, 1] for i in range(3)])
69 | print("\nFeature Selection:")
70 | print("Feature Correlations with Target:", correlations)
71 | print("Selected Feature (highest correlation):", np.argmax(np.abs(correlations)))
72 | 
73 | # %% [7. Interview Scenario: Correlation Analysis]
74 | # Discuss correlation for feature selection.
75 | print("\nInterview Scenario: Correlation Analysis")
76 | print("Q: How would you select features using NumPy?")
77 | print("A: Compute np.corrcoef to find feature-target correlations, select high values.")
78 | print("Key: High correlation indicates predictive power.")
79 | print("Example: np.corrcoef(X.T, y) for feature-target correlations.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/02 Intermediate NumPy Concepts/02 Tensor Operations/tensor_operations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | 
 4 | # %% [1. Introduction to Tensor Operations]
 5 | # Learn NumPy's tensor operations for ML tasks.
 6 | # Covers multi-dimensional arrays, reshaping, transposing, and contractions.
 7 | 
 8 | print("NumPy version:", np.__version__)
 9 | 
10 | # %% [2. Multi-dimensional Arrays]
11 | # Create 3D tensor (e.g., batch of images).
12 | np.random.seed(42)
13 | tensor_3d = np.random.rand(10, 32, 32)  # 10 samples, 32x32 images
14 | print("\n3D Tensor Shape:", tensor_3d.shape)
15 | print("First Sample (first 3x3):\n", tensor_3d[0, :3, :3])
16 | 
17 | # 4D tensor (e.g., batch of RGB images)
18 | tensor_4d = np.random.rand(5, 3, 64, 64)  # 5 samples, 3 channels, 64x64
19 | print("\n4D Tensor Shape:", tensor_4d.shape)
20 | 
21 | # %% [3. Tensor Reshaping and Transposing]
22 | # Reshape tensor for ML input.
23 | reshaped_tensor = np.reshape(tensor_3d, (10, 32 * 32))  # Flatten images
24 | print("\nReshaped Tensor Shape:", reshaped_tensor.shape)
25 | print("First Sample (first 10 elements):\n", reshaped_tensor[0, :10])
26 | 
27 | # Transpose tensor
28 | transposed_tensor = np.transpose(tensor_3d, (1, 2, 0))  # Swap axes
29 | print("\nTransposed Tensor Shape:", transposed_tensor.shape)
30 | 
31 | # Move the channel axis to the end with moveaxis (channels-first -> channels-last)
32 | moved_tensor = np.moveaxis(tensor_4d, 1, 3)  # Move channel axis
33 | print("\nMoved Tensor Shape:", moved_tensor.shape)
34 | 
35 | # %% [4. Tensor Contractions and Reductions]
36 | # Tensor contraction with tensordot
37 | tensor_a = np.random.rand(5, 3, 4)
38 | tensor_b = np.random.rand(4, 2)
39 | contracted = np.tensordot(tensor_a, tensor_b, axes=([2], [0]))
40 | print("\nTensor Contraction Shape:", contracted.shape)
41 | 
42 | # Reduction with sum
43 | sum_tensor = np.sum(tensor_3d, axis=(1, 2))  # Sum over image dimensions
44 | print("\nSum Reduction Shape:", sum_tensor.shape)
45 | print("Sum Values:", sum_tensor)
46 | 
47 | # %% [5. Visualizing Tensors]
48 | # Visualize a 3D tensor slice.
49 | plt.figure(figsize=(6, 4))
50 | plt.imshow(tensor_3d[0], cmap='gray')
51 | plt.title('3D Tensor: First Image Slice')
52 | plt.colorbar()
53 | plt.savefig('tensor_operations_image.png')
54 | 
55 | # Visualize reduction results
56 | plt.figure(figsize=(8, 4))
57 | plt.bar(range(len(sum_tensor)), sum_tensor)
58 | plt.title('Sum Reduction of 3D Tensor')
59 | plt.xlabel('Sample Index')
60 | plt.ylabel('Sum Value')
61 | plt.savefig('tensor_operations_reduction.png')
62 | 
63 | # %% [6. Practical ML Application]
64 | # Prepare tensor for CNN input.
65 | np.random.seed(42)
66 | images = np.random.rand(20, 1, 28, 28)  # 20 grayscale images, 28x28
67 | images_reshaped = np.reshape(images, (20, 28 * 28))  # Flatten for dense layer
68 | print("\nCNN Input Preparation:")
69 | print("Original Tensor Shape:", images.shape)
70 | print("Reshaped Tensor Shape:", images_reshaped.shape)
71 | 
72 | # %% [7. Interview Scenario: Tensor Reshaping]
73 | # Discuss reshaping for deep learning.
74 | print("\nInterview Scenario: Tensor Reshaping")
75 | print("Q: How would you prepare a tensor for a neural network?")
76 | print("A: Reshape to match input layer (e.g., flatten images with np.reshape).")
77 | print("Key: Ensure shape compatibility with model architecture.")
78 | print("Example: np.reshape(tensor, (n_samples, height * width)) for dense layers.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/03 Advanced NumPy Concepts/04 Advanced Tensor Manipulations/advanced_tensor_manipulations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from scipy import sparse
 5 |     import tensorly as tl
 6 | except ImportError:
 7 |     sparse, tl = None, None
 8 | 
 9 | # %% [1. Introduction to Advanced Tensor Manipulations]
10 | # Learn advanced NumPy tensor operations for ML.
11 | # Covers batch processing, sparse arrays, and tensor decompositions.
12 | 
13 | print("NumPy version:", np.__version__)
14 | 
15 | # %% [2. Batch Processing for Deep Learning]
16 | # Process image batches for CNNs.
17 | np.random.seed(42)
18 | images = np.random.rand(32, 3, 64, 64)  # 32 RGB images, 64x64
19 | batch_mean = np.mean(images, axis=(2, 3), keepdims=True)
20 | images_normalized = images - batch_mean  # Per-image, per-channel mean centering (not full batch normalization)
21 | print("\nBatch Processed Images Shape:", images_normalized.shape)
22 | print("Batch Mean Shape:", batch_mean.shape)
23 | 
24 | # %% [3. Sparse Arrays for Large-scale Data]
25 | # Use sparse arrays for memory efficiency.
26 | if sparse:
27 |     sparse_matrix = sparse.csr_matrix(np.random.rand(1000, 1000) > 0.9)  # 90% sparsity
28 |     print("\nSparse Matrix Shape:", sparse_matrix.shape)
29 |     print("Non-zero Elements:", sparse_matrix.nnz)
30 | else:
31 |     print("\nScipy.sparse not available; skipping sparse matrix.")
32 | 
# %% [4. Tensor Decompositions]
# Perform CP decomposition for compression.
tensor = np.random.rand(10, 20, 30)
if tl:
    # Import parafac explicitly: a bare `import tensorly` is not guaranteed to
    # load the `decomposition` subpackage as an attribute.
    from tensorly.decomposition import parafac

    factors = parafac(tensor, rank=5)
    # `kruskal_to_tensor` was renamed `cp_to_tensor` in newer TensorLy
    # releases; use whichever name this installation provides.
    to_full_tensor = getattr(tl, "cp_to_tensor", None) or tl.kruskal_to_tensor
    reconstructed = to_full_tensor(factors)
    # Mean squared reconstruction error of the rank-5 approximation
    error = np.mean((tensor - reconstructed)**2)
    print("\nCP Decomposition Error:", error)
else:
    print("\nTensorly not available; using simple sum reduction.")
    reduced = np.sum(tensor, axis=2)
    print("Reduced Tensor Shape:", reduced.shape)
46 | 
47 | # %% [5. Visualizing Tensor Manipulations]
48 | # Visualize normalized image slice.
49 | plt.figure(figsize=(6, 4))
50 | plt.imshow(images_normalized[0, 0], cmap='gray')
51 | plt.title('Normalized Image Slice (Batch Processing)')
52 | plt.colorbar()
53 | plt.savefig('tensor_manipulations_image.png')
54 | 
55 | # Visualize sparse matrix (if available)
56 | if sparse:
57 |     plt.figure(figsize=(6, 4))
58 |     plt.spy(sparse_matrix, markersize=1)
59 |     plt.title('Sparse Matrix Structure')
60 |     plt.savefig('tensor_manipulations_sparse.png')
61 | 
62 | # %% [6. Practical ML Application]
63 | # Prepare a large tensor for deep learning.
64 | np.random.seed(42)
65 | large_tensor = np.random.rand(100, 3, 128, 128)  # 100 RGB images
66 | large_tensor_flattened = np.reshape(large_tensor, (100, -1))  # Flatten for dense layer
67 | print("\nDeep Learning Tensor Preparation:")
68 | print("Original Tensor Shape:", large_tensor.shape)
69 | print("Flattened Tensor Shape:", large_tensor_flattened.shape)
70 | 
71 | # %% [7. Interview Scenario: Tensor Decomposition]
72 | # Discuss tensor decomposition for ML.
73 | print("\nInterview Scenario: Tensor Decomposition")
74 | print("Q: How would you compress a tensor for ML?")
75 | print("A: Use CP decomposition to reduce dimensionality with tensorly.")
76 | print("Key: Preserves structure while reducing memory.")
77 | print("Example: factors = tl.decomposition.parafac(tensor, rank=5).")


--------------------------------------------------------------------------------
/Numpy Fundamentals/03 Advanced NumPy Concepts/03 Integration with ML Frameworks/integration_ml_frameworks.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     import tensorflow as tf
 5 |     import torch
 6 |     from sklearn.preprocessing import StandardScaler
 7 | except ImportError:
 8 |     tf, torch, StandardScaler = None, None, None
 9 | 
10 | # %% [1. Introduction to Integration with ML Frameworks]
11 | # Learn to integrate NumPy with TensorFlow, PyTorch, and scikit-learn.
12 | # Covers tensor conversion, data pipelines, and preprocessing.
13 | 
14 | print("NumPy version:", np.__version__)
15 | 
16 | # %% [2. Converting NumPy Arrays to Tensors]
17 | # Convert to TensorFlow and PyTorch tensors.
18 | np.random.seed(42)
19 | X_np = np.random.rand(100, 5)
20 | 
21 | if tf:
22 |     X_tf = tf.convert_to_tensor(X_np, dtype=tf.float32)
23 |     print("\nTensorFlow Tensor Shape:", X_tf.shape)
24 | else:
25 |     print("\nTensorFlow not available; skipping.")
26 | 
27 | if torch:
28 |     X_torch = torch.from_numpy(X_np).float()
29 |     print("PyTorch Tensor Shape:", X_torch.shape)
30 | else:
31 |     print("PyTorch not available; skipping.")
32 | 
# %% [3. NumPy as a Backend for Data Pipelines]
# Create a TensorFlow data pipeline from NumPy arrays.
if tf:
    y_np = np.random.randint(0, 2, 100)
    # Shuffle individual samples *before* batching: shuffling after batch()
    # only reorders whole batches and leaves each batch's contents fixed.
    dataset = (
        tf.data.Dataset.from_tensor_slices((X_np, y_np))
        .shuffle(buffer_size=len(X_np))
        .batch(32)
    )
    for X_batch, y_batch in dataset.take(1):
        print("\nTensorFlow Dataset Batch Shapes:", X_batch.shape, y_batch.shape)
else:
    print("\nTensorFlow not available; skipping pipeline.")
42 | 
43 | # %% [4. Interfacing with scikit-learn]
44 | # Use NumPy with scikit-learn preprocessing.
45 | if StandardScaler:
46 |     scaler = StandardScaler()
47 |     X_scaled = scaler.fit_transform(X_np)
48 |     print("\nScikit-learn Scaled Features Shape:", X_scaled.shape)
49 |     print("First 3 Scaled Samples:\n", X_scaled[:3])
50 | else:
51 |     X_scaled = (X_np - np.mean(X_np, axis=0)) / np.std(X_np, axis=0)
52 |     print("\nScikit-learn not available; using NumPy scaling.")
53 |     print("First 3 Scaled Samples:\n", X_scaled[:3])
54 | 
55 | # %% [5. Visualizing Integration]
56 | # Visualize scaled features.
57 | plt.figure(figsize=(8, 4))
58 | plt.scatter(X_np[:, 0], X_np[:, 1], c='blue', alpha=0.5, label='Original')
59 | plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c='red', alpha=0.5, label='Scaled')
60 | plt.title('Original vs. Scaled Features')
61 | plt.xlabel('Feature 1')
62 | plt.ylabel('Feature 2')
63 | plt.legend()
64 | plt.savefig('integration_ml_scaled.png')
65 | 
66 | # %% [6. Practical ML Application]
67 | # Prepare a NumPy dataset for a deep learning model.
68 | np.random.seed(42)
69 | X_ml = np.random.rand(1000, 10)
70 | y_ml = np.random.randint(0, 2, 1000)
71 | if tf:
72 |     dataset_ml = tf.data.Dataset.from_tensor_slices((X_ml, y_ml)).batch(64)
73 |     print("\nDeep Learning Dataset:")
74 |     print("Batch Size: 64")
75 | else:
76 |     print("\nTensorFlow not available; skipping deep learning dataset.")
77 | 
78 | # %% [7. Interview Scenario: Framework Integration]
79 | # Discuss NumPy integration with ML frameworks.
80 | print("\nInterview Scenario: Framework Integration")
81 | print("Q: How do you prepare NumPy data for TensorFlow?")
82 | print("A: Convert to tensors with tf.convert_to_tensor, use tf.data.Dataset.")
83 | print("Key: Ensures compatibility with ML framework APIs.")
84 | print("Example: dataset = tf.data.Dataset.from_tensor_slices((X_np, y_np)).batch(32).")


--------------------------------------------------------------------------------
/Numpy Fundamentals/02 Intermediate NumPy Concepts/01 Linear Algebra for ML/linear_algebra_ml.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Linear Algebra for ML]
 9 | # Learn NumPy's linear algebra tools for ML tasks.
10 | # Covers matrix operations, solving linear systems, eigenvalues, and SVD.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Matrix Operations]
15 | # Perform matrix operations using Iris data.
16 | if load_iris:
17 |     iris = load_iris()
18 |     X = iris.data[:100]  # First 100 samples for simplicity
19 | else:
20 |     X = np.random.rand(100, 4)  # Synthetic data
21 | 
22 | # Matrix multiplication (dot product)
23 | X_T = np.transpose(X)  # Transpose
24 | X_TX = np.dot(X_T, X)  # X^T * X
25 | print("\nX^T * X Matrix (4x4):\n", X_TX)
26 | 
27 | # Matrix multiplication with matmul
28 | X_matmul = np.matmul(X_T, X)
29 | print("\nX^T * X with matmul:\n", X_matmul)
30 | 
31 | # %% [3. Solving Linear Systems]
32 | # Solve a linear system: Ax = b
33 | A = np.array([[3, 1], [1, 2]])  # Coefficient matrix
34 | b = np.array([9, 8])  # Constants
35 | x = np.linalg.solve(A, b)  # Solve for x
36 | print("\nLinear System Solution (Ax = b):")
37 | print("A:\n", A)
38 | print("b:", b)
39 | print("x:", x)
40 | 
41 | # Verify solution
42 | print("Verification (Ax):\n", np.dot(A, x))
43 | 
# %% [4. Eigenvalues and Eigenvectors]
# Compute eigenvalues/vectors for covariance matrix.
cov_matrix = np.cov(X.T)  # Covariance of Iris features
# A covariance matrix is symmetric, so use eigh: it is designed for
# symmetric input and always returns real eigenvalues.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
# eigh returns eigenvalues in ascending order; list the largest first and
# reorder the eigenvector columns to match.
order = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[order]
eigenvectors = eigenvectors[:, order]
print("\nCovariance Matrix Eigenvalues:", eigenvalues)
print("Eigenvectors:\n", eigenvectors)
50 | 
51 | # %% [5. Singular Value Decomposition (SVD)]
52 | # Apply SVD for dimensionality reduction.
53 | U, S, Vt = np.linalg.svd(X, full_matrices=False)
54 | X_reduced = np.dot(U[:, :2], np.diag(S[:2]))  # Reduce to 2 dimensions
55 | print("\nSVD Reduced Data Shape:", X_reduced.shape)
56 | print("First 3 Reduced Samples:\n", X_reduced[:3])
57 | 
58 | # %% [6. Visualizing Linear Algebra]
59 | # Visualize SVD-reduced data.
60 | plt.figure(figsize=(8, 4))
61 | plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c='blue', alpha=0.5)
62 | plt.title('SVD: Iris Data in 2D')
63 | plt.xlabel('Component 1')
64 | plt.ylabel('Component 2')
65 | plt.savefig('linear_algebra_svd.png')
66 | 
67 | # Visualize covariance matrix
68 | plt.figure(figsize=(6, 4))
69 | plt.imshow(cov_matrix, cmap='viridis')
70 | plt.colorbar()
71 | plt.title('Covariance Matrix Heatmap')
72 | plt.savefig('linear_algebra_cov_matrix.png')
73 | 
74 | # %% [7. Practical ML Application]
75 | # Use matrix operations for feature transformation.
76 | np.random.seed(42)
77 | X_synthetic = np.random.rand(100, 3)  # Synthetic data
78 | W = np.random.rand(3, 2)  # Transformation matrix
79 | X_transformed = np.dot(X_synthetic, W)  # Linear transformation
80 | print("\nSynthetic ML Dataset:")
81 | print("Transformed Features Shape:", X_transformed.shape)
82 | print("First 3 Transformed Samples:\n", X_transformed[:3])
83 | 
# %% [8. Interview Scenario: SVD for PCA]
# Discuss SVD for dimensionality reduction.
# PCA is SVD applied to mean-centered data, so the printed answer and
# example spell out the centering step.
print("\nInterview Scenario: SVD for PCA")
print("Q: How would you implement PCA with NumPy?")
print("A: Center the data, use np.linalg.svd to decompose it, select top components.")
print("Key: SVD reduces dimensionality while preserving variance.")
print("Example: Xc = X - X.mean(axis=0); U, S, Vt = np.linalg.svd(Xc, full_matrices=False); X_reduced = U[:, :k] @ np.diag(S[:k]).")


--------------------------------------------------------------------------------
/Numpy Fundamentals/01 Beginner NumPy Concepts/03 Basic Operations/basic_operations.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Basic Operations]
 9 | # Learn NumPy’s element-wise operations, broadcasting, and universal functions (ufuncs).
10 | # Essential for ML computations like feature scaling and loss calculations.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Element-wise Operations]
15 | # Perform arithmetic operations on arrays.
16 | if load_iris:
17 |     iris = load_iris()
18 |     data = iris.data
19 | else:
20 |     data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]])
21 | 
22 | scaled_data = data * 2  # Multiply all elements by 2
23 | print("\nScaled Data (first 3 rows):\n", scaled_data[:3])
24 | 
25 | added_data = data + 10  # Add 10 to all elements
26 | print("\nAdded Data (first 3 rows):\n", added_data[:3])
27 | 
28 | # Combine arrays
29 | combined = data[:, 0] + data[:, 1]  # Sum of sepal length and width
30 | print("\nSum of Sepal Length and Width (first 5):\n", combined[:5])
31 | 
32 | # %% [3. Broadcasting]
33 | # Apply operations across arrays of different shapes.
34 | bias = np.array([1, -1, 0, 0.5])  # Bias for each feature
35 | biased_data = data + bias  # Broadcasting bias to all rows
36 | print("\nBroadcasted Bias Data (first 3 rows):\n", biased_data[:3])
37 | 
38 | # Broadcasting a row vector: divide every row by the per-column maxima
39 | normalized = data / np.max(data, axis=0)  # Normalize by column max
40 | print("\nNormalized Data (first 3 rows):\n", normalized[:3])
41 | 
42 | # %% [4. Universal Functions (ufuncs)]
43 | # Apply mathematical functions element-wise.
44 | sin_data = np.sin(data)  # Sine of all elements
45 | print("\nSine of Data (first 3 rows):\n", sin_data[:3])
46 | 
47 | exp_data = np.exp(data[:, 0])  # Exponential of sepal length
48 | print("\nExponential of Sepal Length (first 5):\n", exp_data[:5])
49 | 
50 | mean_data = np.mean(data, axis=0)  # Mean of each feature
51 | print("\nMean of Each Feature:", mean_data)
52 | 
53 | # %% [5. Visualizing Operations]
54 | # Visualize normalized data distribution.
55 | if load_iris:
56 |     plt.figure(figsize=(8, 4))
57 |     plt.hist(normalized[:, 0], bins=20, color='green', alpha=0.7)
58 |     plt.title('Normalized Sepal Length Distribution')
59 |     plt.xlabel('Normalized Value')
60 |     plt.ylabel('Frequency')
61 |     plt.savefig('operations_histogram.png')
62 | 
63 | # %% [6. Practical ML Application]
64 | # Compute a loss function for ML.
65 | np.random.seed(42)
66 | y_true = np.random.randint(0, 2, 100)  # True binary labels
67 | y_pred = np.random.rand(100)  # Predicted probabilities
68 | mse = np.mean((y_true - y_pred) ** 2)  # Mean squared error
69 | print("\nML Loss Calculation:")
70 | print("Mean Squared Error:", mse)
71 | 
72 | # Visualize predictions vs. true labels
73 | plt.figure(figsize=(8, 4))
74 | plt.scatter(range(100), y_true, c='blue', label='True Labels', alpha=0.5)
75 | plt.scatter(range(100), y_pred, c='red', label='Predictions', alpha=0.5)
76 | plt.title('True vs. Predicted Labels')
77 | plt.legend()
78 | plt.savefig('operations_loss.png')
79 | 
80 | # %% [7. Interview Scenario: Broadcasting]
81 | # Discuss broadcasting for ML computations.
82 | print("\nInterview Scenario: Broadcasting")
83 | print("Q: How does broadcasting simplify ML feature scaling?")
84 | print("A: Broadcasting applies operations (e.g., normalization) to arrays without loops.")
85 | print("Key: Ensures shape compatibility for efficient computations.")
86 | print("Example: arr / np.max(arr, axis=0) normalizes columns without explicit iteration.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/01 Beginner NumPy Concepts/02 Indexing and Slicing/indexing_slicing.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Indexing and Slicing]
 9 | # Learn how to access and manipulate NumPy arrays using indexing and slicing.
10 | # Covers basic indexing, boolean indexing, fancy indexing, and slicing for ML.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Basic Indexing]
15 | # Access elements and subarrays using indices.
16 | if load_iris:
17 |     iris = load_iris()
18 |     data = iris.data
19 | else:
20 |     data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]])
21 | 
22 | print("\nIris Array (first 3 rows):\n", data[:3])
23 | print("Single Element (row 0, col 0):", data[0, 0])  # Sepal length of first sample
24 | print("First Row:", data[0])  # All features of first sample
25 | print("First Column:", data[:, 0])  # Sepal length for all samples
26 | 
27 | # %% [3. Slicing]
28 | # Extract subarrays using slices.
29 | subset = data[:5, 1:3]  # First 5 rows, columns 1 and 2
30 | print("\nSliced Subarray (first 5 rows, cols 1-2):\n", subset)
31 | 
32 | # Step slicing for downsampling
33 | downsampled = data[::2, :]  # Every other row
34 | print("\nDownsampled Array (every other row):\n", downsampled[:3])
35 | 
36 | # %% [4. Boolean Indexing]
37 | # Filter arrays based on conditions.
38 | sepal_length = data[:, 0]
39 | long_sepal = data[sepal_length > 6.0]  # Samples with sepal length > 6.0
40 | print("\nSamples with Sepal Length > 6.0:\n", long_sepal[:3])
41 | 
42 | # Combine conditions
43 | mask = (sepal_length > 5.0) & (data[:, 2] < 2.0)  # Sepal length > 5.0 and petal length < 2.0
44 | filtered = data[mask]
45 | print("\nFiltered Samples (sepal > 5.0, petal < 2.0):\n", filtered)
46 | 
# %% [5. Fancy Indexing]
# Use arrays of indices to select elements.
rows = np.array([0, 2, 4])
# Paired index arrays must have matching (broadcastable) shapes: one column
# index per row index. A 2-element cols array against 3 rows raises IndexError.
cols = np.array([1, 3, 3])
selected = data[rows, cols]  # Elements at (0,1), (2,3), (4,3)
print("\nFancy Indexing (selected elements):\n", selected)

# Select specific rows
selected_rows = data[[0, 10, 20]]
print("\nSelected Rows (0, 10, 20):\n", selected_rows)
57 | 
58 | # %% [6. Visualizing Indexing]
59 | # Visualize filtered data.
60 | if load_iris:
61 |     plt.figure(figsize=(8, 4))
62 |     plt.scatter(data[:, 0], data[:, 2], c='blue', alpha=0.5, label='All Samples')
63 |     plt.scatter(long_sepal[:, 0], long_sepal[:, 2], c='red', label='Sepal Length > 6.0')
64 |     plt.xlabel('Sepal Length (cm)')
65 |     plt.ylabel('Petal Length (cm)')
66 |     plt.title('Iris Dataset: Boolean Indexing')
67 |     plt.legend()
68 |     plt.savefig('indexing_scatter.png')
69 | 
70 | # %% [7. Practical ML Application]
71 | # Use indexing to prepare ML features.
72 | np.random.seed(42)
73 | X = np.random.rand(100, 3)  # 100 samples, 3 features
74 | y = np.random.randint(0, 2, 100)  # Binary labels
75 | positive_samples = X[y == 1]  # Select samples with label 1
76 | print("\nML Dataset: Positive Samples Shape:", positive_samples.shape)
77 | print("First 3 Positive Samples:\n", positive_samples[:3])
78 | 
79 | # %% [8. Interview Scenario: Indexing]
80 | # Discuss indexing for ML data selection.
81 | print("\nInterview Scenario: Indexing")
82 | print("Q: How would you filter a dataset for ML preprocessing?")
83 | print("A: Use boolean indexing for conditions (e.g., arr[arr[:, 0] > 5]) and slicing for subsets.")
84 | print("Key: Boolean indexing is efficient for outlier removal and feature selection.")
85 | print("Example: arr[arr[:, 0] > np.mean(arr[:, 0])] for above-average values.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/02 Intermediate NumPy Concepts/04 Implementing ML Algorithms/implementing_ml_algorithms.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import make_blobs
 5 | except ImportError:
 6 |     make_blobs = None
 7 | 
 8 | # %% [1. Introduction to Implementing ML Algorithms]
 9 | # Learn to implement ML algorithms with NumPy.
10 | # Covers linear regression, logistic regression, and K-means clustering.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Linear Regression with Normal Equations]
15 | # Implement linear regression on synthetic data.
16 | np.random.seed(42)
17 | if make_blobs:
18 |     X, _ = make_blobs(n_samples=100, centers=1, n_features=2)
19 | else:
20 |     X = np.random.rand(100, 2)
21 | y = 2 * X[:, 0] + 3 * X[:, 1] + np.random.randn(100) * 0.1  # Linear relationship
22 | X_b = np.c_[np.ones((100, 1)), X]  # Add bias term
23 | theta = np.linalg.solve(np.dot(X_b.T, X_b), np.dot(X_b.T, y))  # Normal equations
24 | print("\nLinear Regression Coefficients:", theta)
25 | 
26 | # Predict
27 | y_pred = np.dot(X_b, theta)
28 | 
29 | # %% [3. Logistic Regression with Gradient Descent]
30 | # Implement logistic regression.
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Computed as exp(-log(1 + exp(-z))) via np.logaddexp so that large
    negative inputs do not overflow inside exp().

    Parameters:
        z: scalar or array-like of real numbers.

    Returns:
        Float scalar or ndarray of the same shape as ``z`` with values in [0, 1].
    """
    z = np.asarray(z, dtype=float)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0.0, -z))
33 | 
34 | X_log = X
35 | y_log = (y > np.median(y)).astype(int)  # Binary labels
36 | X_log_b = np.c_[np.ones((100, 1)), X_log]
37 | theta_log = np.zeros(3)
38 | lr = 0.1
39 | for _ in range(1000):
40 |     z = np.dot(X_log_b, theta_log)
41 |     h = sigmoid(z)
42 |     gradient = np.dot(X_log_b.T, (h - y_log)) / 100
43 |     theta_log -= lr * gradient
44 | print("\nLogistic Regression Coefficients:", theta_log)
45 | 
46 | # Predict
47 | y_pred_log = sigmoid(np.dot(X_log_b, theta_log)) > 0.5
48 | 
# %% [4. K-means Clustering]
# Implement K-means clustering.
if make_blobs:
    X_cluster, _ = make_blobs(n_samples=100, centers=3, n_features=2)
else:
    X_cluster = np.random.rand(100, 2) * 10
K = 3
# Seed the centroids with K distinct samples (sample count taken from the data)
centroids = X_cluster[np.random.choice(X_cluster.shape[0], K, replace=False)]
for _ in range(10):
    # distances[k, i] is the Euclidean distance from sample i to centroid k
    distances = np.sqrt(((X_cluster - centroids[:, np.newaxis])**2).sum(axis=2))
    labels = np.argmin(distances, axis=0)
    # Keep the previous centroid when a cluster loses all of its points; the
    # mean of an empty selection is NaN and would corrupt later iterations.
    centroids = np.array([
        X_cluster[labels == k].mean(axis=0) if np.any(labels == k) else centroids[k]
        for k in range(K)
    ])
print("\nK-means Centroids:\n", centroids)
62 | 
# %% [5. Visualizing ML Algorithms]
# Visualize linear regression predictions.
plt.figure(figsize=(8, 4))
plt.scatter(X[:, 0], y, c='blue', alpha=0.5, label='Data')
# The predictions depend on both features and X is unsorted, so joining them
# with a line against feature 1 draws a zigzag; plot them as points instead.
plt.scatter(X[:, 0], y_pred, c='red', marker='x', label='Linear Regression')
plt.xlabel('Feature 1')
plt.ylabel('Target')
plt.title('Linear Regression Fit')
plt.legend()
plt.savefig('ml_algorithms_linear.png')
73 | 
74 | # Visualize K-means clusters
75 | plt.figure(figsize=(8, 4))
76 | plt.scatter(X_cluster[:, 0], X_cluster[:, 1], c=labels, cmap='viridis', alpha=0.5)
77 | plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='x', s=200, label='Centroids')
78 | plt.title('K-means Clustering')
79 | plt.legend()
80 | plt.savefig('ml_algorithms_kmeans.png')
81 | 
82 | # %% [6. Practical ML Application]
83 | # Evaluate linear regression performance.
84 | mse = np.mean((y_pred - y)**2)
85 | print("\nLinear Regression Performance:")
86 | print("Mean Squared Error:", mse)
87 | 
88 | # %% [7. Interview Scenario: Gradient Descent]
89 | # Discuss implementing gradient descent.
90 | print("\nInterview Scenario: Gradient Descent")
91 | print("Q: How would you implement logistic regression with NumPy?")
92 | print("A: Use gradient descent to minimize loss, compute gradients with np.dot.")
93 | print("Key: Sigmoid function and iterative updates are critical.")
94 | print("Example: theta -= lr * np.dot(X.T, (sigmoid(X @ theta) - y)) / n.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/03 Advanced NumPy Concepts/02 Custom Functions and Ufuncs/custom_functions_ufuncs.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from numba import jit
 5 | except ImportError:
 6 |     jit = lambda x: x
 7 | 
 8 | # %% [1. Introduction to Custom Functions and Ufuncs]
 9 | # Learn to create custom NumPy functions for ML tasks.
10 | # Covers np.frompyfunc, np.vectorize, numba, and gradient computations.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Writing Custom Ufuncs with np.frompyfunc]
15 | # Create a custom activation function.
16 | def custom_activation(x):
17 |     return np.clip(x, -1, 1)
18 | 
19 | ufunc_activation = np.frompyfunc(custom_activation, 1, 1)
20 | X = np.linspace(-5, 5, 100)
21 | y_ufunc = ufunc_activation(X).astype(float)
22 | print("\nCustom Ufunc Output (first 5):", y_ufunc[:5])
23 | 
24 | # %% [3. Vectorizing Complex Operations]
25 | # Vectorize a non-trivial function.
26 | def complex_function(x, threshold=0.5):
27 |     return x**2 if x > threshold else np.sin(x)
28 | 
29 | vectorized_func = np.vectorize(complex_function)
30 | X_complex = np.linspace(-2, 2, 100)
31 | y_vectorized = vectorized_func(X_complex)
32 | print("\nVectorized Function Output (first 5):", y_vectorized[:5])
33 | 
34 | # %% [4. Numba for Performance]
35 | # Gradient of a least-squares objective, decorated with jit (numba's when it is importable).
36 | @jit(nopython=True)
37 | def compute_gradient(X, y, theta):
38 |     return np.dot(X.T, (np.dot(X, theta) - y)) / len(y)  # X^T (X theta - y) / n_samples
39 | 
40 | np.random.seed(42)
41 | X_grad = np.random.rand(1000, 5)
42 | y_grad = np.random.rand(1000)
43 | theta = np.random.rand(5)
44 | gradient = compute_gradient(X_grad, y_grad, theta)
45 | print("\nNumba Gradient Shape:", gradient.shape)
46 | print("Gradient Values:", gradient)
47 | 
48 | # %% [5. Gradient Computations for ML]
49 | # Compute gradients for a custom loss.
50 | def custom_loss(y_true, y_pred):
51 |     return np.mean((y_true - y_pred)**2)
52 | 
53 | def loss_gradient(X, y, theta):
54 |     y_pred = np.dot(X, theta)
55 |     return -2 * np.dot(X.T, (y - y_pred)) / len(y)
56 | 
57 | X_ml = np.random.rand(500, 3)
58 | y_ml = np.random.rand(500)
59 | theta_ml = np.random.rand(3)
60 | grad = loss_gradient(X_ml, y_ml, theta_ml)
61 | print("\nCustom Loss Gradient Shape:", grad.shape)
62 | print("Gradient Values:", grad)
63 | 
64 | # %% [6. Visualizing Custom Functions]
65 | # Plot custom activation function.
66 | plt.figure(figsize=(8, 4))
67 | plt.plot(X, y_ufunc, label='Custom Activation (clipped)')
68 | plt.plot(X, X, '--', label='Input')
69 | plt.title('Custom Ufunc: Clipped Activation')
70 | plt.xlabel('Input')
71 | plt.ylabel('Output')
72 | plt.legend()
73 | plt.savefig('custom_functions_ufunc.png')
74 | 
75 | # Plot vectorized function
76 | plt.figure(figsize=(8, 4))
77 | plt.plot(X_complex, y_vectorized, label='Vectorized Function')
78 | plt.title('Vectorized Complex Function')
79 | plt.xlabel('Input')
80 | plt.ylabel('Output')
81 | plt.legend()
82 | plt.savefig('custom_functions_vectorized.png')
83 | 
84 | # %% [7. Practical ML Application]
85 | # Apply custom ufunc to preprocess ML features.
86 | np.random.seed(42)
87 | X_features = np.random.rand(1000, 10) * 10 - 5
88 | X_processed = ufunc_activation(X_features).astype(float)
89 | print("\nML Feature Preprocessing:")
90 | print("Processed Features Shape:", X_processed.shape)
91 | print("First 3 Processed Samples:\n", X_processed[:3])
92 | 
93 | # %% [8. Interview Scenario: Numba Optimization]
94 | # Discuss numba for ML performance.
95 | print("\nInterview Scenario: Numba Optimization")
96 | print("Q: How would you optimize a gradient computation in NumPy?")
97 | print("A: Use numba's @jit to compile Python code to machine code.")
98 | print("Key: Numba accelerates loops and numerical operations.")
99 | print("Example: @jit def compute_gradient(X, y, theta): ...")


--------------------------------------------------------------------------------
/Numpy Fundamentals/03 Advanced NumPy Concepts/01 Vectorization and Performance/vectorization_performance.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import time
 4 | try:
 5 |     from scipy import sparse
 6 | except ImportError:
 7 |     sparse = None
 8 | 
 9 | # %% [1. Introduction to Vectorization and Performance]
10 | # Learn advanced NumPy techniques for performance optimization.
11 | # Covers vectorization, memory-efficient computations, and profiling.
12 | 
13 | print("NumPy version:", np.__version__)
14 | 
15 | # %% [2. Replacing Loops with Vectorized Operations]
16 | # Compare loop vs. vectorized operations.
17 | np.random.seed(42)
18 | X = np.random.rand(10000, 100)  # Large dataset
19 | y = np.random.rand(10000)
20 | 
21 | # Loop-based dot product
22 | start_time = time.time()
23 | result_loop = np.zeros(100)
24 | for i in range(100):
25 |     result_loop[i] = np.sum(X[:, i] * y)
26 | loop_time = time.time() - start_time
27 | 
28 | # Vectorized dot product
29 | start_time = time.time()
30 | result_vectorized = np.dot(X.T, y)
31 | vectorized_time = time.time() - start_time
32 | print("\nLoop Time:", loop_time, "seconds")
33 | print("Vectorized Time:", vectorized_time, "seconds")
34 | print("Speedup:", loop_time / vectorized_time)
35 | 
36 | # Verify results
37 | print("Results Match:", np.allclose(result_loop, result_vectorized))
38 | 
39 | # %% [3. Memory-efficient Computations]
40 | # Use np.memmap for large datasets.
41 | large_array = np.memmap('large_array.dat', dtype='float32', mode='w+', shape=(10000, 1000))
42 | large_array[:] = np.random.rand(10000, 1000)
43 | print("\nMemory-mapped Array Shape:", large_array.shape)
44 | 
45 | # Stride tricks for sliding windows
46 | from numpy.lib.stride_tricks import as_strided
47 | X_small = np.random.rand(100, 10)
48 | window_size = 3
49 | strided = as_strided(X_small, shape=(X_small.shape[0] - window_size + 1, window_size, X_small.shape[1]),
50 |                      strides=(X_small.strides[0], X_small.strides[0], X_small.strides[1]))
51 | print("\nStrided Array Shape (sliding windows):", strided.shape)
52 | 
53 | # %% [4. Profiling and Optimization]
54 | # Profile a computation-heavy operation.
55 | def compute_distances(X):
56 |     return np.sqrt(((X[:, np.newaxis] - X)**2).sum(axis=2))
57 | 
58 | X_profile = np.random.rand(1000, 5)
59 | start_time = time.time()
60 | distances = compute_distances(X_profile)
61 | profile_time = time.time() - start_time
62 | print("\nDistance Computation Time:", profile_time, "seconds")
63 | 
64 | # %% [5. Visualizing Performance]
65 | # Plot loop vs. vectorized times.
66 | plt.figure(figsize=(8, 4))
67 | plt.bar(['Loop', 'Vectorized'], [loop_time, vectorized_time], color=['red', 'green'])
68 | plt.title('Loop vs. Vectorized Performance')
69 | plt.ylabel('Time (seconds)')
70 | plt.savefig('vectorization_performance_bar.png')
71 | 
72 | # Visualize distance matrix
73 | plt.figure(figsize=(6, 4))
74 | plt.imshow(distances, cmap='viridis')
75 | plt.colorbar()
76 | plt.title('Distance Matrix')
77 | plt.savefig('vectorization_performance_distances.png')
78 | 
79 | # %% [6. Practical ML Application]
80 | # Optimize a feature scaling operation.
81 | np.random.seed(42)
82 | X_ml = np.random.rand(5000, 50)
83 | start_time = time.time()
84 | X_scaled = (X_ml - np.mean(X_ml, axis=0)) / np.std(X_ml, axis=0)
85 | scaling_time = time.time() - start_time
86 | print("\nML Feature Scaling Time:", scaling_time, "seconds")
87 | print("Scaled Features Shape:", X_scaled.shape)
88 | 
89 | # %% [7. Interview Scenario: Vectorization]
90 | # Discuss vectorization benefits.
91 | print("\nInterview Scenario: Vectorization")
92 | print("Q: Why use vectorized operations in ML?")
93 | print("A: Vectorization replaces slow loops with optimized C-based operations.")
94 | print("Key: Improves performance for large datasets.")
95 | print("Example: np.dot(X.T, y) vs. loop-based summation.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/01 Beginner NumPy Concepts/04 Data Preprocessing for ML/data_preprocessing_ml.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Data Preprocessing for ML]
 9 | # Learn how to preprocess ML datasets with NumPy.
10 | # Covers loading datasets, normalization, standardization, and train/test splitting.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Loading Datasets]
15 | # Load Iris dataset or use synthetic CSV data.
16 | if load_iris:
17 |     iris = load_iris()
18 |     X = iris.data
19 |     y = iris.target
20 | else:
21 |     # Synthetic CSV-like data
22 |     data = np.array([[5.1, 3.5, 1.4, 0.2, 0], [4.9, 3.0, 1.4, 0.2, 0], 
23 |                      [7.0, 3.2, 4.7, 1.4, 1], [6.4, 3.2, 4.5, 1.5, 1]])
24 |     X = data[:, :-1]  # Features
25 |     y = data[:, -1].astype(int)  # Labels
26 | 
27 | print("\nLoaded Dataset:")
28 | print("Features (X) Shape:", X.shape)
29 | print("Labels (y) Shape:", y.shape)
30 | print("First 3 Samples:\n", np.hstack((X[:3], y[:3].reshape(-1, 1))))
31 | 
32 | # %% [3. Normalization]
33 | # Scale features to [0, 1] using min-max normalization.
34 | X_min = np.min(X, axis=0)
35 | X_max = np.max(X, axis=0)
36 | X_normalized = (X - X_min) / (X_max - X_min)
37 | print("\nNormalized Features (first 3 rows):\n", X_normalized[:3])
38 | 
39 | # %% [4. Standardization]
40 | # Scale features to mean=0, std=1.
41 | X_mean = np.mean(X, axis=0)
42 | X_std = np.std(X, axis=0)
43 | X_standardized = (X - X_mean) / X_std
44 | print("\nStandardized Features (first 3 rows):\n", X_standardized[:3])
45 | 
46 | # %% [5. Train/Test Splitting]
47 | # Split dataset into training and testing sets.
48 | np.random.seed(42)
49 | indices = np.random.permutation(X.shape[0])
50 | train_size = int(0.8 * X.shape[0])
51 | train_idx, test_idx = indices[:train_size], indices[train_size:]
52 | X_train, X_test = X[train_idx], X[test_idx]
53 | y_train, y_test = y[train_idx], y[test_idx]
54 | print("\nTrain/Test Split:")
55 | print("X_train Shape:", X_train.shape)
56 | print("X_test Shape:", X_test.shape)
57 | print("y_train Shape:", y_train.shape)
58 | print("y_test Shape:", y_test.shape)
59 | 
60 | # %% [6. Visualizing Preprocessing]
61 | # Visualize standardized vs. original features.
62 | if load_iris:
63 |     plt.figure(figsize=(8, 4))
64 |     plt.scatter(X[:, 0], X[:, 2], c='blue', alpha=0.5, label='Original')
65 |     plt.scatter(X_standardized[:, 0], X_standardized[:, 2], c='red', alpha=0.5, label='Standardized')
66 |     plt.xlabel('Sepal Length')
67 |     plt.ylabel('Petal Length')
68 |     plt.title('Original vs. Standardized Iris Features')
69 |     plt.legend()
70 |     plt.savefig('preprocessing_scatter.png')
71 | 
72 | # %% [7. Practical ML Application]
73 | # Prepare a synthetic dataset for ML classification.
74 | np.random.seed(42)
75 | X_synthetic = np.random.rand(100, 2)  # 100 samples, 2 features
76 | y_synthetic = (X_synthetic[:, 0] + X_synthetic[:, 1] > 1).astype(int)
77 | X_synthetic_std = (X_synthetic - np.mean(X_synthetic, axis=0)) / np.std(X_synthetic, axis=0)
78 | train_idx = np.random.choice(100, 80, replace=False)
79 | test_idx = np.setdiff1d(np.arange(100), train_idx)
80 | X_train_synthetic = X_synthetic_std[train_idx]
81 | X_test_synthetic = X_synthetic_std[test_idx]
82 | print("\nSynthetic ML Dataset:")
83 | print("Standardized X_train Shape:", X_train_synthetic.shape)
84 | print("X_test Shape:", X_test_synthetic.shape)
85 | 
86 | # %% [8. Interview Scenario: Preprocessing]
87 | # Discuss preprocessing for ML pipelines.
88 | print("\nInterview Scenario: Preprocessing")
89 | print("Q: How would you preprocess a dataset for ML with NumPy?")
90 | print("A: Normalize or standardize features, split into train/test sets.")
91 | print("Key: Standardization (mean=0, std=1) is common for ML algorithms.")
92 | print("Example: (X - np.mean(X, axis=0)) / np.std(X, axis=0) for standardization.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/01 Beginner NumPy Concepts/01 Array Creation and Properties/array_creation_properties.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | try:
 4 |     from sklearn.datasets import load_iris
 5 | except ImportError:
 6 |     load_iris = None
 7 | 
 8 | # %% [1. Introduction to Array Creation and Properties]
 9 | # Learn how to create NumPy arrays and explore their properties for ML data handling.
10 | # Covers np.array, np.zeros, np.ones, np.random, array attributes, and reshaping.
11 | 
12 | print("NumPy version:", np.__version__)
13 | 
14 | # %% [2. Creating Arrays]
15 | # Create arrays using different methods.
16 | # From a Python list (e.g., Iris features).
17 | if load_iris:
18 |     iris = load_iris()
19 |     data = iris.data  # Shape: (150, 4)
20 | else:
21 |     # Fallback synthetic data
22 |     data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2], [7.0, 3.2, 4.7, 1.4]])
23 | 
24 | array_from_list = np.array(data)
25 | print("\nArray from List (Iris features):\n", array_from_list[:3])
26 | 
27 | # Zeros, ones, and random arrays
28 | zeros_array = np.zeros((3, 4))  # 3x4 array of zeros
29 | ones_array = np.ones((2, 5))    # 2x5 array of ones
30 | random_array = np.random.rand(3, 3)  # 3x3 array of random values [0, 1)
31 | print("\nZeros Array:\n", zeros_array)
32 | print("\nOnes Array:\n", ones_array)
33 | print("\nRandom Array:\n", random_array)
34 | 
35 | # %% [3. Array Attributes]
36 | # Explore array properties: shape, dtype, ndim.
37 | print("\nArray Attributes (Iris array):")
38 | print("Shape:", array_from_list.shape)  # (n_samples, n_features)
39 | print("Data Type:", array_from_list.dtype)
40 | print("Dimensions:", array_from_list.ndim)
41 | 
42 | # Example with random integer array
43 | int_array = np.random.randint(0, 10, size=(4, 3))
44 | print("\nInteger Array:\n", int_array)
45 | print("Shape:", int_array.shape)
46 | print("Data Type:", int_array.dtype)
47 | print("Dimensions:", int_array.ndim)
48 | 
49 | # %% [4. Reshaping and Flattening]
50 | # Reshape arrays for ML tasks (e.g., flattening features).
51 | reshaped_array = np.reshape(array_from_list, (50, 12))  # Reshape to 50x12
52 | print("\nReshaped Array (50x12):\n", reshaped_array[:2])
53 | 
54 | flattened_array = np.ravel(array_from_list)  # Flatten to 1D
55 | print("\nFlattened Array (first 10 elements):\n", flattened_array[:10])
56 | 
57 | # Example: Reshape for image-like data
58 | image_array = np.random.rand(16, 16)  # Simulate 16x16 grayscale image
59 | reshaped_image = np.reshape(image_array, (4, 64))  # Reshape to 4x64
60 | print("\nImage Array Shape:", image_array.shape)
61 | print("Reshaped Image Shape:", reshaped_image.shape)
62 | 
63 | # %% [5. Visualizing Arrays]
64 | # Visualize a random 2D array as a heatmap.
65 | plt.figure(figsize=(6, 4))
66 | plt.imshow(random_array, cmap='viridis')
67 | plt.colorbar()
68 | plt.title('Random 2D Array Heatmap')
69 | plt.savefig('array_creation_heatmap.png')
70 | 
71 | # Visualize Iris feature distribution
72 | if load_iris:
73 |     plt.figure(figsize=(8, 4))
74 |     plt.hist(array_from_list[:, 0], bins=20, color='blue', alpha=0.7)
75 |     plt.title('Iris Sepal Length Distribution')
76 |     plt.xlabel('Sepal Length (cm)')
77 |     plt.ylabel('Frequency')
78 |     plt.savefig('iris_sepal_histogram.png')
79 | 
80 | # %% [6. Practical ML Application]
81 | # Create a synthetic dataset for ML classification.
82 | np.random.seed(42)
83 | X = np.random.rand(100, 2)  # 100 samples, 2 features
84 | y = (X[:, 0] + X[:, 1] > 1).astype(int)  # Binary labels
85 | print("\nSynthetic ML Dataset:")
86 | print("Features (X) Shape:", X.shape)
87 | print("Labels (y) Shape:", y.shape)
88 | print("First 5 samples:\n", np.hstack((X[:5], y[:5].reshape(-1, 1))))
89 | 
90 | # %% [7. Interview Scenario: Array Creation]
91 | # Discuss creating arrays for ML tasks.
92 | print("\nInterview Scenario: Array Creation")
93 | print("Q: How would you create a dataset for ML with NumPy?")
94 | print("A: Use np.random for features, np.array for structured data, and np.reshape for correct shapes.")
95 | print("Key: Ensure correct shape and dtype for ML model compatibility.")
96 | print("Example: np.random.rand(100, 2) for 100 samples with 2 features.")


--------------------------------------------------------------------------------
/Numpy Fundamentals/03 Advanced NumPy Concepts/README.md:
--------------------------------------------------------------------------------
  1 | # 🌐 Advanced NumPy Concepts (`numpy`)
  2 | 
  3 | ## 📖 Introduction
  4 | NumPy’s advanced concepts focus on performance optimization, custom functions, integration with ML frameworks, and complex tensor manipulations for AI and machine learning. This section covers **Vectorization and Performance**, **Custom Functions and Ufuncs**, **Integration with ML Frameworks**, and **Advanced Tensor Manipulations**, with practical examples and interview insights to elevate your NumPy expertise.
  5 | 
  6 | ## 🎯 Learning Objectives
  7 | - Optimize NumPy code with vectorization and memory-efficient techniques.
  8 | - Create custom functions and ufuncs for ML tasks.
  9 | - Integrate NumPy with TensorFlow, PyTorch, and scikit-learn.
 10 | - Manipulate tensors for deep learning and large-scale data.
 11 | 
 12 | ## 🔑 Key Concepts
 13 | - **Vectorization and Performance**:
 14 |   - Replacing loops with vectorized operations.
 15 |   - Memory-efficient computations (`np.memmap`, `np.lib.stride_tricks`).
 16 |   - Profiling and optimizing code.
 17 | - **Custom Functions and Ufuncs**:
 18 |   - Writing ufuncs with `np.frompyfunc` or `numba`.
 19 |   - Vectorizing operations (`np.vectorize`).
 20 |   - Gradient computations for ML.
 21 | - **Integration with ML Frameworks**:
 22 |   - Converting arrays to tensors (`tf.convert_to_tensor`, `torch.from_numpy`).
 23 |   - NumPy-based data pipelines.
 24 |   - Interfacing with scikit-learn.
 25 | - **Advanced Tensor Manipulations**:
 26 |   - Batch processing for deep learning.
 27 |   - Sparse arrays (`scipy.sparse`).
 28 |   - Tensor decompositions (Tucker, CP).
 29 | 
 30 | ## 📝 Example Walkthroughs
 31 | The following Python files demonstrate each subsection:
 32 | 
 33 | 1. **`vectorization_performance.py`**:
 34 |    - Compares loop vs. vectorized operations (`np.dot`).
 35 |    - Uses `np.memmap` and `np.lib.stride_tricks` for memory efficiency.
 36 |    - Visualizes performance (bar plot) and distance matrix (heatmap).
 37 | 
 38 |    Example code:
 39 |    ```python
 40 |    import numpy as np
 41 |    X, y = np.random.rand(10000, 100), np.random.rand(10000)
 42 |    result = np.dot(X.T, y)  # Vectorized
 43 |    ```
 44 | 
 45 | 2. **`custom_functions_ufuncs.py`**:
 46 |    - Creates custom ufuncs (`np.frompyfunc`) and vectorized functions (`np.vectorize`).
 47 |    - Optimizes gradients with `numba`.
 48 |    - Visualizes custom activation and vectorized function outputs.
 49 | 
 50 |    Example code:
 51 |    ```python
 52 |    import numpy as np
 53 |    ufunc = np.frompyfunc(lambda x: np.clip(x, -1, 1), 1, 1)
 54 |    y = ufunc(np.linspace(-5, 5, 100)).astype(float)
 55 |    ```
 56 | 
 57 | 3. **`integration_ml_frameworks.py`**:
 58 |    - Converts NumPy arrays to TensorFlow/PyTorch tensors.
 59 |    - Builds a TensorFlow data pipeline and uses scikit-learn preprocessing.
 60 |    - Visualizes original vs. scaled features.
 61 | 
 62 |    Example code:
 63 |    ```python
 64 |    import tensorflow as tf
 65 |    X_np = np.random.rand(100, 5)
 66 |    X_tf = tf.convert_to_tensor(X_np, dtype=tf.float32)
 67 |    ```
 68 | 
 69 | 4. **`advanced_tensor_manipulations.py`**:
 70 |    - Normalizes image batches and processes sparse arrays.
 71 |    - Performs CP decomposition with `tensorly`.
 72 |    - Visualizes normalized images and sparse matrix structure.
 73 | 
 74 |    Example code:
 75 |    ```python
 76 |    import numpy as np
 77 |    images = np.random.rand(32, 3, 64, 64)
 78 |    images_normalized = images - np.mean(images, axis=(2, 3), keepdims=True)
 79 |    ```
 80 | 
 81 | ## 🛠️ Practical Tasks
 82 | 1. **Vectorization**:
 83 |    - Optimize a loop-based computation (e.g., dot product) with vectorization.
 84 |    - Process a large dataset with `np.memmap`.
 85 | 2. **Custom Functions**:
 86 |    - Write a custom ufunc for a non-standard activation function.
 87 |    - Optimize a gradient computation with `numba`.
 88 | 3. **Framework Integration**:
 89 |    - Convert a NumPy dataset to a TensorFlow `tf.data.Dataset`.
 90 |    - Preprocess features with scikit-learn and NumPy.
 91 | 4. **Tensor Manipulations**:
 92 |    - Normalize a batch of images for a CNN.
 93 |    - Apply CP decomposition to compress a tensor.
 94 | 
 95 | ## 💡 Interview Tips
 96 | - **Common Questions**:
 97 |   - Why is vectorization faster than loops in NumPy?
 98 |   - How do you optimize a NumPy computation with `numba`?
 99 |   - How do you integrate NumPy with TensorFlow/PyTorch?
100 |   - What are the benefits of tensor decomposition in ML?
101 | - **Tips**:
102 |   - Explain vectorization’s use of C-based operations for speed.
103 |   - Highlight `numba`’s JIT compilation for ML optimizations.
104 |   - Be ready to code a data pipeline or tensor decomposition.
105 | - **Coding Tasks**:
106 |   - Vectorize a loop-based computation.
107 |   - Convert a NumPy array to a TensorFlow dataset.
108 |   - Implement a sparse matrix operation.
109 | 
110 | ## 📚 Resources
111 | - [NumPy Performance Tips](https://numpy.org/doc/stable/user/performance.html)
112 | - [NumPy and Numba](https://numba.pydata.org/)
113 | - [TensorFlow Data Pipeline](https://www.tensorflow.org/guide/data)
114 | - [Scikit-learn Preprocessing](https://scikit-learn.org/stable/modules/preprocessing.html)
115 | - [Tensorly Documentation](http://tensorly.org/stable/)
116 | - [SciPy Sparse Arrays](https://docs.scipy.org/doc/scipy/reference/sparse.html)


--------------------------------------------------------------------------------
/Numpy Fundamentals/01 Beginner NumPy Concepts/README.md:
--------------------------------------------------------------------------------
  1 | # 🌱 Beginner NumPy Concepts (`numpy`)
  2 | 
  3 | ## 📖 Introduction
  4 | NumPy is the cornerstone of numerical computing in Python, essential for AI and machine learning (ML) data manipulation and preprocessing. This section introduces the fundamentals of NumPy, focusing on array creation, indexing, operations, and ML preprocessing. It covers **Array Creation and Properties**, **Indexing and Slicing**, **Basic Operations**, and **Data Preprocessing for ML**, with practical examples and interview insights tailored to beginners.
  5 | 
  6 | ## 🎯 Learning Objectives
  7 | - Create and manipulate NumPy arrays for ML datasets.
  8 | - Access and filter data using indexing and slicing.
  9 | - Perform element-wise operations, broadcasting, and universal functions (ufuncs).
 10 | - Preprocess ML datasets with normalization, standardization, and train/test splitting.
 11 | 
 12 | ## 🔑 Key Concepts
 13 | - **Array Creation and Properties**:
 14 |   - Create arrays with `np.array`, `np.zeros`, `np.ones`, `np.random`.
 15 |   - Understand attributes (`shape`, `dtype`, `ndim`) and reshaping (`np.reshape`, `np.ravel`).
 16 | - **Indexing and Slicing**:
 17 |   - Use basic indexing (`arr[0]`), slicing (`arr[:5, 1:3]`), boolean, and fancy indexing.
 18 | - **Basic Operations**:
 19 |   - Perform element-wise operations (e.g., `arr + 1`), broadcasting, ufuncs (`np.sin`), and reductions (`np.mean`).
 20 | - **Data Preprocessing for ML**:
 21 |   - Load datasets (`np.loadtxt`), normalize/standardize features, and split train/test sets.
 22 | 
 23 | ## 📝 Example Walkthroughs
 24 | The following Python files demonstrate each subsection:
 25 | 
 26 | 1. **`array_creation_properties.py`**:
 27 |    - Creates arrays from Iris data and synthetic datasets (`np.array`, `np.random.rand`).
 28 |    - Explores attributes (`shape`, `dtype`) and reshaping for ML tasks.
 29 |    - Visualizes a random array as a heatmap and Iris feature distribution.
 30 | 
 31 |    Example code:
 32 |    ```python
 33 |    import numpy as np
 34 |    data = np.array([[5.1, 3.5, 1.4, 0.2], [4.9, 3.0, 1.4, 0.2]])
 35 |    print("Shape:", data.shape)  # (2, 4)
 36 |    reshaped = np.reshape(data, (4, 2))
 37 |    ```
 38 | 
 39 | 2. **`indexing_slicing.py`**:
 40 |    - Demonstrates basic indexing, slicing, boolean, and fancy indexing on Iris data.
 41 |    - Filters samples (e.g., sepal length > 6.0) and selects specific rows/columns.
 42 |    - Visualizes filtered data with a scatter plot.
 43 | 
 44 |    Example code:
 45 |    ```python
 46 |    import numpy as np
 47 |    data = np.array([[5.1, 3.5, 1.4], [4.9, 3.0, 1.4]])
 48 |    long_sepal = data[data[:, 0] > 5.0]
 49 |    ```
 50 | 
 51 | 3. **`basic_operations.py`**:
 52 |    - Performs element-wise operations (e.g., scaling), broadcasting (e.g., bias addition), and ufuncs (e.g., `np.sin`).
 53 |    - Computes a mean squared error for ML.
 54 |    - Visualizes normalized feature distribution and true vs. predicted labels.
 55 | 
 56 |    Example code:
 57 |    ```python
 58 |    import numpy as np
 59 |    data = np.array([[5.1, 3.5], [4.9, 3.0]])
 60 |    normalized = data / np.max(data, axis=0)
 61 |    ```
 62 | 
 63 | 4. **`data_preprocessing_ml.py`**:
 64 |    - Loads Iris or synthetic data, normalizes/standardizes features, and splits train/test sets.
 65 |    - Prepares a synthetic ML dataset with standardization.
 66 |    - Visualizes original vs. standardized features.
 67 | 
 68 |    Example code:
 69 |    ```python
 70 |    import numpy as np
 71 |    X = np.array([[5.1, 3.5], [4.9, 3.0]])
 72 |    X_standardized = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
 73 |    ```
 74 | 
 75 | ## 🛠️ Practical Tasks
 76 | 1. **Array Creation**:
 77 |    - Create a 3x4 array of random values and print its shape and dtype.
 78 |    - Reshape a 1D array into a 2D matrix for ML input.
 79 | 2. **Indexing and Slicing**:
 80 |    - Filter a dataset to select samples with a feature value above the mean.
 81 |    - Extract the first and last columns of a 2D array using slicing.
 82 | 3. **Basic Operations**:
 83 |    - Normalize a dataset’s features to [0, 1] using broadcasting.
 84 |    - Compute the mean and standard deviation of each feature in a dataset.
 85 | 4. **Data Preprocessing**:
 86 |    - Load a CSV dataset with `np.loadtxt` and standardize its features.
 87 |    - Split a dataset into 80% training and 20% testing sets.
 88 | 
 89 | ## 💡 Interview Tips
 90 | - **Common Questions**:
 91 |   - How do you create a NumPy array for an ML dataset?
 92 |   - What is broadcasting, and how is it used in ML preprocessing?
 93 |   - How would you filter outliers using boolean indexing?
 94 |   - Why standardize features before training an ML model?
 95 | - **Tips**:
 96 |   - Explain broadcasting’s efficiency for feature scaling (e.g., `arr / np.max(arr)`).
 97 |   - Highlight boolean indexing for data cleaning (e.g., removing outliers).
 98 |   - Be ready to code normalization or train/test splitting with NumPy.
 99 | - **Coding Tasks**:
100 |   - Create a 2D array and normalize its columns.
101 |   - Filter a dataset using a boolean condition.
102 |   - Split a NumPy array into train/test sets.
103 | 
104 | ## 📚 Resources
105 | - [NumPy Quickstart](https://numpy.org/doc/stable/user/quickstart.html)
106 | - [NumPy Basics](https://numpy.org/doc/stable/user/absolute_beginners.html)
107 | - [NumPy Array Creation](https://numpy.org/doc/stable/reference/routines.array-creation.html)
108 | - [NumPy Indexing](https://numpy.org/doc/stable/user/basics.indexing.html)
109 | - [NumPy Broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html)
110 | - [SciPy Lecture Notes: NumPy](https://scipy-lectures.org/intro/numpy/index.html)
111 | - [Kaggle: Python and NumPy](https://www.kaggle.com/learn/python)


--------------------------------------------------------------------------------
/Numpy Fundamentals/02 Intermediate NumPy Concepts/README.md:
--------------------------------------------------------------------------------
  1 | # 🏋️ Intermediate NumPy Concepts (`numpy`)
  2 | 
  3 | ## 📖 Introduction
  4 | NumPy’s intermediate concepts build on beginner skills, focusing on linear algebra, tensor operations, statistical computations, and ML algorithm implementation for AI and machine learning. This section covers **Linear Algebra for ML**, **Tensor Operations**, **Statistical Computations**, and **Implementing ML Algorithms**, with practical examples and interview insights to deepen your NumPy proficiency.
  5 | 
  6 | ## 🎯 Learning Objectives
  7 | - Perform matrix operations and linear algebra for ML tasks.
  8 | - Manipulate multi-dimensional tensors for deep learning workflows.
  9 | - Compute statistical metrics and augment data for ML analysis.
 10 | - Implement ML algorithms (e.g., linear regression, K-means) from scratch.
 11 | 
 12 | ## 🔑 Key Concepts
 13 | - **Linear Algebra for ML**:
 14 |   - Matrix operations (`np.dot`, `np.matmul`, `np.transpose`).
 15 |   - Solving linear systems (`np.linalg.solve`).
 16 |   - Eigenvalues/vectors (`np.linalg.eig`) and SVD (`np.linalg.svd`).
 17 | - **Tensor Operations**:
 18 |   - Multi-dimensional arrays (3D+ tensors).
 19 |   - Reshaping/transposing (`np.reshape`, `np.moveaxis`, `np.swapaxes`).
 20 |   - Contractions/reductions (`np.tensordot`, `np.sum`).
 21 | - **Statistical Computations**:
 22 |   - Descriptive statistics (`np.mean`, `np.median`, `np.var`).
 23 |   - Correlation/covariance (`np.corrcoef`, `np.cov`).
 24 |   - Random sampling (`np.random.choice`).
 25 | - **Implementing ML Algorithms**:
 26 |   - Linear regression with normal equations.
 27 |   - Logistic regression with gradient descent.
 28 |   - K-means clustering from scratch.
 29 | 
 30 | ## 📝 Example Walkthroughs
 31 | The following Python files demonstrate each subsection:
 32 | 
 33 | 1. **`linear_algebra_ml.py`**:
 34 |    - Performs matrix operations (`np.dot`, `np.matmul`) on Iris data.
 35 |    - Solves linear systems and computes eigenvalues/SVD for dimensionality reduction.
 36 |    - Visualizes SVD-reduced data and covariance matrix heatmap.
 37 | 
 38 |    Example code:
 39 |    ```python
 40 |    import numpy as np
 41 |    X = np.random.rand(100, 4)
 42 |    U, S, Vt = np.linalg.svd(X, full_matrices=False)
 43 |    X_reduced = np.dot(U[:, :2], np.diag(S[:2]))
 44 |    ```
 45 | 
 46 | 2. **`tensor_operations.py`**:
 47 |    - Creates 3D/4D tensors for image data and reshapes/transposes them.
 48 |    - Performs tensor contractions (`np.tensordot`) and reductions (`np.sum`).
 49 |    - Visualizes tensor slices and reduction results.
 50 | 
 51 |    Example code:
 52 |    ```python
 53 |    import numpy as np
 54 |    tensor = np.random.rand(10, 32, 32)
 55 |    reshaped = np.reshape(tensor, (10, 32 * 32))
 56 |    ```
 57 | 
 58 | 3. **`statistical_computations.py`**:
 59 |    - Computes descriptive statistics (`np.mean`, `np.var`) and correlation/covariance on Iris data.
 60 |    - Augments data with random sampling (`np.random.choice`).
 61 |    - Visualizes correlation matrix and augmented data distribution.
 62 | 
 63 |    Example code:
 64 |    ```python
 65 |    import numpy as np
 66 |    X = np.random.rand(150, 4)
 67 |    corr_matrix = np.corrcoef(X.T)
 68 |    ```
 69 | 
 70 | 4. **`implementing_ml_algorithms.py`**:
 71 |    - Implements linear regression (normal equations), logistic regression (gradient descent), and K-means clustering.
 72 |    - Uses synthetic or blob data for simplicity.
 73 |    - Visualizes linear regression fit and K-means clusters.
 74 | 
 75 |    Example code:
 76 |    ```python
 77 |    import numpy as np
 78 |    X, y = np.random.rand(100, 2), np.random.rand(100)
 79 |    X_b = np.c_[np.ones((100, 1)), X]
 80 |    theta = np.linalg.solve(np.dot(X_b.T, X_b), np.dot(X_b.T, y))
 81 |    ```
 82 | 
 83 | ## 🛠️ Practical Tasks
 84 | 1. **Linear Algebra**:
 85 |    - Compute the covariance matrix of a dataset and find its eigenvalues.
 86 |    - Apply SVD to reduce a dataset to 2 dimensions.
 87 | 2. **Tensor Operations**:
 88 |    - Reshape a 3D tensor (e.g., image batch) for a dense layer.
 89 |    - Perform a tensor contraction between two tensors.
 90 | 3. **Statistical Computations**:
 91 |    - Calculate feature correlations and select the most predictive feature.
 92 |    - Augment a dataset with random sampling.
 93 | 4. **ML Algorithms**:
 94 |    - Implement linear regression for a synthetic dataset.
 95 |    - Code K-means clustering and visualize the results.
 96 | 
 97 | ## 💡 Interview Tips
 98 | - **Common Questions**:
 99 |   - How does SVD enable PCA in ML?
100 |   - What’s the difference between `np.dot` and `np.matmul`?
101 |   - How would you implement logistic regression with NumPy?
102 |   - Why use random sampling for data augmentation?
103 | - **Tips**:
104 |   - Explain SVD’s role in dimensionality reduction (e.g., `U[:, :k] @ np.diag(S[:k])` on mean-centered data for PCA).
105 |   - Highlight gradient descent’s iterative nature for logistic regression.
106 |   - Be ready to code linear regression or K-means from scratch.
107 | - **Coding Tasks**:
108 |   - Implement PCA using SVD.
109 |   - Code gradient descent for logistic regression.
110 |   - Compute a correlation matrix for feature selection.
111 | 
112 | ## 📚 Resources
113 | - [NumPy Linear Algebra](https://numpy.org/doc/stable/reference/routines.linalg.html)
114 | - [NumPy Random Sampling](https://numpy.org/doc/stable/reference/random/index.html)
115 | - [SciPy Lecture Notes: NumPy for ML](https://scipy-lectures.org/intro/numpy/index.html)
116 | - [NumPy Array Manipulation](https://numpy.org/doc/stable/reference/routines.array-manipulation.html)
117 | - [Kaggle: Machine Learning with NumPy](https://www.kaggle.com/learn/intro-to-machine-learning)
118 | - [Python for Data Analysis](https://www.oreilly.com/library/view/python-for-data/9781491957653/)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🚀 NumPy for AI/ML Roadmap
  2 | 
  3 | ## 📖 Introduction
  4 | NumPy is the foundational library for numerical computing in Python, powering data manipulation, tensor operations, and mathematical computations in AI and machine learning (ML). It underpins ML frameworks like TensorFlow, PyTorch, and scikit-learn, making it essential for preprocessing data, implementing algorithms, and optimizing performance. This roadmap provides a structured path to master NumPy for AI/ML, from basic array operations to advanced tensor manipulations and ML algorithm implementation, with a focus on practical applications and interview preparation.
  5 | 
  6 | ## 🎯 Learning Objectives
  7 | - **Master NumPy Basics**: Understand array creation, indexing, and operations for ML data handling.
  8 | - **Apply Linear Algebra**: Use NumPy for matrix operations critical to ML algorithms.
  9 | - **Handle Tensors**: Perform tensor manipulations for deep learning workflows.
 10 | - **Implement ML Algorithms**: Code ML models (e.g., linear regression, PCA) using NumPy.
 11 | - **Optimize Performance**: Leverage NumPy’s vectorization and integration with ML frameworks.
 12 | - **Prepare for Interviews**: Gain hands-on experience and insights for AI/ML job interviews.
 13 | 
 14 | ## 🛠️ Prerequisites
 15 | - **Python**: Familiarity with Python programming (lists, loops, functions).
 16 | - **Basic Math**: Understanding of linear algebra (matrices, vectors) and statistics.
 17 | - **Machine Learning Basics**: Optional knowledge of supervised learning, neural networks, and gradient descent.
 18 | - **Development Environment**: Install NumPy (`pip install numpy`), Matplotlib (`pip install matplotlib`), and optional ML libraries (e.g., scikit-learn, TensorFlow).
 19 | 
 20 | ## 📈 NumPy for AI/ML Learning Roadmap
 21 | 
 22 | ### 🌱 Beginner NumPy Concepts
 23 | Start with the fundamentals of NumPy for data manipulation and preprocessing in ML.
 24 | 
 25 | - **Array Creation and Properties**
 26 |   - Creating arrays (`np.array`, `np.zeros`, `np.ones`, `np.random`)
 27 |   - Array attributes (shape, dtype, ndim)
 28 |   - Reshaping and flattening arrays (`np.reshape`, `np.ravel`)
 29 | - **Indexing and Slicing**
 30 |   - Basic indexing (`arr[0]`, `arr[:, 1]`)
 31 |   - Boolean and fancy indexing
 32 |   - Slicing for data subsetting
 33 | - **Basic Operations**
 34 |   - Element-wise operations (addition, multiplication, etc.)
 35 |   - Broadcasting for shape compatibility
 36 |   - Universal functions (ufuncs: `np.sin`, `np.exp`) and aggregation functions (`np.mean`)
 37 | - **Data Preprocessing for ML**
 38 |   - Loading datasets (e.g., CSV with `np.loadtxt`, `np.genfromtxt`)
 39 |   - Normalization and standardization (`np.mean`, `np.std`)
 40 |   - Splitting data into train/test sets
 41 | 
 42 | **Practical Tasks**:
 43 | - Create a 2D array from a dataset (e.g., Iris) and compute mean/std per feature.
 44 | - Use boolean indexing to filter outliers in a dataset.
 45 | - Normalize a dataset using broadcasting.
 46 | - Split a NumPy array into train/test sets for ML.
 47 | 
 48 | **Resources**:
 49 | - [NumPy Quickstart](https://numpy.org/doc/stable/user/quickstart.html)
 50 | - [NumPy Basics](https://numpy.org/doc/stable/user/absolute_beginners.html)
 51 | - [NumPy Array Creation](https://numpy.org/doc/stable/reference/routines.array-creation.html)
 52 | 
 53 | ### 🏋️ Intermediate NumPy Concepts
 54 | Deepen your skills with linear algebra, tensor operations, and ML algorithm foundations.
 55 | 
 56 | - **Linear Algebra for ML**
 57 |   - Matrix operations (`np.dot`, `np.matmul`, `np.transpose`)
 58 |   - Solving linear systems (`np.linalg.solve`)
 59 |   - Eigenvalues/vectors (`np.linalg.eig`)
 60 |   - Singular Value Decomposition (SVD) for dimensionality reduction
 61 | - **Tensor Operations**
 62 |   - Multi-dimensional arrays (3D+ tensors for images, sequences)
 63 |   - Tensor reshaping and transposing (`np.moveaxis`, `np.swapaxes`)
 64 |   - Tensor contractions and reductions (`np.tensordot`, `np.sum`)
 65 | - **Statistical Computations**
 66 |   - Descriptive statistics (`np.mean`, `np.median`, `np.var`)
 67 |   - Correlation and covariance (`np.corrcoef`, `np.cov`)
 68 |   - Random sampling for data augmentation (`np.random.choice`)
 69 | - **Implementing ML Algorithms**
 70 |   - Linear regression with normal equations
 71 |   - Logistic regression with gradient descent
 72 |   - K-means clustering from scratch
 73 | 
 74 | **Practical Tasks**:
 75 | - Implement linear regression using `np.dot` and `np.linalg.solve`.
 76 | - Compute PCA using SVD on a dataset (e.g., MNIST).
 77 | - Reshape a 3D tensor (e.g., image batch) for neural network input.
 78 | - Code K-means clustering with NumPy for a synthetic dataset.
 79 | 
 80 | **Resources**:
 81 | - [NumPy Linear Algebra](https://numpy.org/doc/stable/reference/routines.linalg.html)
 82 | - [NumPy Random Sampling](https://numpy.org/doc/stable/reference/random/index.html)
 83 | - [SciPy Lecture Notes: NumPy for ML](https://scipy-lectures.org/intro/numpy/index.html)
 84 | 
 85 | ### 🌐 Advanced NumPy Concepts
 86 | Tackle advanced techniques for performance optimization and integration with ML frameworks.
 87 | 
 88 | - **Vectorization and Performance**
 89 |   - Replacing loops with vectorized operations
 90 |   - Memory-efficient computations (`np.memmap`, `np.lib.stride_tricks`)
 91 |   - Profiling and optimizing NumPy code
 92 | - **Custom Functions and Ufuncs**
 93 |   - Writing custom ufuncs with `np.frompyfunc` or `numba`
 94 |   - Vectorizing complex operations (`np.vectorize`)
 95 |   - Gradient computations for ML optimization
 96 | - **Integration with ML Frameworks**
 97 |   - Converting NumPy arrays to TensorFlow/PyTorch tensors (`tf.convert_to_tensor`, `torch.from_numpy`)
 98 |   - NumPy as a backend for data pipelines
 99 |   - Interfacing with scikit-learn for preprocessing
100 | - **Advanced Tensor Manipulations**
101 |   - Batch processing for deep learning (e.g., image batches)
102 |   - Sparse arrays for large-scale data (`scipy.sparse`)
103 |   - Tensor decompositions (e.g., Tucker, CP) for compression
104 | 
105 | **Practical Tasks**:
106 | - Optimize a matrix multiplication loop with vectorization.
107 | - Write a custom ufunc for a non-standard activation function.
108 | - Convert a NumPy dataset to a TensorFlow `tf.data.Dataset`.
109 | - Implement a tensor decomposition for a 4D image tensor.
110 | 
111 | **Resources**:
112 | - [NumPy Performance Tips](https://numpy.org/doc/stable/user/performance.html)
113 | - [NumPy and Numba](https://numba.pydata.org/)
114 | - [TensorFlow Data Pipeline](https://www.tensorflow.org/guide/data)
115 | 
116 | ### 🧬 NumPy in AI/ML Applications
117 | Apply NumPy to real-world AI/ML tasks and frameworks.
118 | 
119 | - **Data Preprocessing**
120 |   - Handling missing data (`np.isnan`, `np.where`)
121 |   - Feature engineering (e.g., polynomial features)
122 |   - Image preprocessing (e.g., resizing, augmentation)
123 | - **ML Algorithm Implementation**
124 |   - Neural network forward/backward pass from scratch
125 |   - Gradient descent optimization
126 |   - Principal Component Analysis (PCA) for dimensionality reduction
127 | - **Deep Learning Support**
128 |   - Preparing tensor inputs for CNNs/RNNs
129 |   - Computing loss functions (e.g., cross-entropy)
130 |   - Simulating batch normalization
131 | - **Evaluation Metrics**
132 |   - Accuracy, precision, recall, F1-score
133 |   - Confusion matrix and ROC curves
134 |   - Mean squared error and R² for regression
135 | 
136 | **Practical Tasks**:
137 | - Preprocess an image dataset (e.g., CIFAR-10) with NumPy.
138 | - Implement a neural network forward pass for MNIST.
139 | - Compute a confusion matrix for a classification model.
140 | - Apply PCA to reduce dimensionality of a high-dimensional dataset.
141 | 
142 | **Resources**:
143 | - [NumPy for Data Science](https://numpy.org/doc/stable/user/absolute_beginners.html#data-science)
144 | - [Scikit-Learn with NumPy](https://scikit-learn.org/stable/modules/preprocessing.html)
145 | - [Kaggle: NumPy Tutorials](https://www.kaggle.com/learn/python)
146 | 
147 | ### 📦 Optimization and Best Practices
148 | Optimize NumPy for large-scale ML workflows and production.
149 | 
150 | - **Memory Management**
151 |   - Using `np.memmap` for large datasets
152 |   - Avoiding unnecessary copies (`np.copy`, views)
153 |   - Sparse matrices for memory efficiency
154 | - **Parallel Computing**
155 |   - Leveraging `numba` for JIT compilation
156 |   - Using `multiprocessing` with NumPy arrays
157 |   - Integrating with Dask for big data
158 | - **Debugging and Testing**
159 |   - Handling numerical stability (e.g., overflow, underflow)
160 |   - Unit testing NumPy code with `pytest`
161 |   - Validating tensor shapes and dtypes
162 | - **Production Integration**
163 |   - Exporting NumPy arrays to ML frameworks
164 |   - Saving/loading arrays (`np.save`, `np.load`)
165 |   - Interfacing with pandas for data analysis
166 | 
167 | **Practical Tasks**:
168 | - Process a large dataset with `np.memmap` and Dask.
169 | - Optimize a gradient descent loop with `numba`.
170 | - Write unit tests for a custom NumPy ML function.
171 | - Save a preprocessed dataset as `.npy` for a TensorFlow pipeline.
172 | 
173 | **Resources**:
174 | - [NumPy Memory Management](https://numpy.org/doc/stable/reference/arrays.ndarray.html#memory-layout)
175 | - [Dask with NumPy](https://docs.dask.org/en/stable/array.html)
176 | - [NumPy Testing](https://numpy.org/doc/stable/reference/routines.testing.html)
177 | 
178 | ## 💡 Learning Tips
179 | - **Hands-On Practice**: Code each section’s tasks in a Jupyter notebook. Use datasets like MNIST, CIFAR-10, or synthetic data from `np.random`.
180 | - **Visualize Results**: Plot arrays, matrices, and ML outputs (e.g., decision boundaries, PCA results) using Matplotlib.
181 | - **Experiment**: Modify array shapes, operations, or algorithms (e.g., change learning rates in gradient descent) and analyze performance.
182 | - **Portfolio Projects**: Build projects like a NumPy-based linear regression model, PCA pipeline, or neural network to showcase skills.
183 | - **Community**: Engage with NumPy forums, Stack Overflow, and Kaggle for examples and support.
184 | 
185 | ## 🛠️ Practical Tasks
186 | 1. **Beginner**: Load a CSV dataset with NumPy and normalize features.
187 | 2. **Intermediate**: Implement logistic regression with gradient descent.
188 | 3. **Advanced**: Optimize a neural network forward pass with vectorization.
189 | 4. **AI/ML Applications**: Code PCA for dimensionality reduction on MNIST.
190 | 5. **Optimization**: Process a large dataset with `np.memmap` and save as `.npy`.
191 | 
192 | ## 💼 Interview Preparation
193 | - **Common Questions**:
194 |   - How does NumPy’s broadcasting work for ML computations?
195 |   - How would you implement linear regression with NumPy?
196 |   - What are the benefits of vectorization over loops?
197 |   - How do you handle large datasets with NumPy?
198 | - **Coding Tasks**:
199 |   - Implement matrix multiplication or SVD for PCA.
200 |   - Code a neural network forward pass with NumPy.
201 |   - Preprocess a dataset (e.g., normalize, split) using NumPy.
202 | - **Tips**:
203 |   - Explain broadcasting’s role in efficient ML computations.
204 |   - Highlight NumPy’s integration with TensorFlow/PyTorch.
205 |   - Practice debugging numerical issues (e.g., NaN values).
206 | 
207 | ## 📚 Resources
208 | - **Official Documentation**:
209 |   - [NumPy Official Site](https://numpy.org/)
210 |   - [NumPy User Guide](https://numpy.org/doc/stable/user/)
211 |   - [NumPy API Reference](https://numpy.org/doc/stable/reference/)
212 | - **Tutorials**:
213 |   - [NumPy Quickstart Tutorial](https://numpy.org/doc/stable/user/quickstart.html)
214 |   - [SciPy Lecture Notes](https://scipy-lectures.org/intro/numpy/index.html)
215 |   - [Kaggle: Python and NumPy](https://www.kaggle.com/learn/python)
216 | - **Books**:
217 |   - *Python for Data Analysis* by Wes McKinney
218 |   - *Numerical Python* by Robert Johansson
219 |   - *Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow* by Aurélien Géron
220 | - **Community**:
221 |   - [NumPy GitHub](https://github.com/numpy/numpy)
222 |   - [Stack Overflow: NumPy Tag](https://stackoverflow.com/questions/tagged/numpy)
223 |   - [NumPy Mailing List](https://mail.python.org/mailman3/lists/numpy-discussion.python.org/)
224 | 
225 | ## 📅 Suggested Timeline
226 | - **Week 1**: Beginner Concepts (Arrays, Indexing, Operations)
227 | - **Week 2**: Intermediate Concepts (Linear Algebra, Tensors, ML Algorithms)
228 | - **Week 3**: Advanced Concepts (Vectorization, Framework Integration)
229 | - **Week 4**: AI/ML Applications and Optimization
230 | - **Week 5**: Portfolio project and interview prep
231 | 
232 | ## 🚀 Get Started
233 | Clone this repository and start with the Beginner Concepts section. Run the example code in a Jupyter notebook, experiment with tasks, and build a portfolio project (e.g., a NumPy-based ML pipeline) to showcase your skills. Happy learning, and good luck with your AI/ML journey!


--------------------------------------------------------------------------------
/Numpy Interview Questions/README.md:
--------------------------------------------------------------------------------
   1 | # NumPy Interview Questions for AI/ML Roles
   2 | 
   3 | This README provides 170 NumPy interview questions tailored for AI/ML roles, focusing on numerical computing with NumPy in Python. The questions cover **core NumPy concepts** (e.g., array creation, operations, indexing, broadcasting, linear algebra) and their applications in AI/ML tasks like data preprocessing, feature engineering, and model input preparation. Questions are categorized by topic and divided into **Basic**, **Intermediate**, and **Advanced** levels to support candidates preparing for roles requiring NumPy in AI/ML workflows.
   4 | 
   5 | ## Array Creation and Manipulation
   6 | 
   7 | ### Basic
   8 | 1. **What is NumPy, and why is it important in AI/ML?**  
   9 |    NumPy provides efficient array operations for numerical computing in AI/ML.  
  10 |    ```python
  11 |    import numpy as np
  12 |    array = np.array([1, 2, 3])
  13 |    ```
  14 | 
  15 | 2. **How do you create a NumPy array from a Python list?**  
  16 |    Converts lists to arrays for fast computation.  
  17 |    ```python
  18 |    import numpy as np
  19 |    list_data = [1, 2, 3]
  20 |    array = np.array(list_data)
  21 |    ```
  22 | 
  23 | 3. **How do you create a NumPy array with zeros or ones?**  
  24 |    Initializes arrays for placeholders.  
  25 |    ```python
  26 |    zeros = np.zeros((2, 3))
  27 |    ones = np.ones((2, 3))
  28 |    ```
  29 | 
  30 | 4. **What is the role of `np.arange` in NumPy?**  
  31 |    Creates arrays with a range of values.  
  32 |    ```python
  33 |    array = np.arange(0, 10, 2)
  34 |    ```
  35 | 
  36 | 5. **How do you create a NumPy array with random values?**  
  37 |    Generates random data for testing.  
  38 |    ```python
  39 |    random_array = np.random.rand(2, 3)
  40 |    ```
  41 | 
  42 | 6. **How do you reshape a NumPy array?**  
  43 |    Changes array dimensions for ML inputs.  
  44 |    ```python
  45 |    array = np.array([1, 2, 3, 4, 5, 6])
  46 |    reshaped = array.reshape(2, 3)
  47 |    ```
  48 | 
  49 | ### Intermediate
  50 | 7. **Write a function to create a 2D NumPy array with a given shape.**  
  51 |    Initializes arrays dynamically.  
  52 |    ```python
  53 |    def create_2d_array(rows, cols, fill=0):
  54 |        return np.full((rows, cols), fill)
  55 |    ```
  56 | 
  57 | 8. **How do you create a NumPy array with evenly spaced values?**  
  58 |    Uses `linspace` for uniform intervals.  
  59 |    ```python
  60 |    array = np.linspace(0, 10, 5)
  61 |    ```
  62 | 
  63 | 9. **Write a function to initialize a NumPy array with random integers.**  
  64 |    Generates integer arrays for simulations.  
  65 |    ```python
  66 |    def random_int_array(shape, low, high):
  67 |        return np.random.randint(low, high, shape)
  68 |    ```
  69 | 
  70 | 10. **How do you create a diagonal matrix in NumPy?**  
  71 |     Initializes matrices for linear algebra.  
  72 |     ```python
  73 |     diag_matrix = np.diag([1, 2, 3])
  74 |     ```
  75 | 
  76 | 11. **Write a function to visualize a NumPy array as a heatmap.**  
  77 |     Displays array values graphically.  
  78 |     ```python
  79 |     import matplotlib.pyplot as plt
  80 |     def plot_heatmap(array):
  81 |         plt.imshow(array, cmap='viridis')
  82 |         plt.colorbar()
  83 |         plt.savefig('heatmap.png')
  84 |     ```
  85 | 
  86 | 12. **How do you concatenate two NumPy arrays?**  
  87 |     Combines arrays for data aggregation.  
  88 |     ```python
  89 |     array1 = np.array([[1, 2], [3, 4]])
  90 |     array2 = np.array([[5, 6]])
  91 |     concatenated = np.concatenate((array1, array2), axis=0)
  92 |     ```
  93 | 
  94 | ### Advanced
  95 | 13. **Write a function to create a NumPy array with a custom pattern.**  
  96 |     Generates structured arrays.  
  97 |     ```python
  98 |     def custom_pattern(shape, pattern='checkerboard'):
  99 |         array = np.zeros(shape)
 100 |         if pattern == 'checkerboard':
 101 |             array[::2, ::2] = 1
 102 |             array[1::2, 1::2] = 1
 103 |         return array
 104 |     ```
 105 | 
 106 | 14. **How do you optimize array creation for large datasets?**  
 107 |     Uses `np.empty` to allocate memory without initializing it; the contents are arbitrary until overwritten.  
 108 |     ```python
 109 |     large_array = np.empty((10000, 10000))
 110 |     ```
 111 | 
 112 | 15. **Write a function to create a block matrix in NumPy.**  
 113 |     Constructs matrices from subarrays.  
 114 |     ```python
 115 |     def block_matrix(blocks):
 116 |         return np.block(blocks)
 117 |     ```
 118 | 
 119 | 16. **How do you handle memory-efficient array creation?**  
 120 |     Uses sparse arrays or generators.  
 121 |     ```python
 122 |     from scipy.sparse import csr_matrix
 123 |     sparse_array = csr_matrix((1000, 1000))
 124 |     ```
 125 | 
 126 | 17. **Write a function to create a NumPy array with padded borders.**  
 127 |     Adds padding for convolutional tasks.  
 128 |     ```python
 129 |     def pad_array(array, pad_width):
 130 |         return np.pad(array, pad_width, mode='constant')
 131 |     ```
 132 | 
 133 | 18. **How do you create a NumPy array with a specific memory layout?**  
 134 |     Controls C or Fortran order for performance.  
 135 |     ```python
 136 |     array = np.array([[1, 2], [3, 4]], order='F')
 137 |     ```
 138 | 
 139 | ## Array Operations
 140 | 
 141 | ### Basic
 142 | 19. **How do you perform element-wise addition in NumPy?**  
 143 |    Adds arrays for data transformations.  
 144 |    ```python
 145 |    array1 = np.array([1, 2, 3])
 146 |    array2 = np.array([4, 5, 6])
 147 |    result = array1 + array2
 148 |    ```
 149 | 
 150 | 20. **What is broadcasting in NumPy, and how does it work?**  
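 151 |    Virtually stretches the smaller operand (here a scalar) to match the larger array's shape, so element-wise operations work without explicit loops or copies.  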
 152 |    ```python
 153 |    array = np.array([[1, 2], [3, 4]])
 154 |    scalar = 2
 155 |    result = array * scalar
 156 |    ```
 157 | 
 158 | 21. **How do you compute the dot product of two NumPy arrays?**  
 159 |    Computes the inner product of two vectors (for 2D inputs, `np.dot` performs matrix multiplication).  
 160 |    ```python
 161 |    array1 = np.array([1, 2])
 162 |    array2 = np.array([3, 4])
 163 |    dot_product = np.dot(array1, array2)
 164 |    ```
 165 | 
 166 | 22. **How do you calculate the mean of a NumPy array?**  
 167 |    Computes statistics for analysis.  
 168 |    ```python
 169 |    array = np.array([1, 2, 3, 4])
 170 |    mean = np.mean(array)
 171 |    ```
 172 | 
 173 | 23. **How do you perform matrix transposition in NumPy?**  
 174 |    Flips rows and columns.  
 175 |    ```python
 176 |    array = np.array([[1, 2], [3, 4]])
 177 |    transposed = array.T
 178 |    ```
 179 | 
 180 | 24. **How do you visualize array operations in NumPy?**  
 181 |    Plots operation results.  
 182 |    ```python
 183 |    import matplotlib.pyplot as plt
 184 |    array = np.array([1, 2, 3, 4])
 185 |    plt.plot(array, array**2)
 186 |    plt.savefig('array_operation.png')
 187 |    ```
 188 | 
 189 | ### Intermediate
 190 | 25. **Write a function to perform element-wise operations on NumPy arrays.**  
 191 |     Applies custom operations.  
 192 |     ```python
 193 |     def element_wise_op(array1, array2, op='add'):
 194 |         if op == 'add':
 195 |             return array1 + array2
 196 |         elif op == 'multiply':
 197 |             return array1 * array2
 198 |     ```
 199 | 
 200 | 26. **How do you implement broadcasting for custom operations?**  
 201 |     Aligns shapes dynamically.  
 202 |     ```python
 203 |     array = np.array([[1, 2], [3, 4]])
 204 |     vector = np.array([1, 2])
 205 |     result = array + vector
 206 |     ```
 207 | 
 208 | 27. **Write a function to compute the outer product of two NumPy arrays.**  
 209 |     Generates matrix from vectors.  
 210 |     ```python
 211 |     def outer_product(vec1, vec2):
 212 |         return np.outer(vec1, vec2)
 213 |     ```
 214 | 
 215 | 28. **How do you perform batch operations on NumPy arrays?**  
 216 |     Processes multiple arrays efficiently.  
 217 |     ```python
 218 |     arrays = [np.array([1, 2]), np.array([3, 4])]
 219 |     results = [arr * 2 for arr in arrays]
 220 |     ```
 221 | 
 222 | 29. **Write a function to normalize a NumPy array.**  
 223 |     Scales values for ML preprocessing.  
 224 |     ```python
 225 |     def normalize_array(array):
 226 |         return (array - np.mean(array)) / np.std(array)
 227 |     ```
 228 | 
 229 | 30. **How do you handle numerical stability in NumPy operations?**  
 230 |     Uses numerically stable functions such as `np.log1p`, which stays accurate near zero and compresses very large values.  
 231 |     ```python
 232 |     array = np.array([1e10, 2e10])
 233 |     result = np.log1p(array)
 234 |     ```
 235 | 
 236 | ### Advanced
 237 | 31. **Write a function to implement matrix factorization in NumPy.**  
 238 |     Decomposes matrices for dimensionality reduction.  
 239 |     ```python
 240 |     def matrix_factorization(matrix, k):
 241 |         U, S, Vt = np.linalg.svd(matrix)
 242 |         return U[:, :k], np.diag(S[:k]), Vt[:k, :]
 243 |     ```
 244 | 
 245 | 32. **How do you optimize NumPy operations for performance?**  
 246 |     Uses vectorized operations.  
 247 |     ```python
 248 |     array = np.random.rand(1000, 1000)
 249 |     result = np.einsum('ij,ij->i', array, array)
 250 |     ```
 251 | 
 252 | 33. **Write a function to perform sliding window operations in NumPy.**  
 253 |     Applies operations over windows.  
 254 |     ```python
 255 |     def sliding_window(array, window_size):
 256 |         return np.lib.stride_tricks.sliding_window_view(array, window_size)
 257 |     ```
 258 | 
 259 | 34. **How do you implement custom reductions in NumPy?**  
 260 |     Defines specialized aggregations.  
 261 |     ```python
 262 |     def custom_reduction(array, op='sum'):
 263 |         if op == 'sum':
 264 |             return np.sum(array, axis=0)
 265 |         elif op == 'prod':
 266 |             return np.prod(array, axis=0)
 267 |     ```
 268 | 
 269 | 35. **Write a function to handle sparse array operations in NumPy.**  
 270 |     Optimizes for sparse data.  
 271 |     ```python
 272 |     from scipy.sparse import csr_matrix
 273 |     def sparse_operation(array):
 274 |         sparse = csr_matrix(array)
 275 |         return sparse.dot(sparse.T)
 276 |     ```
 277 | 
 278 | 36. **How do you parallelize NumPy operations?**  
 279 |     Uses libraries like Numba or Dask.  
 280 |     ```python
 281 |     from numba import jit
 282 |     @jit
 283 |     def fast_operation(array):
 284 |         return array * 2
 285 |     ```
 286 | 
 287 | ## Indexing and Slicing
 288 | 
 289 | ### Basic
 290 | 37. **How do you access elements in a NumPy array?**  
 291 |    Uses indices for data retrieval.  
 292 |    ```python
 293 |    array = np.array([[1, 2], [3, 4]])
 294 |    element = array[0, 1]
 295 |    ```
 296 | 
 297 | 38. **What is array slicing in NumPy?**  
 298 |    Extracts subarrays with ranges.  
 299 |    ```python
 300 |    array = np.array([1, 2, 3, 4])
 301 |    slice = array[1:3]
 302 |    ```
 303 | 
 304 | 39. **How do you use boolean indexing in NumPy?**  
 305 |    Filters arrays with conditions.  
 306 |    ```python
 307 |    array = np.array([1, 2, 3, 4])
 308 |    filtered = array[array > 2]
 309 |    ```
 310 | 
 311 | 40. **How do you access rows and columns in a 2D NumPy array?**  
 312 |    Uses slicing for matrix operations.  
 313 |    ```python
 314 |    array = np.array([[1, 2], [3, 4]])
 315 |    row = array[0, :]
 316 |    ```
 317 | 
 318 | 41. **What is fancy indexing in NumPy?**  
 319 |    Uses arrays as indices.  
 320 |    ```python
 321 |    array = np.array([10, 20, 30, 40])
 322 |    indices = [0, 2]
 323 |    selected = array[indices]
 324 |    ```
 325 | 
 326 | 42. **How do you visualize sliced NumPy arrays?**  
 327 |    Plots subarray data.  
 328 |    ```python
 329 |    import matplotlib.pyplot as plt
 330 |    array = np.random.rand(5, 5)
 331 |    plt.imshow(array[:3, :3], cmap='Blues')
 332 |    plt.savefig('sliced_array.png')
 333 |    ```
 334 | 
 335 | ### Intermediate
 336 | 43. **Write a function to extract a subarray using NumPy slicing.**  
 337 |     Retrieves specific regions.  
 338 |     ```python
 339 |     def extract_subarray(array, rows, cols):
 340 |         return array[rows[0]:rows[1], cols[0]:cols[1]]
 341 |     ```
 342 | 
 343 | 44. **How do you use advanced indexing with NumPy?**  
 344 |     Uses integer index arrays to select arbitrary elements (here `array[0, 1]` and `array[1, 0]`); boolean masks can be used the same way.  
 345 |     ```python
 346 |     array = np.array([[1, 2], [3, 4]])
 347 |     rows = np.array([0, 1])
 348 |     cols = np.array([1, 0])
 349 |     selected = array[rows, cols]
 350 |     ```
 351 | 
 352 | 45. **Write a function to filter a NumPy array with conditions.**  
 353 |     Selects elements dynamically.  
 354 |     ```python
 355 |     def filter_array(array, threshold):
 356 |         return array[array > threshold]
 357 |     ```
 358 | 
 359 | 46. **How do you modify array elements using indexing?**  
 360 |     Updates values conditionally.  
 361 |     ```python
 362 |     array = np.array([1, 2, 3, 4])
 363 |     array[array < 3] = 0
 364 |     ```
 365 | 
 366 | 47. **Write a function to extract diagonal elements in NumPy.**  
 367 |     Retrieves matrix diagonals.  
 368 |     ```python
 369 |     def get_diagonal(array):
 370 |         return np.diagonal(array)
 371 |     ```
 372 | 
 373 | 48. **How do you handle out-of-bounds indexing in NumPy?**  
 374 |     Uses safe indexing techniques.  
 375 |     ```python
 376 |     def safe_index(array, index):
 377 |         return array[index] if 0 <= index < len(array) else None
 378 |     ```
 379 | 
 380 | ### Advanced
 381 | 49. **Write a function to implement multi-dimensional indexing in NumPy.**  
 382 |     Accesses complex array structures.  
 383 |     ```python
 384 |     def multi_dim_index(array, indices):
 385 |         return array[tuple(indices)]
 386 |     ```
 387 | 
 388 | 50. **How do you optimize indexing for large NumPy arrays?**  
 389 |     Uses strides or views.  
 390 |     ```python
 391 |     array = np.random.rand(1000, 1000)
 392 |     view = array[::2, ::2]
 393 |     ```
 394 | 
 395 | 51. **Write a function to perform conditional indexing with multiple criteria.**  
 396 |     Filters with complex logic.  
 397 |     ```python
 398 |     def multi_condition_index(array, cond1, cond2):
 399 |         return array[np.logical_and(array > cond1, array < cond2)]
 400 |     ```
 401 | 
 402 | 52. **How do you implement custom indexing for NumPy arrays?**  
 403 |     Defines specialized access patterns.  
 404 |     ```python
 405 |     def custom_index(array, pattern='even'):
 406 |         if pattern == 'even':
 407 |             return array[::2]
 408 |         return array[1::2]
 409 |     ```
 410 | 
 411 | 53. **Write a function to reorder NumPy array elements.**  
 412 |     Rearranges based on indices.  
 413 |     ```python
 414 |     def reorder_array(array, order):
 415 |         return array[np.argsort(order)]
 416 |     ```
 417 | 
 418 | 54. **How do you handle sparse array indexing in NumPy?**  
 419 |     Uses sparse formats for efficiency.  
 420 |     ```python
 421 |     from scipy.sparse import csr_matrix
 422 |     def sparse_index(sparse_array, row, col):
 423 |         return sparse_array[row, col]
 424 |     ```
 425 | 
 426 | ## Broadcasting and Vectorization
 427 | 
 428 | ### Basic
 429 | 55. **What is vectorization in NumPy, and why is it important?**  
 430 |    Replaces loops with array operations for speed.  
 431 |    ```python
 432 |    array = np.array([1, 2, 3])
 433 |    result = array * 2
 434 |    ```
 435 | 
 436 | 56. **How do you perform broadcasting with mismatched shapes?**  
 437 |    Aligns arrays automatically.  
 438 |    ```python
 439 |    array = np.array([[1, 2], [3, 4]])
 440 |    vector = np.array([1, 2])
 441 |    result = array + vector
 442 |    ```
 443 | 
 444 | 57. **How do you compute element-wise operations without loops?**  
 445 |    Uses vectorized functions.  
 446 |    ```python
 447 |    array = np.array([1, 2, 3])
 448 |    squared = np.square(array)
 449 |    ```
 450 | 
 451 | 58. **What is the role of `np.vectorize` in NumPy?**  
 452 |    Applies scalar functions to arrays.  
 453 |    ```python
 454 |    def my_func(x):
 455 |        return x * 2
 456 |    vectorized = np.vectorize(my_func)
 457 |    result = vectorized(np.array([1, 2, 3]))
 458 |    ```
 459 | 
 460 | 59. **How do you visualize broadcasting results?**  
 461 |    Plots operation outputs.  
 462 |    ```python
 463 |    import matplotlib.pyplot as plt
 464 |    array = np.ones((3, 3)) + np.array([1, 2, 3])
 465 |    plt.imshow(array, cmap='Greys')
 466 |    plt.savefig('broadcasting_result.png')
 467 |    ```
 468 | 
 469 | 60. **How do you check broadcasting compatibility in NumPy?**  
 470 |    Verifies shape alignment.  
 471 |    ```python
 472 |    def check_broadcasting(shape1, shape2):
 473 |        try:
 474 |            np.broadcast_arrays(np.empty(shape1), np.empty(shape2))
 475 |            return True
 476 |        except ValueError:
 477 |            return False
 478 |    ```
 479 | 
 480 | ### Intermediate
 481 | 61. **Write a function to perform broadcasting with custom arrays.**  
 482 |     Applies operations across shapes.  
 483 |     ```python
 484 |     def broadcast_operation(array, vector):
 485 |         return array + vector
 486 |     ```
 487 | 
 488 | 62. **How do you optimize vectorized operations in NumPy?**  
 489 |     Minimizes memory overhead.  
 490 |     ```python
 491 |     array = np.random.rand(1000)
 492 |     result = np.sin(array, out=array)  # in place: no new output buffer is allocated
 493 |     ```
 494 | 
 495 | 63. **Write a function to apply vectorized operations conditionally.**  
 496 |     Uses masks for selective computation.  
 497 |     ```python
 498 |     def conditional_vectorize(array, threshold):
 499 |         return np.where(array > threshold, array * 2, array)
 500 |     ```
 501 | 
 502 | 64. **How do you handle broadcasting with higher-dimensional arrays?**  
 503 |     Aligns multi-dimensional shapes.  
 504 |     ```python
 505 |     array = np.ones((3, 4, 5))
 506 |     vector = np.array([1, 2, 3, 4])
 507 |     result = array + vector[:, np.newaxis]
 508 |     ```
 509 | 
 510 | 65. **Write a function to vectorize a custom computation.**  
 511 |     Applies scalar logic to arrays.  
 512 |     ```python
 513 |     def vectorized_custom(array):
 514 |         return np.where(array > 0, array**2, 0)  # np.vectorize infers dtype from the first result and can truncate floats
 515 |     ```
 516 | 
 517 | 66. **How do you visualize vectorized operation performance?**  
 518 |     Times a vectorized operation across increasing input sizes.  
 519 |     ```python
 520 |     import matplotlib.pyplot as plt
 521 |     import time
 522 |     sizes = [100, 1000, 10000]
 523 |     times = []
 524 |     for n in sizes:
 525 |         array = np.random.rand(n)
 526 |         start = time.time()
 527 |         np.sin(array)
 528 |         times.append(time.time() - start)
 529 |     plt.plot(sizes, times)
 530 |     plt.savefig('vectorized_performance.png')
 531 |     ```
 532 | 
 533 | ### Advanced
 534 | 67. **Write a function to implement complex broadcasting rules.**  
 535 |     Handles intricate shape alignments.  
 536 |     ```python
 537 |     def complex_broadcast(array, shape):
 538 |         return array + np.ones(shape)
 539 |     ```
 540 | 
 541 | 68. **How do you optimize broadcasting for memory efficiency?**  
 542 |     Uses in-place operations.  
 543 |     ```python
 544 |     array = np.random.rand(1000, 1000)
 545 |     array += 1
 546 |     ```
 547 | 
 548 | 69. **Write a function to vectorize matrix operations.**  
 549 |     Applies matrix computations efficiently.  
 550 |     ```python
 551 |     def vectorized_matrix_op(matrix1, matrix2):
 552 |         return np.einsum('ij,jk->ik', matrix1, matrix2)
 553 |     ```
 554 | 
 555 | 70. **How do you handle broadcasting with sparse arrays?**  
 556 |     Uses sparse formats for efficiency.  
 557 |     ```python
 558 |     from scipy.sparse import csr_matrix
 559 |     def sparse_broadcast(sparse, dense):
 560 |         return sparse + dense
 561 |     ```
 562 | 
 563 | 71. **Write a function to debug broadcasting issues.**  
 564 |     Logs shape mismatches.  
 565 |     ```python
 566 |     import logging
 567 |     def debug_broadcast(array1, array2):
 568 |         logging.basicConfig(filename='numpy.log', level=logging.INFO)
 569 |         try:
 570 |             return array1 + array2
 571 |         except ValueError as e:
 572 |             logging.error(f"Broadcasting error: {e}")
 573 |             raise
 574 |     ```
 575 | 
 576 | 72. **How do you implement broadcasting with custom dtypes?**  
 577 |     Operands with different dtypes are promoted to a common dtype (here float32 + int16 gives float32).  
 578 |     ```python
 579 |     array = np.array([1, 2], dtype=np.float32)
 580 |     result = array + np.array([1, 2], dtype=np.int16)
 581 |     ```
 582 | 
 583 | ## Linear Algebra
 584 | 
 585 | ### Basic
 586 | 73. **How do you compute the matrix inverse in NumPy?**  
 587 |    Inverts matrices for solving systems.  
 588 |    ```python
 589 |    matrix = np.array([[1, 2], [3, 4]])
 590 |    inverse = np.linalg.inv(matrix)
 591 |    ```
 592 | 
 593 | 74. **What is the determinant of a matrix in NumPy?**  
 594 |    Measures matrix properties.  
 595 |    ```python
 596 |    matrix = np.array([[1, 2], [3, 4]])
 597 |    det = np.linalg.det(matrix)
 598 |    ```
 599 | 
 600 | 75. **How do you solve a linear system in NumPy?**  
 601 |    Finds solutions to Ax = b.  
 602 |    ```python
 603 |    A = np.array([[1, 2], [3, 4]])
 604 |    b = np.array([5, 6])
 605 |    x = np.linalg.solve(A, b)
 606 |    ```
 607 | 
 608 | 76. **How do you compute eigenvalues in NumPy?**  
 609 |    Analyzes matrix properties.  
 610 |    ```python
 611 |    matrix = np.array([[1, 2], [3, 4]])
 612 |    eigenvalues = np.linalg.eigvals(matrix)
 613 |    ```
 614 | 
 615 | 77. **How do you perform singular value decomposition (SVD) in NumPy?**  
 616 |    Decomposes matrices for ML.  
 617 |    ```python
 618 |    matrix = np.array([[1, 2], [3, 4]])
 619 |    U, S, Vt = np.linalg.svd(matrix)
 620 |    ```
 621 | 
 622 | 78. **How do you visualize matrix operations in NumPy?**  
 623 |    Plots matrix transformations.  
 624 |    ```python
 625 |    import matplotlib.pyplot as plt
 626 |    matrix = np.random.rand(5, 5)
 627 |    plt.imshow(matrix, cmap='hot')
 628 |    plt.savefig('matrix_plot.png')
 629 |    ```
 630 | 
 631 | ### Intermediate
 632 | 79. **Write a function to solve a batch of linear systems in NumPy.**  
 633 |     Handles multiple systems efficiently.  
 634 |     ```python
 635 |     def batch_solve(A_batch, b_batch):
 636 |         return np.linalg.solve(A_batch, b_batch)
 637 |     ```
 638 | 
 639 | 80. **How do you compute the matrix rank in NumPy?**  
 640 |     Determines linear independence.  
 641 |     ```python
 642 |     matrix = np.array([[1, 2], [2, 4]])
 643 |     rank = np.linalg.matrix_rank(matrix)
 644 |     ```
 645 | 
 646 | 81. **Write a function to perform QR decomposition in NumPy.**  
 647 |     Decomposes matrices for stability.  
 648 |     ```python
 649 |     def qr_decomposition(matrix):
 650 |         Q, R = np.linalg.qr(matrix)
 651 |         return Q, R
 652 |     ```
 653 | 
 654 | 82. **How do you compute the condition number of a matrix?**  
 655 |     Assesses numerical stability.  
 656 |     ```python
 657 |     matrix = np.array([[1, 2], [3, 4]])
 658 |     cond = np.linalg.cond(matrix)
 659 |     ```
 660 | 
 661 | 83. **Write a function to compute the Cholesky decomposition.**  
 662 |     Factorizes symmetric (Hermitian) positive-definite matrices.  
 663 |     ```python
 664 |     def cholesky_decomp(matrix):
 665 |         return np.linalg.cholesky(matrix)
 666 |     ```
 667 | 
 668 | 84. **How do you visualize eigenvalues of a matrix?**  
 669 |     Plots eigenvalue distributions.  
 670 |     ```python
 671 |     import matplotlib.pyplot as plt
 672 |     matrix = np.random.rand(5, 5)
 673 |     eigvals = np.linalg.eigvals(matrix)
 674 |     plt.scatter(eigvals.real, eigvals.imag)
 675 |     plt.savefig('eigenvalues_plot.png')
 676 |     ```
 677 | 
 678 | ### Advanced
 679 | 85. **Write a function to implement iterative linear solvers in NumPy.**  
 680 |     Solves large systems efficiently.  
 681 |     ```python
 682 |     from scipy.sparse.linalg import cg
 683 |     def iterative_solve(A, b):
 684 |         x, _ = cg(A, b)
 685 |         return x
 686 |     ```
 687 | 
 688 | 86. **How do you optimize linear algebra operations in NumPy?**  
 689 |     Uses BLAS/LAPACK for speed.  
 690 |     ```python
 691 |     matrix = np.random.rand(1000, 1000)
 692 |     result = np.linalg.inv(matrix)
 693 |     ```
 694 | 
 695 | 87. **Write a function to compute the pseudo-inverse in NumPy.**  
 696 |     Handles non-square matrices.  
 697 |     ```python
 698 |     def pseudo_inverse(matrix):
 699 |         return np.linalg.pinv(matrix)
 700 |     ```
 701 | 
 702 | 88. **How do you implement tensor operations in NumPy?**  
 703 |     Extends linear algebra to tensors.  
 704 |     ```python
 705 |     tensor = np.random.rand(3, 3, 3)
 706 |     result = np.tensordot(tensor, tensor, axes=([2], [2]))
 707 |     ```
 708 | 
 709 | 89. **Write a function to handle ill-conditioned matrices.**  
 710 |     Stabilizes computations.  
 711 |     ```python
 712 |     def safe_inverse(matrix, tol=1e-10):
 713 |         if np.linalg.cond(matrix) < 1/tol:
 714 |             return np.linalg.inv(matrix)
 715 |         return np.linalg.pinv(matrix)
 716 |     ```
 717 | 
 718 | 90. **How do you parallelize linear algebra operations?**  
 719 |     Uses multi-core processing.  
 720 |     ```python
 721 |     from joblib import Parallel, delayed
 722 |     def parallel_matrix_inv(matrices):
 723 |         return Parallel(n_jobs=-1)(delayed(np.linalg.inv)(m) for m in matrices)
 724 |     ```
 725 | 
 726 | ## Integration with AI/ML Workflows
 727 | 
 728 | ### Basic
 729 | 91. **How do you preprocess data with NumPy for AI/ML?**  
 730 |    Normalizes and reshapes inputs.  
 731 |    ```python
 732 |    data = np.random.rand(100, 10)
 733 |    normalized = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
 734 |    ```
 735 | 
 736 | 92. **How do you create feature matrices in NumPy?**  
 737 |    Structures data for ML models.  
 738 |    ```python
 739 |    features = np.array([[1, 2], [3, 4], [5, 6]])
 740 |    ```
 741 | 
 742 | 93. **How do you split data into train/test sets in NumPy?**  
 743 |    Prepares data for evaluation.  
 744 |    ```python
 745 |    data = np.random.rand(100, 5)
 746 |    train = data[:80]
 747 |    test = data[80:]
 748 |    ```
 749 | 
 750 | 94. **How do you compute pairwise distances in NumPy?**  
 751 |    Used in clustering algorithms.  
 752 |    ```python
 753 |    from scipy.spatial.distance import cdist
 754 |    points = np.random.rand(10, 2)
 755 |    distances = cdist(points, points)
 756 |    ```
 757 | 
 758 | 95. **How do you one-hot encode labels in NumPy?**  
 759 |    Prepares categorical data.  
 760 |    ```python
 761 |    labels = np.array([0, 1, 2])
 762 |    one_hot = np.eye(3)[labels]
 763 |    ```
 764 | 
 765 | 96. **How do you visualize data distributions in NumPy?**  
 766 |    Plots histograms for analysis.  
 767 |    ```python
 768 |    import matplotlib.pyplot as plt
 769 |    data = np.random.randn(1000)
 770 |    plt.hist(data, bins=30)
 771 |    plt.savefig('data_distribution.png')
 772 |    ```
 773 | 
 774 | ### Intermediate
 775 | 97. **Write a function to preprocess images with NumPy for ML.**  
 776 |     Normalizes and reshapes images.  
 777 |     ```python
 778 |     def preprocess_image(image):
 779 |         return (image / 255.0).reshape(-1)
 780 |     ```
 781 | 
 782 | 98. **How do you implement data augmentation with NumPy?**  
 783 |     Generates synthetic data.  
 784 |     ```python
 785 |     def augment_data(array):
 786 |         return array + np.random.normal(0, 0.1, array.shape)
 787 |     ```
 788 | 
 789 | 99. **Write a function to compute feature correlations in NumPy.**  
 790 |     Analyzes feature relationships.  
 791 |     ```python
 792 |     def feature_correlation(features):
 793 |         return np.corrcoef(features, rowvar=False)
 794 |     ```
 795 | 
 796 | 100. **How do you handle missing data in NumPy for ML?**  
 797 |      Imputes or removes NaNs.  
 798 |      ```python
 799 |      def handle_missing(array):
 800 |          return np.where(np.isnan(array), np.nanmean(array, axis=0), array)
 801 |      ```
 802 | 
 803 | 101. **Write a function to standardize features in NumPy.**  
 804 |      Scales features for ML models.  
 805 |      ```python
 806 |      def standardize_features(features):
 807 |          return (features - np.mean(features, axis=0)) / np.std(features, axis=0)
 808 |      ```
 809 | 
 810 | 102. **How do you integrate NumPy with Scikit-learn?**  
 811 |      Prepares data for ML pipelines.  
 812 |      ```python
 813 |      from sklearn.linear_model import LogisticRegression
 814 |      X = np.random.rand(100, 5)
 815 |      y = np.random.randint(0, 2, 100)
 816 |      model = LogisticRegression().fit(X, y)
 817 |      ```
 818 | 
 819 | ### Advanced
 820 | 103. **Write a function to implement PCA with NumPy.**  
 821 |      Reduces dimensionality for ML.  
 822 |      ```python
 823 |      def pca_transform(data, n_components):
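 824 |          centered = data - data.mean(axis=0)  # PCA projects mean-centered data
 825 |          _, eigvecs = np.linalg.eigh(np.cov(centered, rowvar=False))
 826 |          top_k = eigvecs[:, ::-1][:, :n_components]  # eigh sorts ascending; put largest variance first
 827 |          return centered @ top_k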
 828 |      ```
 829 | 
 830 | 104. **How do you optimize NumPy for large-scale ML datasets?**  
 831 |      Uses chunked processing.  
 832 |      ```python
 833 |      def process_chunks(data, chunk_size=1000):
 834 |          for i in range(0, len(data), chunk_size):
 835 |              yield standardize_features(data[i:i + chunk_size])
 836 |      ```
 837 | 
 838 | 105. **Write a function to compute gradients in NumPy.**  
 839 |      Supports optimization in ML.  
 840 |      ```python
 841 |      def compute_gradient(X, y, w):
 842 |          return X.T @ (X @ w - y) / len(y)
 843 |      ```
 844 | 
 845 | 106. **How do you implement k-means clustering with NumPy?**  
 846 |      Groups data points.  
 847 |      ```python
 848 |      def kmeans(X, k, max_iters=100):
 849 |          centroids = X[np.random.choice(len(X), k, replace=False)]
 850 |          for _ in range(max_iters):
 851 |              distances = np.linalg.norm(X[:, None] - centroids[None], axis=2)
 852 |              labels = np.argmin(distances, axis=1)
 853 |              centroids = np.array([X[labels == i].mean(axis=0) if np.any(labels == i) else centroids[i] for i in range(k)])
 854 |          return labels, centroids
 855 |      ```
 856 | 
 857 | 107. **Write a function to handle imbalanced datasets in NumPy.**  
 858 |      Resamples data for balance.  
 859 |      ```python
 860 |      def balance_data(X, y, minority_class):
 861 |          minority = X[y == minority_class]
 862 |          majority = X[y != minority_class]
 863 |          minority_upsampled = minority[np.random.choice(len(minority), len(majority))]
 864 |          return np.vstack([majority, minority_upsampled]), np.hstack([y[y != minority_class], np.full(len(majority), minority_class)])
 865 |      ```
 866 | 
 867 | 108. **How do you integrate NumPy with deep learning frameworks?**  
 868 |      Converts data for TensorFlow/PyTorch.  
 869 |      ```python
 870 |      import tensorflow as tf
 871 |      array = np.random.rand(100, 10)
 872 |      tensor = tf.convert_to_tensor(array)
 873 |      ```
 874 | 
 875 | ## Debugging and Error Handling
 876 | 
 877 | ### Basic
 878 | 109. **How do you debug NumPy array shapes?**  
 879 |      Logs shape information.  
 880 |      ```python
 881 |      def debug_shape(array):
 882 |          print(f"Shape: {array.shape}")
 883 |          return array
 884 |      ```
 885 | 
 886 | 110. **What is a try-except block in NumPy applications?**  
 887 |      Handles numerical errors.  
 888 |      ```python
 889 |      try:
 890 |          result = np.linalg.inv(np.array([[1, 2], [2, 4]]))
 891 |      except np.linalg.LinAlgError as e:
 892 |          print(f"Error: {e}")
 893 |      ```
 894 | 
 895 | 111. **How do you validate NumPy array inputs?**  
 896 |      Ensures correct shapes and types.  
 897 |      ```python
 898 |      def validate_array(array, expected_shape):
 899 |          if array.shape != expected_shape:
 900 |              raise ValueError(f"Expected shape {expected_shape}, got {array.shape}")
 901 |          return array
 902 |      ```
 903 | 
 904 | 112. **How do you handle NaN values in NumPy?**  
 905 |      Detects and replaces NaNs.  
 906 |      ```python
 907 |      array = np.array([1, np.nan, 3])
 908 |      cleaned = np.nan_to_num(array, nan=0)
 909 |      ```
 910 | 
 911 | 113. **What is the role of logging in NumPy debugging?**  
 912 |      Tracks errors and operations.  
 913 |      ```python
 914 |      import logging
 915 |      logging.basicConfig(filename='numpy.log', level=logging.INFO)
 916 |      logging.info("Starting NumPy operation")
 917 |      ```
 918 | 
 919 | 114. **How do you handle overflow errors in NumPy?**  
 920 |      Uses safe numerical ranges.  
 921 |      ```python
 922 |      array = np.array([1e308], dtype=np.float64)
 923 |      result = np.clip(array, -1e308, 1e308)
 924 |      ```
 925 | 
 926 | ### Intermediate
 927 | 115. **Write a function to retry NumPy operations on failure.**  
 928 |      Handles transient errors.  
 929 |      ```python
 930 |      def retry_operation(func, array, max_attempts=3):
 931 |          for attempt in range(max_attempts):
 932 |              try:
 933 |                  return func(array)
 934 |              except Exception as e:
 935 |                  if attempt == max_attempts - 1:
 936 |                      raise
 937 |                  print(f"Attempt {attempt+1} failed: {e}")
 938 |      ```
 939 | 
 940 | 116. **How do you debug NumPy operation outputs?**  
 941 |      Inspects intermediate results.  
 942 |      ```python
 943 |      def debug_operation(array):
 944 |          result = array * 2
 945 |          print(f"Input: {array[:5]}, Output: {result[:5]}")
 946 |          return result
 947 |      ```
 948 | 
 949 | 117. **Write a function to validate NumPy array dtypes.**  
 950 |      Ensures correct data types.  
 951 |      ```python
 952 |      def validate_dtype(array, expected_dtype):
 953 |          if array.dtype != expected_dtype:
 954 |              raise ValueError(f"Expected dtype {expected_dtype}, got {array.dtype}")
 955 |          return array
 956 |      ```
 957 | 
 958 | 118. **How do you profile NumPy operation performance?**  
 959 |      Measures execution time.  
 960 |      ```python
 961 |      import time
 962 |      def profile_operation(array):
 963 |          start = time.time()
 964 |          result = np.sin(array)
 965 |          print(f"Operation took {time.time() - start}s")
 966 |          return result
 967 |      ```
 968 | 
 969 | 119. **Write a function to handle memory errors in NumPy.**  
 970 |      Manages large arrays.  
 971 |      ```python
 972 |      def safe_operation(array, max_size=1e6):
 973 |          if array.size > max_size:
 974 |              raise MemoryError("Array too large")
 975 |          return array * 2
 976 |      ```
 977 | 
 978 | 120. **How do you debug broadcasting errors in NumPy?**  
 979 |      Logs shape mismatches.  
 980 |      ```python
 981 |      def debug_broadcasting(array1, array2):
 982 |          try:
 983 |              return array1 + array2
 984 |          except ValueError as e:
 985 |              print(f"Broadcasting error: {e}, Shapes: {array1.shape}, {array2.shape}")
 986 |              raise
 987 |      ```
 988 | 
 989 | ### Advanced
 990 | 121. **Write a function to implement a custom NumPy error handler.**  
 991 |      Logs specific errors.  
 992 |      ```python
 993 |      import logging
 994 |      def custom_error_handler(array, operation):
 995 |          logging.basicConfig(filename='numpy.log', level=logging.ERROR)
 996 |          try:
 997 |              return operation(array)
 998 |          except Exception as e:
 999 |              logging.error(f"Operation error: {e}")
1000 |              raise
1001 |      ```
1002 | 
1003 | 122. **How do you implement circuit breakers in NumPy applications?**  
1004 |      Prevents cascading failures.  
1005 |      ```python
1006 |      from pybreaker import CircuitBreaker
1007 |      breaker = CircuitBreaker(fail_max=3, reset_timeout=60)
1008 |      @breaker
1009 |      def safe_operation(array):
1010 |          return np.linalg.inv(array)
1011 |      ```
1012 | 
1013 | 123. **Write a function to detect numerical instability in NumPy.**  
1014 |      Checks for large condition numbers.  
1015 |      ```python
1016 |      def detect_instability(matrix):
1017 |          cond = np.linalg.cond(matrix)
1018 |          if cond > 1e10:
1019 |              print("Warning: Matrix may be ill-conditioned")
1020 |          return matrix
1021 |      ```
1022 | 
1023 | 124. **How do you implement logging for distributed NumPy operations?**  
1024 |      Centralizes logs for debugging.  
1025 |      ```python
1026 |      import logging.handlers
1027 |      def setup_distributed_logging():
1028 |          handler = logging.handlers.SocketHandler('log-server', 9090)
1029 |          logging.basicConfig(level=logging.INFO, handlers=[handler])  # root defaults to WARNING, which would drop INFO records
1030 |          logging.info("NumPy operation started")
1031 |      ```
1032 | 
1033 | 125. **Write a function to handle version compatibility in NumPy.**  
1034 |      Checks library versions.  
1035 |      ```python
1036 |      import numpy as np
1037 |      def check_numpy_version():
1038 |          if tuple(map(int, np.__version__.split('.')[:2])) < (1, 20):
1039 |              raise ValueError("Unsupported NumPy version")
1040 |      ```
1041 | 
1042 | 126. **How do you debug NumPy performance bottlenecks?**  
1043 |      Profiles operation stages.  
1044 |      ```python
1045 |      import time
1046 |      def debug_bottlenecks(array):
1047 |          start = time.time()
1048 |          result = np.dot(array, array.T)
1049 |          print(f"Matrix multiplication: {time.time() - start}s")
1050 |          return result
1051 |      ```
1052 | 
1053 | ## Visualization and Interpretation
1054 | 
1055 | ### Basic
1056 | 127. **How do you visualize NumPy array distributions?**  
1057 |      Plots histograms for data analysis.  
1058 |      ```python
1059 |      import matplotlib.pyplot as plt
1060 |      array = np.random.randn(1000)
1061 |      plt.hist(array, bins=30)
1062 |      plt.savefig('array_distribution.png')
1063 |      ```
1064 | 
1065 | 128. **How do you create a scatter plot with NumPy data?**  
1066 |      Visualizes relationships in data.  
1067 |      ```python
1068 |      import matplotlib.pyplot as plt
1069 |      x = np.random.rand(100)
1070 |      y = np.random.rand(100)
1071 |      plt.scatter(x, y)
1072 |      plt.savefig('scatter_plot.png')
1073 |      ```
1074 | 
1075 | 129. **How do you visualize matrix data in NumPy?**  
1076 |      Uses heatmaps for matrices.  
1077 |      ```python
1078 |      import matplotlib.pyplot as plt
1079 |      matrix = np.random.rand(5, 5)
1080 |      plt.imshow(matrix, cmap='coolwarm')
1081 |      plt.colorbar()
1082 |      plt.savefig('matrix_heatmap.png')
1083 |      ```
1084 | 
1085 | 130. **How do you plot NumPy array operations?**  
1086 |      Visualizes transformed data.  
1087 |      ```python
1088 |      import matplotlib.pyplot as plt
1089 |      array = np.linspace(0, 10, 100)
1090 |      plt.plot(array, np.sin(array))
1091 |      plt.savefig('sin_plot.png')
1092 |      ```
1093 | 
1094 | 131. **How do you create a 3D plot with NumPy data?**  
1095 |      Visualizes multi-dimensional arrays.  
1096 |      ```python
1097 |      from mpl_toolkits.mplot3d import Axes3D
1098 |      import matplotlib.pyplot as plt
1099 |      x = np.linspace(-5, 5, 100)
1100 |      y = np.linspace(-5, 5, 100)
1101 |      X, Y = np.meshgrid(x, y)
1102 |      Z = np.sin(np.sqrt(X**2 + Y**2))
1103 |      fig = plt.figure()
1104 |      ax = fig.add_subplot(111, projection='3d')
1105 |      ax.plot_surface(X, Y, Z)
1106 |      plt.savefig('3d_plot.png')
1107 |      ```
1108 | 
1109 | 132. **How do you visualize NumPy array statistics?**  
1110 |      Plots the mean of each array as a bar chart.  
1111 |      ```python
1112 |      import matplotlib.pyplot as plt
1113 |      arrays = [np.random.randn(100) for _ in range(5)]
1114 |      means = [np.mean(arr) for arr in arrays]
1115 |      plt.bar(range(len(means)), means)
1116 |      plt.savefig('array_stats.png')
1117 |      ```
1118 | 
1119 | ### Intermediate
1120 | 133. **Write a function to visualize NumPy array comparisons.**  
1121 |      Plots multiple arrays.  
1122 |      ```python
1123 |      import matplotlib.pyplot as plt
1124 |      def compare_arrays(arrays, labels):
1125 |          for arr, label in zip(arrays, labels):
1126 |              plt.plot(arr, label=label)
1127 |          plt.legend()
1128 |          plt.savefig('array_comparison.png')
1129 |      ```
1130 | 
1131 | 134. **How do you visualize NumPy clustering results?**  
1132 |      Plots clustered data points.  
1133 |      ```python
1134 |      import matplotlib.pyplot as plt
1135 |      def plot_clusters(X, labels):
1136 |          plt.scatter(X[:, 0], X[:, 1], c=labels)
1137 |          plt.savefig('cluster_plot.png')
1138 |      ```
1139 | 
1140 | 135. **Write a function to visualize NumPy feature importance.**  
1141 |      Plots feature weights.  
1142 |      ```python
1143 |      import matplotlib.pyplot as plt
1144 |      def plot_feature_importance(features, importances):
1145 |          plt.bar(features, importances)
1146 |          plt.xticks(rotation=45)
1147 |          plt.savefig('feature_importance.png')
1148 |      ```
1149 | 
1150 | 136. **How do you visualize NumPy matrix transformations?**  
1151 |      Shows before/after effects.  
1152 |      ```python
1153 |      import matplotlib.pyplot as plt
1154 |      def plot_transformation(matrix, transformed):
1155 |          plt.subplot(1, 2, 1)
1156 |          plt.imshow(matrix, cmap='Blues')
1157 |          plt.subplot(1, 2, 2)
1158 |          plt.imshow(transformed, cmap='Blues')
1159 |          plt.savefig('transformation_plot.png')
1160 |      ```
1161 | 
1162 | 137. **Write a function to visualize NumPy error distributions.**  
1163 |      Plots operation errors.  
1164 |      ```python
1165 |      import matplotlib.pyplot as plt
1166 |      def plot_errors(errors):
1167 |          plt.hist(errors, bins=20)
1168 |          plt.savefig('error_distribution.png')
1169 |      ```
1170 | 
1171 | 138. **How do you visualize NumPy data trends?**  
1172 |      Plots time series or trends.  
1173 |      ```python
1174 |      import matplotlib.pyplot as plt
1175 |      data = np.cumsum(np.random.randn(100))
1176 |      plt.plot(data)
1177 |      plt.savefig('data_trend.png')
1178 |      ```
1179 | 
1180 | ### Advanced
1181 | 139. **Write a function to visualize NumPy high-dimensional data.**  
1182 |      Uses PCA for projection.  
1183 |      ```python
1184 |      from sklearn.decomposition import PCA
1185 |      import matplotlib.pyplot as plt
1186 |      def plot_high_dim_data(data):
1187 |          pca = PCA(n_components=2)
1188 |          reduced = pca.fit_transform(data)
1189 |          plt.scatter(reduced[:, 0], reduced[:, 1])
1190 |          plt.savefig('high_dim_plot.png')
1191 |      ```
1192 | 
1193 | 140. **How do you implement a dashboard for NumPy metrics?**  
1194 |      Displays real-time stats.  
1195 |      ```python
1196 |      from fastapi import FastAPI
1197 |      app = FastAPI()
1198 |      metrics = []
1199 |      @app.get('/metrics')
1200 |      async def get_metrics():
1201 |          return {'metrics': metrics}
1202 |      ```
1203 | 
1204 | 141. **Write a function to visualize NumPy operation performance.**  
1205 |      Plots execution times.  
1206 |      ```python
1207 |      import matplotlib.pyplot as plt
1208 |      def plot_performance(sizes, times):
1209 |          plt.plot(sizes, times, marker='o')
1210 |          plt.savefig('performance_plot.png')
1211 |      ```
1212 | 
1213 | 142. **How do you visualize NumPy data drift?**  
1214 |      Tracks data changes over time.  
1215 |      ```python
1216 |      import matplotlib.pyplot as plt
1217 |      def plot_data_drift(metrics):
1218 |          plt.plot(metrics, marker='o')
1219 |          plt.savefig('data_drift.png')
1220 |      ```
1221 | 
1222 | 143. **Write a function to visualize NumPy uncertainty.**  
1223 |      Plots confidence intervals.  
1224 |      ```python
1225 |      import matplotlib.pyplot as plt
1226 |      def plot_uncertainty(data, std):
1227 |          plt.plot(data)
1228 |          plt.fill_between(range(len(data)), data - std, data + std, alpha=0.2)
1229 |          plt.savefig('uncertainty_plot.png')
1230 |      ```
1231 | 
1232 | 144. **How do you visualize NumPy model errors by category?**  
1233 |      Analyzes error patterns.  
1234 |      ```python
1235 |      import matplotlib.pyplot as plt
1236 |      def plot_error_by_category(categories, errors):
1237 |          plt.bar(categories, errors)
1238 |          plt.savefig('error_by_category.png')
1239 |      ```
1240 | 
1241 | ## Best Practices and Optimization
1242 | 
1243 | ### Basic
1244 | 145. **What are best practices for NumPy code organization?**  
1245 |      Modularizes array operations.  
1246 |      ```python
1247 |      def preprocess_data(data):
1248 |          return standardize_features(data)
1249 |      def compute_features(data):
1250 |          return np.dot(data, data.T)
1251 |      ```
1252 | 
1253 | 146. **How do you ensure reproducibility in NumPy?**  
1254 |      Sets random seeds.  
1255 |      ```python
1256 |      np.random.seed(42)
1257 |      ```
1258 | 
1259 | 147. **What is caching in NumPy pipelines?**  
1260 |      Stores intermediate results.  
1261 |      ```python
1262 |      from functools import lru_cache
1263 |      @lru_cache(maxsize=1000)
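1264 |      def compute_matrix(array):  # pass a tuple of tuples: ndarrays are unhashable, so lru_cache cannot key on them
1265 |          return np.dot(np.asarray(array), np.asarray(array).T)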
1266 |      ```
1267 | 
1268 | 148. **How do you handle large-scale NumPy arrays?**  
1269 |      Uses chunked processing.  
1270 |      ```python
1271 |      def process_large_array(array, chunk_size=1000):
1272 |          for i in range(0, len(array), chunk_size):
1273 |              yield array[i:i + chunk_size]
1274 |      ```
1275 | 
1276 | 149. **What is the role of environment configuration in NumPy?**  
1277 |      Manages settings securely.  
1278 |      ```python
1279 |      import os
1280 |      os.environ['NUMPY_DATA_PATH'] = 'data.npy'
1281 |      ```
1282 | 
1283 | 150. **How do you document NumPy code?**  
1284 |      Uses docstrings for clarity.  
1285 |      ```python
1286 |      def normalize_array(array):
1287 |          """Normalizes array to zero mean and unit variance."""
1288 |          return (array - np.mean(array)) / np.std(array)
1289 |      ```
1290 | 
1291 | #### Intermediate
1292 | 151. **Write a function to optimize NumPy memory usage.**  
1293 |      Limits memory allocation.  
1294 |      ```python
1295 |      def optimize_memory(array, max_size=1e6):
1296 |          if array.size > max_size:
1297 |              return array[:int(max_size)]
1298 |          return array
1299 |      ```
1300 | 
1301 | 152. **How do you implement unit tests for NumPy code?**  
1302 |      Validates array operations.  
1303 |      ```python
1304 |      import unittest
1305 |      class TestNumPy(unittest.TestCase):  # relies on normalize_array being defined or imported in the test module
1306 |          def test_normalize(self):
1307 |              array = np.array([1, 2, 3])
1308 |              result = normalize_array(array)
1309 |              self.assertAlmostEqual(np.mean(result), 0)  # compares after rounding to 7 decimal places by default
1310 |      ```
1311 | 
1312 | 153. **Write a function to create reusable NumPy templates.**  
1313 |      Standardizes array processing.  
1314 |      ```python
1315 |      def array_template(array, operation='normalize'):  # dispatches on the operation name
1316 |          if operation == 'normalize':
1317 |              return normalize_array(array)
1318 |          return array  # any other operation name returns the input unchanged
1319 |      ```
1320 | 
1321 | 154. **How do you optimize NumPy for batch processing?**  
1322 |      Processes arrays in chunks.  
1323 |      ```python
1324 |      def batch_process(arrays, batch_size=100):  # generator: yields one list of normalized arrays per batch
1325 |          for i in range(0, len(arrays), batch_size):
1326 |              yield [normalize_array(arr) for arr in arrays[i:i + batch_size]]  # each array is normalized independently
1327 |      ```
1328 | 
1329 | 155. **Write a function to handle NumPy configuration.**  
1330 |      Centralizes settings.  
1331 |      ```python
1332 |      def configure_numpy():  # returns a fresh settings dict on every call; no global NumPy state is changed
1333 |          return {'dtype': np.float32, 'order': 'C'}  # 'C' is NumPy's name for row-major memory layout
1334 |      ```
1335 | 
1336 | 156. **How do you ensure NumPy pipeline consistency?**  
1337 |      Standardizes versions and settings.  
1338 |      ```python
1339 |      import numpy as np
1340 |      def check_numpy_env():  # prints the installed NumPy version; returns None
1341 |          print(f"NumPy version: {np.__version__}")
1342 |      ```
1343 | 
1344 | #### Advanced
1345 | 157. **Write a function to implement NumPy pipeline caching.**  
1346 |      Reuses processed arrays.  
1347 |      ```python
1348 |      import joblib
1349 |      def cache_array(array, cache_file='cache.npy'):
1350 |          if os.path.exists(cache_file):
1351 |              return np.load(cache_file)
1352 |          result = normalize_array(array)
1353 |          np.save(cache_file, result)
1354 |          return result
1355 |      ```
1356 | 
1357 | 158. **How do you optimize NumPy for high-throughput processing?**  
1358 |      Uses parallel execution.  
1359 |      ```python
1360 |      from joblib import Parallel, delayed
1361 |      def high_throughput_process(arrays):  # normalizes every array via joblib workers and returns the results as a list
1362 |          return Parallel(n_jobs=-1)(delayed(normalize_array)(arr) for arr in arrays)  # n_jobs=-1 uses all available CPUs
1363 |      ```
1364 | 
1365 | 159. **Write a function to implement NumPy pipeline versioning.**  
1366 |      Tracks changes in workflows.  
1367 |      ```python
1368 |      def version_pipeline(config, version):
1369 |          with open(f'numpy_pipeline_v{version}.json', 'w') as f:
1370 |              json.dump(config, f)
1371 |      ```
1372 | 
1373 | 160. **How do you implement NumPy pipeline monitoring?**  
1374 |      Logs performance metrics.  
1375 |      ```python
1376 |      import logging
1377 |      def monitored_process(array):
1378 |          logging.basicConfig(filename='numpy.log', level=logging.INFO)
1379 |          start = time.time()
1380 |          result = normalize_array(array)
1381 |          logging.info(f"Processed array in {time.time() - start}s")
1382 |          return result
1383 |      ```
1384 | 
1385 | 161. **Write a function to handle NumPy scalability.**  
1386 |      Processes large datasets efficiently.  
1387 |      ```python
1388 |      def scalable_process(array, chunk_size=1000):  # generator: yields one normalized chunk at a time
1389 |          for i in range(0, len(array), chunk_size):
1390 |              yield normalize_array(array[i:i + chunk_size])  # NOTE: uses each chunk's own mean/std, not the whole array's
1391 |      ```
1392 | 
1393 | 162. **How do you implement NumPy pipeline automation?**  
1394 |      Scripts end-to-end workflows.  
1395 |      ```python
1396 |      def automate_pipeline(data):  # normalize, persist to disk, and return the processed array
1397 |          processed = normalize_array(data)
1398 |          np.save('processed_data.npy', processed)  # overwrites any existing file of that name
1399 |          return processed
1400 |      ```
1401 | 
1402 | ## Ethical Considerations in NumPy
1403 | 
1404 | ### Basic
1405 | 163. **What are ethical concerns in NumPy applications?**  
1406 |    Includes bias in data processing and resource usage.  
1407 |    ```python
1408 |    def check_data_bias(data, labels):  # mean of samples labelled 0 minus mean of samples labelled 1
1409 |        return np.mean(data[labels == 0]) - np.mean(data[labels == 1])  # assumes labels is a NumPy array aligned with data
1410 |    ```
1411 | 
1412 | 164. **How do you detect bias in NumPy data processing?**  
1413 |    Analyzes statistical disparities.  
1414 |    ```python
1415 |    def detect_bias(data, groups):  # maps each distinct group value to the mean of that group's data
1416 |        return {g: np.mean(data[groups == g]) for g in np.unique(groups)}
1417 |    ```
1418 | 
1419 | 165. **What is data privacy in NumPy, and how is it ensured?**  
1420 |    Protects sensitive data.  
1421 |    ```python
1422 |    def anonymize_data(data):  # returns a perturbed copy; the input array is not modified
1423 |        return data + np.random.normal(0, 0.1, data.shape)  # zero-mean Gaussian noise, std 0.1; NOTE: not a formal privacy guarantee
1424 |    ```
1425 | 
1426 | 166. **How do you ensure fairness in NumPy data processing?**  
1427 |    Balances data across groups.  
1428 |    ```python
1429 |    def fair_processing(data, labels):  # delegates to balance_data, which is defined outside this snippet
1430 |        return balance_data(data, labels, minority_class=1)  # class 1 is hard-coded as the minority class
1431 |    ```
1432 | 
1433 | 167. **What is explainability in NumPy applications?**  
1434 |    Clarifies data transformations.  
1435 |    ```python
1436 |    def explain_transformation(data, transformed):  # prints the mean before and after, then passes transformed through
1437 |        print(f"Mean before: {np.mean(data)}, Mean after: {np.mean(transformed)}")
1438 |        return transformed
1439 |    ```
1440 | 
1441 | 168. **How do you visualize NumPy data bias?**  
1442 |    Plots group-wise statistics.  
1443 |    ```python
1444 |    import matplotlib.pyplot as plt
1445 |    def plot_bias(groups, means):  # bar chart: one bar per group, height = that group's mean
1446 |        plt.bar(groups, means)  # NOTE(review): draws on pyplot's current figure, which is never cleared or closed here
1447 |        plt.savefig('bias_plot.png')  # saved relative to the current working directory
1448 |    ```
1449 | 
1450 | #### Intermediate
1451 | 169. **Write a function to mitigate bias in NumPy data.**  
1452 |      Reweights or resamples data.  
1453 |      ```python
1454 |      def mitigate_bias(data, labels, minority_class):  # forwards all arguments unchanged to balance_data (defined outside this snippet)
1455 |          return balance_data(data, labels, minority_class)
1456 |      ```
1457 | 
1458 | 170. **How do you implement differential privacy in NumPy?**  
1459 |      Adds noise to protect data.  
1460 |      ```python
1461 |      def private_processing(data, epsilon=1.0):
1462 |          noise = np.random.laplace(0, 1/epsilon, data.shape)
1463 |          return data + noise
1464 |      ```


--------------------------------------------------------------------------------