diff --git a/machine_learning/ridge_regression.py b/machine_learning/ridge_regression.py
new file mode 100644
index 000000000000..0dbe481f5944
--- /dev/null
+++ b/machine_learning/ridge_regression.py
@@ -0,0 +1,142 @@
+"""
+Ridge Regression using Gradient Descent.
+
+This script implements Ridge Regression (L2 regularization) using gradient descent.
+It predicts Average Damage per Round (ADR) from player ratings.
+
+Author: Nitin Pratap Singh
+"""
+
+import httpx
+import numpy as np
+
+
+def collect_dataset() -> np.ndarray:
+    """
+    Collects the CS:GO dataset from a remote CSV file.
+
+    The CSV contains ADR vs Rating of players.
+
+    :return: NumPy array of shape (n_samples, 2)
+
+    >>> data = collect_dataset()
+    >>> data.shape[1]
+    2
+    """
+    response = httpx.get(
+        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
+        "master/Week1/ADRvsRating.csv",
+        timeout=10,
+    )
+    lines = response.text.splitlines()
+    data = [line.split(",") for line in lines]
+    data.pop(0)  # Remove the header row
+    dataset = np.array(data, dtype=float)
+    return dataset
+
+
+def ridge_cost_function(
+    X: np.ndarray, y: np.ndarray, theta: np.ndarray, lam: float
+) -> float:
+    """
+    Computes the cost for Ridge Regression (L2 regularization).
+
+    :param X: Feature matrix (n_samples, n_features)
+    :param y: Target vector (n_samples,)
+    :param theta: Coefficients (n_features,)
+    :param lam: Regularization strength (lambda)
+    :return: Cost value (float)
+
+    >>> X = np.array([[1, 1], [1, 2]])
+    >>> y = np.array([1, 2])
+    >>> theta = np.zeros(2)
+    >>> round(ridge_cost_function(X, y, theta, 0.1), 2)
+    1.25
+    """
+    m = len(y)
+    predictions = X @ theta
+    error = predictions - y
+    cost = (1 / (2 * m)) * np.dot(error, error)
+    reg_cost = (lam / (2 * m)) * np.dot(theta[1:], theta[1:])  # Excludes bias
+    return cost + reg_cost
+
+
+def ridge_gradient_descent(
+    X: np.ndarray,
+    y: np.ndarray,
+    theta: np.ndarray,
+    alpha: float,
+    iterations: int,
+    lam: float,
+    verbose: bool = True,
+) -> np.ndarray:
+    """
+    Performs gradient descent with L2 regularization.
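+
+    Each iteration applies the update below, where theta_reg denotes theta
+    with its first (bias) component zeroed so the bias is not penalized:
+
+        theta := theta - alpha * (X.T @ (X @ theta - y) / m + (lam / m) * theta_reg)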
+
+    :param X: Feature matrix (n_samples, n_features)
+    :param y: Target values (n_samples,)
+    :param theta: Initial weights (n_features,)
+    :param alpha: Learning rate (float)
+    :param iterations: Number of iterations (int)
+    :param lam: Regularization strength (lambda)
+    :param verbose: Print cost every 10,000 steps if True
+    :return: Optimized weights (n_features,)
+
+    >>> X = np.array([[1, 1], [1, 2]])
+    >>> y = np.array([1, 2])
+    >>> theta = np.zeros(2)
+    >>> final_theta = ridge_gradient_descent(X, y, theta, 0.1, 10, 0.01, verbose=False)
+    >>> len(final_theta)
+    2
+    """
+    m = len(y)
+    for i in range(iterations):
+        predictions = X @ theta
+        error = predictions - y
+        gradient = (1 / m) * (X.T @ error)
+        reg_term = (lam / m) * theta
+        reg_term[0] = 0  # Do not regularize the bias term
+        theta = theta - alpha * (gradient + reg_term)
+
+        if verbose and i % 10000 == 0:
+            cost = ridge_cost_function(X, y, theta, lam)
+            print(f"Iteration {i}: Cost = {cost:.5f}")
+
+    return theta
+
+
+def main() -> None:
+    """
+    Driver function for running Ridge Regression.
+    """
+    data = collect_dataset()
+
+    # Normalize the feature column to avoid overflow
+    feature = data[:, 0]
+    feature = (feature - feature.mean()) / feature.std()
+
+    X = np.c_[np.ones(data.shape[0]), feature]  # Add bias term
+    y = data[:, 1]
+
+    theta = np.zeros(X.shape[1])
+    alpha = 0.001  # Learning rate
+    iterations = 100000
+    lam = 0.1  # Regularization strength
+
+    final_theta = ridge_gradient_descent(X, y, theta, alpha, iterations, lam)
+
+    print("\nOptimized weights (theta):")
+    for i, value in enumerate(final_theta):
+        print(f"θ{i}: {value:.5f}")
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
+    main()
diff --git a/strings/min_cost_string_conversion.py b/strings/min_cost_string_conversion.py
index 87eb5189e16a..cddc0495c895 100644
--- a/strings/min_cost_string_conversion.py
+++ b/strings/min_cost_string_conversion.py
@@ -18,9 +18,12 @@ def compute_transform_tables(
     insert_cost: int,
 ) -> tuple[list[list[int]], list[list[str]]]:
     """
-    Finds the most cost efficient sequence
+    Finds the most cost-efficient sequence
     for converting one string into another.
 
+    >>> compute_transform_tables("", "", 1, 2, 3, 3)
+    ([[0]], [['0']])
+
     >>> costs, operations = compute_transform_tables("cat", "cut", 1, 2, 3, 3)
     >>> costs[0][:4]
     [0, 3, 6, 9]
@@ -30,10 +33,8 @@ def compute_transform_tables(
     ['0', 'Ic', 'Iu', 'It']
     >>> operations[3][:4]
     ['Dt', 'Dt', 'Rtu', 'Ct']
-
-    >>> compute_transform_tables("", "", 1, 2, 3, 3)
-    ([[0]], [['0']])
     """
+
     source_seq = list(source_string)
     destination_seq = list(destination_string)
     len_source_seq = len(source_seq)
@@ -86,20 +87,16 @@ def assemble_transformation(ops: list[list[str]], i: int, j: int) -> list[str]:
     >>> assemble_transformation(ops, x, y)
     ['Cc', 'Rau', 'Ct']
 
-    >>> ops1 = [['0']]
-    >>> x1 = len(ops1) - 1
-    >>> y1 = len(ops1[0]) - 1
-    >>> assemble_transformation(ops1, x1, y1)
+    >>> assemble_transformation([['0']], 0, 0)
     []
 
     >>> ops2 = [['0', 'I1', 'I2', 'I3'],
     ...         ['D1', 'C1', 'I2', 'I3'],
     ...         ['D2', 'D2', 'R23', 'R23']]
-    >>> x2 = len(ops2) - 1
-    >>> y2 = len(ops2[0]) - 1
-    >>> assemble_transformation(ops2, x2, y2)
+    >>> assemble_transformation(ops2, 2, 3)
     ['C1', 'I2', 'R23']
     """
+
     if i == 0 and j == 0:
         return []
     elif ops[i][j][0] in {"C", "R"}: