diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..34ee3c8 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Key normalisation functionality. diff --git a/docs/normalise_keys.md b/docs/normalise_keys.md new file mode 100644 index 0000000..1f324cc --- /dev/null +++ b/docs/normalise_keys.md @@ -0,0 +1,103 @@ +# Key normalisation + +The `normalise_keys` module provides utilities for normalising primary and foreign keys in related database tables to zero-based sequential indices while preserving relationships. + +## Functions + +### `normalise_table_keys(tables, primary_keys, foreign_keys=None)` + +Normalises primary and foreign keys across multiple related tables. + +**Parameters:** +- `tables` (Dict[str, pd.DataFrame]): Dictionary mapping table names to DataFrames +- `primary_keys` (Dict[str, str]): Dictionary mapping table names to their primary key column names +- `foreign_keys` (Optional[Dict[str, Dict[str, str]]]): Dictionary mapping table names to their foreign key relationships. Format: `{table_name: {fk_column: referenced_table}}`. If None, foreign keys are auto-detected. 
+ +**Returns:** +- Dict[str, pd.DataFrame]: Dictionary of normalised tables with zero-based integer keys + +**Example:** +```python +import pandas as pd +from policyengine_data import normalise_table_keys + +users = pd.DataFrame({ + 'user_id': [101, 105, 103], + 'name': ['Alice', 'Bob', 'Carol'] +}) + +orders = pd.DataFrame({ + 'order_id': [201, 205, 207], + 'user_id': [105, 101, 105], + 'amount': [25.99, 15.50, 42.00] +}) + +tables = {'users': users, 'orders': orders} +primary_keys = {'users': 'user_id', 'orders': 'order_id'} + +# Auto-detect foreign keys +normalised = normalise_table_keys(tables, primary_keys) + +# Or specify foreign keys explicitly +foreign_keys = {'orders': {'user_id': 'users'}} +normalised = normalise_table_keys(tables, primary_keys, foreign_keys) +``` + +After normalisation: +- User IDs become 0, 1, 2 (instead of 101, 105, 103) +- Order IDs become 0, 1, 2 (instead of 201, 205, 207) +- Foreign key relationships are preserved (Bob's orders still reference Bob's new ID) + +### `normalise_single_table_keys(df, key_column, start_index=0)` + +Normalises keys in a single table to sequential indices. 
+ +**Parameters:** +- `df` (pd.DataFrame): DataFrame to normalise +- `key_column` (str): Name of the key column to normalise +- `start_index` (int): Starting index for normalisation (default: 0) + +**Returns:** +- pd.DataFrame: DataFrame with normalised keys + +**Example:** +```python +import pandas as pd +from policyengine_data import normalise_single_table_keys + +df = pd.DataFrame({ + 'id': [101, 105, 103], + 'value': ['A', 'B', 'C'] +}) + +normalised = normalise_single_table_keys(df, 'id') +# Result: IDs become 0, 1, 2 +``` + +## Key features + +- **Relationship preservation**: All foreign key relationships between tables are maintained after normalisation +- **Auto-detection**: Foreign keys can be automatically detected based on column name matching +- **Zero-based indexing**: Keys are normalised to start from 0 and increment sequentially +- **Flexible input**: Works with any pandas DataFrames and column names +- **Error handling**: Clear error messages for missing columns or invalid references +- **Duplicate handling**: Properly handles duplicate keys within tables + +## Use cases + +This functionality is particularly useful for: + +- Preparing data for machine learning models that expect sequential indices +- Converting legacy database exports with non-sequential primary keys +- Standardising key formats across multiple related datasets +- Reducing memory usage by converting large integer keys to compact sequential indices +- Creating consistent test datasets with predictable key patterns + +## Implementation notes + +The normalisation process works in two phases: + +1. **Mapping creation**: Unique values in each primary key column are mapped to zero-based sequential integers +2. **Application**: These mappings are applied to both primary keys and corresponding foreign keys across all tables + +Foreign key auto-detection works by identifying columns that share names with primary key columns from other tables. 
For more complex relationships, explicit foreign key specification is recommended. \ No newline at end of file diff --git a/src/policyengine_data/__init__.py b/src/policyengine_data/__init__.py index 9a4f8a7..58b2423 100644 --- a/src/policyengine_data/__init__.py +++ b/src/policyengine_data/__init__.py @@ -1,3 +1,4 @@ from .dataset_legacy import Dataset from .multi_year_dataset import MultiYearDataset +from .normalise_keys import normalise_single_table_keys, normalise_table_keys from .single_year_dataset import SingleYearDataset diff --git a/src/policyengine_data/normalise_keys.py b/src/policyengine_data/normalise_keys.py new file mode 100644 index 0000000..49189b4 --- /dev/null +++ b/src/policyengine_data/normalise_keys.py @@ -0,0 +1,187 @@ +""" +Key normalisation utilities for tables with primary and foreign keys. + +This module provides functionality to normalise primary and foreign keys +in related tables to zero-based sequential indices while preserving +relationships between tables. +""" + +from typing import Any, Dict, List, Optional, Union + +import pandas as pd + + +def normalise_table_keys( + tables: Dict[str, pd.DataFrame], + primary_keys: Dict[str, str], + foreign_keys: Optional[Dict[str, Dict[str, str]]] = None, + start_index: Optional[int] = 0, +) -> Dict[str, pd.DataFrame]: + """ + Normalise primary and foreign keys across multiple tables to zero-based indices. + + This function takes a collection of related tables and converts their primary + and foreign keys to `start_index`-based sequential integers while preserving all + relationships between tables. + + Args: + tables: Dictionary mapping table names to DataFrames + primary_keys: Dictionary mapping table names to their primary key column names + foreign_keys: Optional dictionary mapping table names to their foreign key + relationships. 
Format: {table_name: {fk_column: referenced_table}} + If None, foreign keys will be auto-detected based on column names + matching primary key names from other tables. + start_index: Starting index for normalisation (default: 0). + + Returns: + Dictionary of normalised tables with `start_index`-based integer keys + + Example: + >>> users = pd.DataFrame({ + ... 'user_id': [101, 105, 103], + ... 'name': ['Alice', 'Bob', 'Carol'] + ... }) + >>> orders = pd.DataFrame({ + ... 'order_id': [201, 205, 207], + ... 'user_id': [105, 101, 105], + ... 'amount': [25.99, 15.50, 42.00] + ... }) + >>> tables = {'users': users, 'orders': orders} + >>> primary_keys = {'users': 'user_id', 'orders': 'order_id'} + >>> foreign_keys = {'orders': {'user_id': 'users'}} + >>> normalised = normalise_table_keys(tables, primary_keys, foreign_keys) + >>> # Result: user_ids become 0,1,2 and order_ids become 0,1,2 + >>> # with foreign key relationships preserved + """ + if not tables: + return {} + + if foreign_keys is None: + foreign_keys = _auto_detect_foreign_keys(tables, primary_keys) + + # Create mapping dictionaries for each primary key + key_mappings = {} + normalised_tables = {} + + # First pass: create mappings for primary keys + for table_name, df in tables.items(): + if table_name not in primary_keys: + raise ValueError( + f"No primary key specified for table '{table_name}'" + ) + + pk_column = primary_keys[table_name] + if pk_column not in df.columns: + raise ValueError( + f"Primary key column '{pk_column}' not found in table '{table_name}'" + ) + + # Get unique values and create zero-based mapping + unique_keys = df[pk_column].unique() + key_mappings[table_name] = { + old_key: new_key + start_index + for new_key, old_key in enumerate(unique_keys) + } + + # Second pass: apply mappings to all tables + for table_name, df in tables.items(): + normalised_df = df.copy() + pk_column = primary_keys[table_name] + + # Map primary key + normalised_df[pk_column] = normalised_df[pk_column].map( + 
key_mappings[table_name] + ) + + # Map foreign keys + if table_name in foreign_keys: + for fk_column, referenced_table in foreign_keys[ + table_name + ].items(): + if fk_column not in df.columns: + raise ValueError( + f"Foreign key column '{fk_column}' not found in table '{table_name}'" + ) + if referenced_table not in key_mappings: + raise ValueError( + f"Referenced table '{referenced_table}' not found" + ) + + normalised_df[fk_column] = normalised_df[fk_column].map( + key_mappings[referenced_table] + ) + + normalised_tables[table_name] = normalised_df + + return normalised_tables + + +def _auto_detect_foreign_keys( + tables: Dict[str, pd.DataFrame], primary_keys: Dict[str, str] +) -> Dict[str, Dict[str, str]]: + """ + Auto-detect foreign key relationships based on column name matching. + + Args: + tables: Dictionary of table names to DataFrames + primary_keys: Dictionary of primary key column names per table + + Returns: + Dictionary of detected foreign key relationships + """ + foreign_keys = {} + pk_columns = set(primary_keys.values()) + + for table_name, df in tables.items(): + table_fks = {} + pk_column = primary_keys[table_name] + + # Look for columns that match primary keys from other tables + for column in df.columns: + if column != pk_column and column in pk_columns: + # Find which table this primary key belongs to + for ref_table, ref_pk in primary_keys.items(): + if ref_pk == column and ref_table != table_name: + table_fks[column] = ref_table + break + + if table_fks: + foreign_keys[table_name] = table_fks + + return foreign_keys + + +def normalise_single_table_keys( + df: pd.DataFrame, key_column: str, start_index: int = 0 +) -> pd.DataFrame: + """ + Normalise keys in a single table to sequential indices. + + Args: + df: DataFrame to normalise + key_column: Name of the key column to normalise + start_index: Starting index for normalisation (default: 0) + + Returns: + DataFrame with normalised keys + + Example: + >>> df = pd.DataFrame({ + ... 
"""
Tests for key normalisation functionality.
"""

import pandas as pd
import pytest

from policyengine_data.normalise_keys import (
    _auto_detect_foreign_keys,
    normalise_single_table_keys,
    normalise_table_keys,
)


class TestNormaliseTableKeys:
    """Test cases for normalise_table_keys function."""

    def test_simple_single_table(self):
        """Test normalisation of a single table with no foreign keys."""
        people = pd.DataFrame(
            {"user_id": [101, 105, 103], "name": ["Alice", "Bob", "Carol"]}
        )

        out = normalise_table_keys({"users": people}, {"users": "user_id"})

        assert set(out.keys()) == {"users"}
        assert out["users"]["user_id"].tolist() == [0, 1, 2]
        assert out["users"]["name"].tolist() == ["Alice", "Bob", "Carol"]

    def test_custom_start_index(self):
        """Test normalisation with custom start index."""
        people = pd.DataFrame(
            {"user_id": [101, 105, 103], "name": ["Alice", "Bob", "Carol"]}
        )

        out = normalise_table_keys(
            {"users": people}, {"users": "user_id"}, start_index=10
        )

        assert set(out.keys()) == {"users"}
        assert out["users"]["user_id"].tolist() == [10, 11, 12]
        assert out["users"]["name"].tolist() == ["Alice", "Bob", "Carol"]

    def test_two_tables_with_foreign_keys(self):
        """Test normalisation with explicit foreign key relationships."""
        people = pd.DataFrame(
            {"user_id": [101, 105, 103], "name": ["Alice", "Bob", "Carol"]}
        )
        purchases = pd.DataFrame(
            {
                "order_id": [201, 205, 207],
                "user_id": [105, 101, 105],
                "amount": [25.99, 15.50, 42.00],
            }
        )

        out = normalise_table_keys(
            {"users": people, "orders": purchases},
            {"users": "user_id", "orders": "order_id"},
            {"orders": {"user_id": "users"}},
        )

        users_out, orders_out = out["users"], out["orders"]
        assert set(users_out["user_id"]) == {0, 1, 2}
        assert set(orders_out["order_id"]) == {0, 1, 2}

        # User 105 (Bob) originally owned orders 201 and 207; both of his
        # orders must still point at his renumbered id.
        bob_id = users_out.loc[users_out["name"] == "Bob", "user_id"].iloc[0]
        bob_rows = orders_out[orders_out["user_id"] == bob_id]
        assert len(bob_rows) == 2
        assert set(bob_rows["amount"]) == {25.99, 42.00}

    def test_auto_detect_foreign_keys(self):
        """Test automatic detection of foreign key relationships."""
        people = pd.DataFrame(
            {"user_id": [101, 105, 103], "name": ["Alice", "Bob", "Carol"]}
        )
        purchases = pd.DataFrame(
            {
                "order_id": [201, 205, 207],
                "user_id": [105, 101, 105],
                "amount": [25.99, 15.50, 42.00],
            }
        )

        # No explicit foreign keys: detection should link orders.user_id.
        out = normalise_table_keys(
            {"users": people, "orders": purchases},
            {"users": "user_id", "orders": "order_id"},
        )

        users_out, orders_out = out["users"], out["orders"]

        # Bob should still have his two orders after renumbering.
        bob_id = users_out.loc[users_out["name"] == "Bob", "user_id"].iloc[0]
        assert len(orders_out[orders_out["user_id"] == bob_id]) == 2

    def test_multiple_foreign_keys(self):
        """Test table with multiple foreign key relationships."""
        people = pd.DataFrame(
            {"user_id": [1, 2, 3], "name": ["Alice", "Bob", "Carol"]}
        )
        sections = pd.DataFrame(
            {
                "category_id": [10, 20, 30],
                "category_name": ["Electronics", "Books", "Clothing"],
            }
        )
        purchases = pd.DataFrame(
            {
                "order_id": [100, 200, 300],
                "user_id": [2, 1, 2],
                "category_id": [20, 10, 30],
                "amount": [25.99, 15.50, 42.00],
            }
        )

        pks = {
            "users": "user_id",
            "categories": "category_id",
            "orders": "order_id",
        }
        out = normalise_table_keys(
            {"users": people, "categories": sections, "orders": purchases},
            pks,
        )

        # Every table should now use zero-based keys.
        for name, frame in out.items():
            assert set(frame[pks[name]]) == {0, 1, 2}

        # Bob (original user_id=2) should still own two orders.
        users_out, orders_out = out["users"], out["orders"]
        bob_id = users_out.loc[users_out["name"] == "Bob", "user_id"].iloc[0]
        assert len(orders_out[orders_out["user_id"] == bob_id]) == 2

    def test_empty_tables(self):
        """Test with empty input."""
        assert normalise_table_keys({}, {}) == {}

    def test_missing_primary_key_column(self):
        """Test error handling for missing primary key column."""
        frame = pd.DataFrame({"name": ["Alice", "Bob"]})

        with pytest.raises(
            ValueError, match="Primary key column 'missing_id' not found"
        ):
            normalise_table_keys({"users": frame}, {"users": "missing_id"})

    def test_missing_foreign_key_column(self):
        """Test error handling for missing foreign key column."""
        people = pd.DataFrame({"user_id": [1, 2], "name": ["Alice", "Bob"]})
        purchases = pd.DataFrame(
            {"order_id": [100, 200], "amount": [25.99, 15.50]}
        )

        with pytest.raises(
            ValueError, match="Foreign key column 'missing_user_id' not found"
        ):
            normalise_table_keys(
                {"users": people, "orders": purchases},
                {"users": "user_id", "orders": "order_id"},
                {"orders": {"missing_user_id": "users"}},
            )

    def test_missing_referenced_table(self):
        """Test error handling for missing referenced table."""
        purchases = pd.DataFrame(
            {
                "order_id": [100, 200],
                "user_id": [1, 2],
                "amount": [25.99, 15.50],
            }
        )

        with pytest.raises(
            ValueError, match="Referenced table 'missing_users' not found"
        ):
            normalise_table_keys(
                {"orders": purchases},
                {"orders": "order_id"},
                {"orders": {"user_id": "missing_users"}},
            )


class TestNormaliseSingleTableKeys:
    """Test cases for normalise_single_table_keys function."""

    def test_basic_normalisation(self):
        """Test basic single table key normalisation."""
        frame = pd.DataFrame({"id": [101, 105, 103], "value": ["A", "B", "C"]})

        out = normalise_single_table_keys(frame, "id")

        assert out["id"].tolist() == [0, 1, 2]
        assert out["value"].tolist() == ["A", "B", "C"]

    def test_custom_start_index(self):
        """Test normalisation with custom start index."""
        frame = pd.DataFrame({"id": [101, 105, 103], "value": ["A", "B", "C"]})

        out = normalise_single_table_keys(frame, "id", start_index=10)

        assert out["id"].tolist() == [10, 11, 12]
        assert out["value"].tolist() == ["A", "B", "C"]

    def test_duplicate_keys_preserved(self):
        """Test that duplicate keys are handled correctly."""
        frame = pd.DataFrame(
            {"id": [101, 105, 101, 103], "value": ["A", "B", "A2", "C"]}
        )

        out = normalise_single_table_keys(frame, "id")

        # Three distinct originals -> exactly the normalised keys 0, 1, 2.
        assert len(out["id"].unique()) == 3
        assert set(out["id"].unique()) == {0, 1, 2}

        # Rows sharing an original key must share the normalised key.
        dup_rows = out.loc[frame.index[frame["id"] == 101]]
        assert dup_rows["id"].nunique() == 1

    def test_missing_key_column(self):
        """Test error handling for missing key column."""
        frame = pd.DataFrame({"value": ["A", "B", "C"]})

        with pytest.raises(
            ValueError, match="Key column 'missing_id' not found"
        ):
            normalise_single_table_keys(frame, "missing_id")


class TestAutoDetectForeignKeys:
    """Test cases for _auto_detect_foreign_keys function."""

    def test_simple_detection(self):
        """Test basic foreign key detection."""
        people = pd.DataFrame({"user_id": [1, 2], "name": ["Alice", "Bob"]})
        purchases = pd.DataFrame({"order_id": [100, 200], "user_id": [1, 2]})

        detected = _auto_detect_foreign_keys(
            {"users": people, "orders": purchases},
            {"users": "user_id", "orders": "order_id"},
        )

        assert detected == {"orders": {"user_id": "users"}}

    def test_no_foreign_keys(self):
        """Test when no foreign keys are detected."""
        people = pd.DataFrame({"user_id": [1, 2], "name": ["Alice", "Bob"]})
        stock = pd.DataFrame(
            {"product_id": [100, 200], "name": ["Widget", "Gadget"]}
        )

        detected = _auto_detect_foreign_keys(
            {"users": people, "products": stock},
            {"users": "user_id", "products": "product_id"},
        )

        assert detected == {}

    def test_multiple_foreign_keys_detection(self):
        """Test detection of multiple foreign keys in one table."""
        people = pd.DataFrame({"user_id": [1, 2], "name": ["Alice", "Bob"]})
        sections = pd.DataFrame(
            {"category_id": [10, 20], "name": ["Electronics", "Books"]}
        )
        purchases = pd.DataFrame(
            {
                "order_id": [100, 200],
                "user_id": [1, 2],
                "category_id": [10, 20],
            }
        )

        detected = _auto_detect_foreign_keys(
            {"users": people, "categories": sections, "orders": purchases},
            {
                "users": "user_id",
                "categories": "category_id",
                "orders": "order_id",
            },
        )

        assert detected == {
            "orders": {"user_id": "users", "category_id": "categories"}
        }