diff --git a/.gitignore b/.gitignore
index f8a1ee9544..7fe7d02743 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,27 @@ lib64/
 parts/
 sdist/
 var/
-data/
+#data/
+data/benchmarking/
+data/img/
+data/appearances.txt
+data/characters.txt
+data/comics.txt
+data/facebook_combined.txt
+data/honeypot.csv
+data/lesmiserables.csv
+data/samplegraph.json
+data/transactions.csv
+data/twitterDemo.csv
+data/demos_by_use_case/
+data/demos_databases_apis
+data/gfql/
+data/more_examples/
+data/talks/
+data/for_analysis.ipynb
+data/for_developers.ipynb
+data/upload_csv_miniapp.ipynb
+
 *.egg-info/
 .installed.cfg
 *.egg
diff --git a/demos/data/scripts/generator/CrimeNetworkGenerator.py b/demos/data/scripts/generator/CrimeNetworkGenerator.py
new file mode 100644
index 0000000000..aaa06a9130
--- /dev/null
+++ b/demos/data/scripts/generator/CrimeNetworkGenerator.py
@@ -0,0 +1,698 @@
+import pandas as pd
+import numpy as np
+from sklearn.datasets import make_blobs
+import factory.random
+from datetime import datetime, timedelta
+from ProfileGenerator import ProfileFactory
+from scipy.spatial import cKDTree
+from itertools import count
+import graphistry
+
+
+class PersonNetworkGenerator:
+    def __init__(
+        self,
+        n_kingpins: int = 4,
+        dealers_per_kingpin: int = 5,
+        users_per_dealer: int = 3,
+        dealer_normal_connections: int = 4,
+        kingpin_normal_connections: int = 3,
+        within_group_connections: int = 4,
+        random_connections: int = 3,
+        max_calls_per_edge: int = 11,
+        affiliations: list = ['Gang Alpha', 'Cartel Beta', 'Gang Gamma', 'Cartel Delta'],
+        crimes: list = [
+            "Armed Robbery",
+            "Burglary",
+            "Drug Trafficking",
+            "Vandalism",
+            "Assault",
+            "Money Laundering",
+            "Fraud",
+            "Homicide",
+        ],
+        max_crimes_per_case: int = 3,
+        max_cases_per_person: int = 3,
+        n_normal: int = 1000,
+        postal_code: str = None,
+        state: str = None,
+        call_start_date: str = "2022-1-1",
+        call_end_date: str = "2023-12-31",
+        max_num_whereabouts: int = 4,
+        leader_to_leader_call_chance: float = 0.05,
+        shared_case_percentage: float = 0.3,
+    ):
+
+        self.n_kingpins = n_kingpins
+        self.dealers_per_kingpin = dealers_per_kingpin
+        self.users_per_dealer = users_per_dealer
+        self.dealer_normal_connections = dealer_normal_connections
+        self.kingpin_normal_connections = kingpin_normal_connections
+        self.within_group_connections = within_group_connections
+        self.random_connections = random_connections
+        self.n_normal = n_normal
+        self.node_df = None
+        self.edge_df = None
+        self.labels = None
+        self.seed = 42
+        np.random.seed(self.seed)
+        factory.random.reseed_random(self.seed)
+        self.postal_code = postal_code
+        self.state = state
+        self.affiliations = affiliations
+        self.crimes = crimes
+        self.max_crimes_per_case = max_crimes_per_case
+        self.max_cases_per_person = max_cases_per_person
+        self.max_calls_per_edge = max_calls_per_edge
+        self.call_start_date = call_start_date
+        self.call_end_date = call_end_date
+        self.max_num_whereabouts = max_num_whereabouts
+        self.leader_to_leader_call_chance = leader_to_leader_call_chance
+        self.shared_case_percentage = shared_case_percentage
+
+    # NETWORK GENERATION
+    def generate_network(self):
+        # Generate clusters for kingpins, dealers, and users
+        X_kingpins, _ = make_blobs(
+            n_samples=self.n_kingpins,
+            centers=self.n_kingpins,
+            cluster_std=1.0,
+            random_state=self.seed
+        )
+
+        X_dealers, _ = make_blobs(
+            n_samples=self.dealers_per_kingpin * self.n_kingpins,
+            centers=X_kingpins,
+            cluster_std=2.5,
+            random_state=self.seed
+        )
+
+        X_users, _ = make_blobs(
+            n_samples=self.users_per_dealer * self.dealers_per_kingpin * self.n_kingpins,
+            centers=X_dealers,
+            cluster_std=3.5,
+            random_state=self.seed
+        )
+
+        X_normal = np.random.rand(self.n_normal, 2) * 100  # Normal people data
+
+        # Combine all data
+        points = np.vstack([X_kingpins, X_dealers, X_users, X_normal])
+        self.labels = ['kingpin'] * self.n_kingpins + \
+            ['dealer'] * self.dealers_per_kingpin * self.n_kingpins + \
+            ['user'] * self.dealers_per_kingpin * self.n_kingpins * self.users_per_dealer + \
+            ['normal'] * self.n_normal
+
+        # Create DataFrame for nodes
+        self.node_df = pd.DataFrame(points, columns=['x', 'y'])
+        self.node_df['node_id'] = range(len(self.node_df))
+        self.node_df['type'] = self.labels
+
+        # Assign personal details
+        self.assign_personal_details(self.postal_code, self.state, self.max_num_whereabouts)
+
+        # Assign affiliations
+        self.assign_affiliations()
+
+        # Generate criminal records
+        self.generate_and_assign_criminal_records()
+
+        # Generate edges
+        self.generate_edges()
+
+        # Generate call logs
+        self.generate_and_assign_call_logs(self.call_start_date, self.call_end_date)
+
+    def calculate_nearest_kingpin(self):
+        # Extract coordinates for kingpins and dealers
+        kingpin_coords = self.node_df[self.node_df['type'] == 'kingpin'][['x', 'y']].to_numpy()
+        dealer_coords = self.node_df[self.node_df['type'] == 'dealer'][['x', 'y']].to_numpy()
+
+        # Find nearest kingpin index for each dealer
+        nearest_kingpin_indices = self.find_nearest_kingpin_index(dealer_coords, kingpin_coords)
+
+        # Map nearest kingpin indices back to the original DataFrame indices of kingpins
+        kingpin_df_indices = self.node_df[self.node_df['type'] == 'kingpin'].index.to_numpy()
+        mapped_kingpin_indices = kingpin_df_indices[nearest_kingpin_indices]
+
+        # Assign the mapped kingpin indices to dealers in the DataFrame
+        self.node_df.loc[self.node_df['type'] == 'dealer', 'nearest_kingpin_index'] = mapped_kingpin_indices
+
+    def find_nearest_kingpin_index(
+        self,
+        dealer_coords: np.ndarray,
+        kingpin_coords: np.ndarray
+    ) -> np.ndarray:
+        # Create a KD-tree for kingpin locations
+        tree = cKDTree(kingpin_coords)
+
+        # Query the tree for the nearest kingpin to each dealer.
+        # 'query' returns a tuple where the first element is the distance
+        # and the second element is the index of the nearest kingpin in the tree.
+        _, nearest_kingpin_indices = tree.query(dealer_coords, k=1)
+
+        return nearest_kingpin_indices
+
+    def ensure_kingpin_dealer_connectivity(self) -> list:
+        edge_list = []
+        kingpins = self.node_df[self.node_df['type'] == 'kingpin']
+        for kingpin_index in kingpins.index:
+            affiliated_dealers = self.node_df[(self.node_df['type'] == 'dealer') & (self.node_df['affiliation'] == self.node_df.at[kingpin_index, 'affiliation'])].index
+            # Ensure each kingpin has connections to dealers
+            if not affiliated_dealers.empty:
+                selected_dealers = np.random.choice(affiliated_dealers, size=min(3, len(affiliated_dealers)), replace=False)
+                for dealer_index in selected_dealers:
+                    edge_list.append((kingpin_index, dealer_index))
+        return edge_list
+
+    def connect_dealers_to_users(self) -> list:
+        edge_list = []
+        dealers = self.node_df[self.node_df['type'] == 'dealer']
+        users = self.node_df[self.node_df['type'] == 'user'].index
+        for dealer_index in dealers.index:
+            # Connect each dealer with exactly users_per_dealer users
+            num_connections = self.users_per_dealer
+            selected_users = np.random.choice(users, size=num_connections, replace=False)
+            for user_index in selected_users:
+                edge_list.append((dealer_index, user_index))
+        return edge_list
+
+    def connect_within_group(self) -> list:
+        # Exclude kingpins and normal individuals for within-group connections
+        group_nodes = self.node_df[~self.node_df['type'].isin(['kingpin', 'normal'])]
+
+        # Group by affiliation and type
+        grouped = group_nodes.groupby(['affiliation', 'type'])
+
+        # Initialize an empty list to store edges
+        edge_list = []
+
+        # Iterate over each group
+        for name, group in grouped:
+            # Generate connections for each node in the group
+            for node_index in group.index:
+                # Identify potential connections within the same affiliation and type
+                potential_connections = group.index[group.index != node_index]
+                # Randomly select a subset for connections, bounded by within_group_connections
+                num_connections = np.random.randint(1, self.within_group_connections)
+                if not potential_connections.empty:
+                    selected_connections = np.random.choice(potential_connections, size=min(len(potential_connections), num_connections), replace=False)
+                    # Add connections to the edge list
+                    edge_list.extend([(node_index, connection) for connection in selected_connections])
+
+        return edge_list
+
+    def connect_randomly(self) -> list:
+        # Decide randomly if a node should form random connections (10% of nodes)
+        nodes_to_connect = self.node_df.index[np.random.rand(len(self.node_df)) < 0.1]
+
+        # Function to generate random connections for a node
+        def generate_random_connections(node):
+            # Exclude self-connections
+            potential_connections = self.node_df.index[self.node_df.index != node]
+            num_connections = np.random.randint(1, self.random_connections)
+            selected_connections = np.random.choice(potential_connections, size=min(len(potential_connections), num_connections), replace=False)
+            return [(node, connection) for connection in selected_connections]
+
+        # Generate random connections for each selected node
+        edge_list = [edge for node in nodes_to_connect for edge in generate_random_connections(node)]
+
+        return edge_list
+
+    def connect_to_normals(self) -> list:
+        # Define which roles should have connections to normal individuals
+        roles_with_normal_connections = ['kingpin', 'dealer']
+
+        # Filter the DataFrame for normal individuals
+        normal_people = self.node_df[self.node_df['type'] == 'normal'].index
+
+        # Filter the DataFrame for nodes that should have connections to normal individuals
+        nodes_to_connect = self.node_df[self.node_df['type'].isin(roles_with_normal_connections)]
+
+        # Generate connections for each node
+        connections = nodes_to_connect.apply(lambda row: self.generate_normal_connections(row, normal_people), axis=1)
+
+        # Flatten the list of connections
+        edge_list = [item for sublist in connections for item in sublist]
+
+        return edge_list
+
+    def generate_normal_connections(
+        self,
+        node_row: pd.Series,
+        normal_people: pd.Index
+    ) -> list:
+
+        # Determine the number of normal connections, bounded by the
+        # kingpin_normal_connections / dealer_normal_connections parameters
+        if node_row['type'] == 'kingpin':
+            num_connections = np.random.randint(1, self.kingpin_normal_connections)
+        else:  # Dealers
+            num_connections = np.random.randint(1, self.dealer_normal_connections)
+
+        # Select random normal individuals to connect with
+        selected_normals = np.random.choice(normal_people, size=num_connections, replace=False)
+
+        # Return a list of connections for the node
+        return [(node_row.name, normal_index) for normal_index in selected_normals]
+
+    def generate_edges(self):
+        edge_list = []
+
+        # Initial connections based on affiliations and roles:
+        # ensure kingpin-dealer connectivity and dealer-user connections
+        edge_list.extend(self.ensure_kingpin_dealer_connectivity())
+
+        edge_list.extend(self.connect_dealers_to_users())
+
+        # Within-group connections
+        edge_list.extend(self.connect_within_group())
+
+        # Random connections across the network
+        edge_list.extend(self.connect_randomly())
+
+        # Connect kingpins and dealers to normal individuals
+        edge_list.extend(self.connect_to_normals())
+
+        # Convert edge list to DataFrame
+        self.edge_df = pd.DataFrame(edge_list, columns=['src', 'target'])
+
+    def assign_personal_details(
+        self,
+        postal_code: str,
+        state: str,
+        max_num_whereabouts: int
+    ) -> None:
+
+        details_df = self.generate_details(
+            num_records=len(self.node_df),
+            postal_code=postal_code,
+            state=state,
+            num_whereabouts=max_num_whereabouts
+        )
+        self.node_df = pd.concat([self.node_df, details_df], axis=1)
+        return self.expand_whereabouts_to_columns()
+
+    def flatten_dict(self, d: dict) -> dict:
+        items = []
+        for key, value in d.items():
+            if isinstance(value, dict):
+                items.extend(self.flatten_dict(value).items())
+            else:
+                items.append((key, value))
+        return dict(items)
+
+    # PROFILE GENERATION
+    def generate_details(
+        self,
+        num_records: int,
+        postal_code: str,
+        state: str,
+        num_whereabouts: int
+    ) -> pd.DataFrame:
+
+        return pd.DataFrame([self.flatten_dict(profile.to_dict()) for profile in ProfileFactory.create_batch(num_records, postal_code=postal_code, state=state, num_whereabouts=num_whereabouts)])
+
+    def expand_whereabouts_to_columns(self):
+        max_whereabouts = self.max_num_whereabouts
+
+        # Create a temporary DataFrame from the 'whereabouts' series
+        whereabouts_df = self.node_df['whereabouts'].apply(pd.Series)
+
+        # Iterate over the number of whereabouts
+        for i in range(max_whereabouts):
+            # Extract whereabouts details for each whereabouts
+            whereabouts_details_df = whereabouts_df[i].apply(pd.Series)
+
+            # Assign address, from_date, to_date, and other details to the node DataFrame
+            self.node_df[f'whereabouts_{i+1}_address1'] = whereabouts_details_df['address1']
+            self.node_df[f'whereabouts_{i+1}_address2'] = whereabouts_details_df['address2']
+            self.node_df[f'whereabouts_{i+1}_city'] = whereabouts_details_df['city']
+            self.node_df[f'whereabouts_{i+1}_state'] = whereabouts_details_df['state']
+            self.node_df[f'whereabouts_{i+1}_postalCode'] = whereabouts_details_df['postalCode']
+            self.node_df[f'whereabouts_{i+1}_coordinates'] = whereabouts_details_df['coordinates']
+            # Flatten coordinates into lat and lng
+            coordinates_df = whereabouts_details_df['coordinates'].apply(pd.Series)
+            self.node_df[f'whereabouts_{i+1}_lat'] = coordinates_df['lat']
+            self.node_df[f'whereabouts_{i+1}_lng'] = coordinates_df['lng']
+            # Drop the coordinates column
+            self.node_df.drop(f'whereabouts_{i+1}_coordinates', axis=1, inplace=True)
+
+            self.node_df[f'whereabouts_{i+1}_from_date'] = whereabouts_details_df['from_date']
+            self.node_df[f'whereabouts_{i+1}_to_date'] = whereabouts_details_df['to_date']
+
+        # Drop the original 'whereabouts' column
+        self.node_df.drop('whereabouts', axis=1, inplace=True)
+        # Replace NaN values with None
+        self.node_df = self.node_df.where(pd.notnull(self.node_df), None)
+
+    @staticmethod
+    def random_datetime(
+        year: int,
+        month: int,
+        day: int,
+        hour_start: int,
+        hour_end: int
+    ) -> datetime:
+
+        start = datetime(year, month, day, hour_start)
+        end = datetime(year, month, day, hour_end)
+        return start + timedelta(
+            seconds=np.random.randint(0, int((end - start).total_seconds()))
+        )
+
+    def assign_affiliations(self):
+        # Step 1: Assign an affiliation to each kingpin
+        kingpins = self.node_df[self.node_df['type'] == 'kingpin']
+
+        shuffled_affiliations = np.random.choice(
+            self.affiliations,
+            size=len(self.affiliations),
+            replace=False
+        ).tolist()
+
+        for i, index in enumerate(kingpins.index):
+            if i < len(shuffled_affiliations):
+                # Assign a unique affiliation to each kingpin
+                self.node_df.at[index, 'affiliation'] = shuffled_affiliations[i]
+            else:
+                # If there are more kingpins than affiliations, assign random affiliations to the remaining kingpins
+                self.node_df.at[index, 'affiliation'] = np.random.choice(self.affiliations)
+
+        # Step 2: Calculate nearest kingpin for dealers and assign affiliations
+        self.calculate_nearest_kingpin()
+        # Ensure dealers inherit their kingpin's affiliation
+        self.node_df.loc[self.node_df['type'] == 'dealer', 'affiliation'] = self.node_df.loc[self.node_df['type'] == 'dealer', 'nearest_kingpin_index'].map(lambda x: self.node_df.at[x, 'affiliation'])
+
+        # Step 3: Assign 'None' to users and normal individuals
+        self.node_df.loc[self.node_df['type'].isin(['user', 'normal']), 'affiliation'] = 'None'
+
+    def generate_and_assign_criminal_records(self):
+        unique_case_number = count(start=1000, step=1)  # Unique case number generator
+        gang_related_cases = {}  # To track gang-related case numbers and crimes
+
+        # Generate number of cases for each person
+        self.node_df['num_cases'] = np.random.randint(
+            0,
+            self.max_cases_per_person + 1,
+            size=len(self.node_df)
+        )
+
+        # Generate cases for each person
+        self.node_df['cases'] = self.node_df.apply(
+            lambda row: [
+                self.generate_case(
+                    row,
+                    gang_related_cases,
+                    unique_case_number
+                )
+                for _ in range(row['num_cases'])
+            ],
+            axis=1
+        )
+
+        # Drop the 'num_cases' column as it's no longer needed
+        self.node_df.drop('num_cases', axis=1, inplace=True)
+        return self.expand_cases_to_columns()
+
+    def generate_case(
+        self,
+        person: pd.Series,
+        gang_related_cases: dict,
+        unique_case_number: count
+    ) -> dict:
+        # Affiliated people may share an existing case from their gang;
+        # otherwise, generate a new unique case
+        if person['affiliation'] != 'None' and gang_related_cases.get(person['affiliation']) and np.random.random() < self.shared_case_percentage:
+            shared_case = np.random.choice(gang_related_cases[person['affiliation']])
+            return shared_case
+        else:
+            case_num = next(unique_case_number)
+            crimes_in_case = np.random.choice(
+                self.crimes,
+                np.random.randint(1, self.max_crimes_per_case + 1),
+                replace=False
+            ).tolist()
+
+            new_case = {"case_number": case_num, "crimes": crimes_in_case}
+
+            if person['affiliation'] != 'None':
+                gang_related_cases.setdefault(
+                    person['affiliation'],
+                    []
+                ).append(new_case)
+
+            return new_case
+
+    def expand_cases_to_columns(self):
+        max_cases = self.max_cases_per_person
+        max_crimes_per_case = self.max_crimes_per_case
+
+        # Create a temporary DataFrame from the 'cases' series
+        cases_df = self.node_df['cases'].apply(pd.Series)
+
+        # Iterate over the number of cases
+        for i in range(max_cases):
+            # Extract case details for each case
+            case_details_df = cases_df[i].apply(pd.Series)
+
+            # Assign case number and crimes to the node DataFrame
+            self.node_df[f'case_number_{i+1}'] = case_details_df['case_number'].astype('Int64')
+            self.node_df[f'case_number_{i+1}'] = self.node_df[f'case_number_{i+1}'].astype('object')
+
+            # Extract crimes for each case and assign to the node DataFrame
+            crimes_df = case_details_df['crimes'].apply(pd.Series)
+            for j in range(max_crimes_per_case):
+                self.node_df[f'crime_{i+1}_{j+1}'] = crimes_df[j]
+
+        # Drop the original 'cases' column
+        self.node_df.drop('cases', axis=1, inplace=True)
+        # Replace NaN values with None
+        self.node_df = self.node_df.where(pd.notnull(self.node_df), None)
+
+    # CALL LOG GENERATION
+    def generate_phone_numbers(self):
+        # Assuming self.node_df exists and has been populated
+        self.teledict = self.node_df['phone'].to_dict()
+
+    def generate_and_assign_call_logs(self, start_date, end_date):
+        # Parse date strings
+        start_date = datetime.strptime(start_date, '%Y-%m-%d') \
+            if isinstance(start_date, str) else start_date
+
+        end_date = datetime.strptime(end_date, '%Y-%m-%d') \
+            if isinstance(end_date, str) else end_date
+
+        # Ensure phone numbers are generated
+        if not hasattr(self, 'teledict'):
+            self.generate_phone_numbers()
+
+        # Define a function to generate call logs for a given edge
+        def generate_call_logs(edge: dict) -> list:
+            # Look up caller and callee roles; edges referencing nodes outside
+            # self.node_df are treated as kingpin-to-kingpin calls
+            if edge['src'] not in self.node_df.index or edge['target'] not in self.node_df.index:
+                caller_type = 'kingpin'
+                callee_type = 'kingpin'
+            else:
+                caller_type = self.node_df.loc[edge['src'], 'type']
+                callee_type = self.node_df.loc[edge['target'], 'type']
+
+            # Determine the number of calls for this edge (1 to max_calls_per_edge - 1)
+            num_calls = np.random.randint(1, self.max_calls_per_edge)
+
+            # Assign caller and callee phone numbers
+            caller = self.teledict[edge['src']]
+            callee = self.teledict[edge['target']]
+
+            # Inter-gang: kingpin calls a dealer from a different gang
+            if caller_type == 'kingpin' and callee_type == 'dealer' and self.node_df.loc[edge['src'], 'affiliation'] != self.node_df.loc[edge['target'], 'affiliation']:
+                call_type = 'inter-gang'
+            # Inter-gang: both nodes are kingpins from different gangs
+            elif caller_type == 'kingpin' and callee_type == 'kingpin' and self.node_df.loc[edge['src'], 'affiliation'] != self.node_df.loc[edge['target'], 'affiliation']:
+                call_type = 'inter-gang'
+            # Intra-gang: kingpin calls a dealer from the same gang
+            elif caller_type == 'kingpin' and callee_type == 'dealer' and self.node_df.loc[edge['src'], 'affiliation'] == self.node_df.loc[edge['target'], 'affiliation']:
+                call_type = 'intra-gang'
+            # Intra-gang: dealer to dealer within the same gang
+            elif caller_type == 'dealer' and callee_type == 'dealer' and self.node_df.loc[edge['src'], 'affiliation'] == self.node_df.loc[edge['target'], 'affiliation']:
+                call_type = 'intra-gang'
+            # All other calls are non-affiliated
+            else:
+                call_type = 'non-affiliated'
+
+            # Return a list of call logs for this edge
+            return [{
+                'src': edge['src'],
+                'target': edge['target'],
+                'caller': caller,
+                'callee': callee,
+                'call_time': self.random_datetime(
+                    year=start_date.year + np.random.randint(0, max(1, (end_date - start_date).days // 365)),
+                    month=np.random.randint(1, 13),
+                    day=np.random.randint(1, 29),
+                    hour_start=0 if caller_type in ['user', 'normal'] else 8,
+                    hour_end=23 if caller_type in ['user', 'normal'] else 22
+                ).strftime('%Y-%m-%d %H:%M:%S'),
+                'duration_minutes': np.random.randint(5, 61) if caller_type in ['user', 'normal'] else np.random.randint(1, 16),
+                'call_type': call_type
+            } for _ in range(num_calls)]
+
+        # Generate call logs for each edge
+        call_logs = self.edge_df.apply(generate_call_logs, axis=1).tolist()
+
+        # Generate inter-gang calls between kingpins
+        kingpins = self.node_df[self.node_df['type'] == 'kingpin']
+        kingpin_calls = []
+
+        for i in range(len(kingpins)):
+            for j in range(i + 1, len(kingpins)):
+                if kingpins.iloc[i]['affiliation'] != kingpins.iloc[j]['affiliation'] and np.random.random() < self.leader_to_leader_call_chance:  # default 5% chance of a call
+                    edge = {'src': kingpins.index[i], 'target': kingpins.index[j]}  # Use index here
+                    kingpin_calls.append(generate_call_logs(edge))
+
+        kg_calls = pd.DataFrame(kingpin_calls)
+        call_logs_df = pd.DataFrame(call_logs)
+        # Flatten the per-edge lists of call logs into one row per call
+        flattened_df = pd.json_normalize(
+            call_logs_df.apply(lambda x: x.tolist(), axis=1)
+            .explode()
+            .dropna()
+            .tolist()
+        )
+
+        flattened_king_df = pd.json_normalize(
+            kg_calls.apply(lambda x: x.tolist(), axis=1)
+            .explode()
+            .dropna()
+            .tolist()
+        )
+
+        # Drop rows and columns that are entirely NaN
+        flattened_df = flattened_df \
+            .dropna(axis=0, how='all') \
+            .dropna(axis=1, how='all')
+
+        flattened_king_df = flattened_king_df \
+            .dropna(axis=0, how='all') \
+            .dropna(axis=1, how='all')
+
+        # Assign the flattened DataFrame to self.edge_df
+        self.edge_df = pd.concat([flattened_king_df, flattened_df])
+
+    def to_graph(
+        self,
+        size_dict: dict = None,
+        edge_influence: int = 7,
+        icon_mapping: dict = None,
+        color_mapping: dict = None
+    ) -> graphistry.plotter.Plotter:
+
+        ndf = self.node_df.copy()
+        edf = self.edge_df.copy()
+
+        edge_counts = edf.groupby(['src', 'target', 'call_type']) \
+            .size() \
+            .reset_index(name='weight')
+
+        # Default size_dict if none is provided
+        if size_dict is None:
+            size_dict = {'kingpin': 200, 'dealer': 75, 'user': 50, 'normal': 25}
+
+        ndf['size'] = ndf['type'].map(size_dict)
+
+        # Default icon_mapping if none is provided
+        if icon_mapping is None:
+            icon_mapping = {
+                'kingpin': 'user-o',
+                'dealer': 'user-md',
+                'user': 'users',
+                'normal': 'universal-access',
+            }
+
+        # Default color_mapping if none is provided
+        if color_mapping is None:
+            color_mapping = {
+                'non-affiliated': 'blue',
+                'intra-gang': 'red',
+                'inter-gang': 'orange'
+            }
+
+        g = (
+            graphistry.nodes(ndf, 'node_id')
+            .edges(edge_counts, 'src', 'target')
+            .bind(point_title='type', point_size='size')
+            .bind(edge_weight="weight", edge_color="call_type")
+            .settings(url_params={'edgeInfluence': edge_influence})
+            .encode_point_icon('type', categorical_mapping=icon_mapping)
+            .encode_edge_color(
+                'call_type',
+                categorical_mapping=color_mapping,
+                default_mapping='#CCC'
+            )
+        )
+
+        return g
+
+    def get_dealer_to_user_edges_and_nodes(
+        self,
+        affiliated_nodes: pd.DataFrame
+    ) -> tuple:
+        # Filter the node DataFrame to only include dealers
+        affiliated_dealers = affiliated_nodes[affiliated_nodes['type'] == 'dealer']
+
+        # Join the edges and nodes dataframes on the 'target' column
+        edges_with_node_types = self.edge_df.merge(self.node_df[['node_id', 'type']], left_on='target', right_on='node_id', how='left')
+
+        # Filter the joined dataframe to only include edges from dealers to users
+        dealer_to_user_edges_df = edges_with_node_types[(edges_with_node_types['src'].isin(affiliated_dealers['node_id'])) & (edges_with_node_types['type'] == 'user')]
+
+        # Create the dealer to user edges
+        dealer_to_user_edges = dealer_to_user_edges_df[['src', 'target']].copy()
+        dealer_to_user_edges['role'] = 'user'
+        dealer_to_user_edges['affiliation'] = dealer_to_user_edges['src'].map(affiliated_dealers['affiliation'])
+
+        # Get the user nodes
+        user_nodes = self.node_df[self.node_df['node_id'].isin(dealer_to_user_edges['target'])]
+
+        return dealer_to_user_edges, user_nodes
+
+    def to_tree(self, affiliation: str) -> graphistry.plotter.Plotter:
+        # Filter the node DataFrame by the specified affiliation
+        affiliated_nodes = self.node_df[self.node_df['affiliation'] == affiliation].copy()
+        affiliated_nodes.loc[:, "node_label"] = affiliated_nodes["first_name"] + " " + affiliated_nodes["last_name"]
+
+        dealer_to_user_edges, user_nodes = self.get_dealer_to_user_edges_and_nodes(affiliated_nodes)
+
+        user_nodes = pd.DataFrame(user_nodes)
+        user_nodes.loc[:, "node_label"] = user_nodes["first_name"] + " " + user_nodes["last_name"]
+
+        # Get the kingpin node
+        kingpin_node = affiliated_nodes[affiliated_nodes['type'] == 'kingpin']['node_id'].values[0]
+
+        # Add dealer nodes and edges to the dataframes based on the affiliations
+        dealer_nodes = affiliated_nodes[affiliated_nodes['type'] == 'dealer']
+        dealer_edges = pd.DataFrame({
+            'src': kingpin_node,
+            'target': dealer_nodes['node_id'],
+            'role': dealer_nodes['type'],
+            'affiliation': dealer_nodes['affiliation']
+        })
+
+        # Add dealer to user edges to the new_edges DataFrame
+        new_edges = pd.concat([dealer_edges, dealer_to_user_edges])
+
+        # Add user nodes to the new_nodes DataFrame
+        new_nodes = pd.concat([affiliated_nodes, user_nodes])
+
+        g = graphistry.bind(
+            source='src',
+            destination='target',
+            node='node_id',
+            point_title='node_label'
+        ).edges(new_edges).nodes(new_nodes)
+        g = g.encode_point_color('type', categorical_mapping={'kingpin': 'red', 'dealer': 'blue', 'user': 'green'}, default_mapping='gray')
+        g = g.encode_point_icon('type', categorical_mapping={'kingpin': 'user-o', 'dealer': 'user-md', 'user': 'users'})
+        g = g.settings(url_params={'play': 0, "edgeCurvature": 0.0})
+        g = g.tree_layout(width=100, height=50)
+        return g
\ No newline at end of file
diff --git a/demos/data/scripts/generator/ProfileGenerator.py b/demos/data/scripts/generator/ProfileGenerator.py
new file mode 100644
index 0000000000..b866f9f58c
--- /dev/null
+++ b/demos/data/scripts/generator/ProfileGenerator.py
@@ -0,0 +1,89 @@
+import factory
+from datetime import datetime, timedelta
+import pandas as pd
+
+import numpy as np
+import random_address
+
+
+class Profile:
+    def __init__(
+        self,
+        firstname,
+        lastname,
+        phone_number,
+        username,
+        email,
+        address,
+        dob,
+        whereabouts,
+        num_whereabouts=None,
+        postal_code=None,
+        state=None,
+        rand_num=None
+    ):
+
+        self.postal_code = postal_code
+        self.state = state
+        self.username = username
+        self.email = email
+        self.firstname = firstname
+        self.lastname = lastname
+        self.phone_number = phone_number
+        self.address = address
+        self.DOB = dob
+        self.whereabouts = whereabouts
+        self.rand_num = rand_num
+        self.num_whereabouts = num_whereabouts
+
+    def to_dict(self):
+        return {"first_name": self.firstname,
+                "last_name": self.lastname,
+                "user_name": self.username,
+                "DOB": self.DOB,
+                "email": self.email,
+                "phone": self.phone_number,
+                "address": self.address,
+                "whereabouts": self.whereabouts
+                }
+
+    def __str__(self):
+        return str(self.__dict__)
+
+
+# Profile factory
+class ProfileFactory(factory.Factory):
+    class Meta:
+        model = Profile
+
+    # Optional parameters for address generation
+    state = None
+    postal_code = None
+    num_whereabouts = None
+    rand_num = factory.LazyFunction(lambda: str(np.random.randint(0, 999)))
+    username = factory.LazyAttribute(lambda obj: f"{obj.firstname}.{obj.lastname}{obj.rand_num}".lower())
+    email = factory.LazyAttribute(lambda obj: f"{obj.firstname}.{obj.lastname}{obj.rand_num}{np.random.choice(pd.read_csv('domains.txt', header=None)[0].to_list())}".lower())
+    dob = factory.LazyFunction(lambda: (datetime.today() - timedelta(days=np.random.randint(15 * 365, 85 * 365))).strftime('%m-%d-%Y'))
+    firstname = factory.Faker('first_name')
+    lastname = factory.Faker('last_name')
+    phone_number = factory.Faker('basic_phone_number', locale="en_US")
+    address = factory.LazyAttribute(lambda obj: ProfileFactory.generate_address(state=obj.state, postal_code=obj.postal_code, from_date=(datetime.today() - timedelta(days=np.random.randint(0, 365))).strftime('%m-%d-%Y'), to_date=datetime.today().strftime('%m-%d-%Y')))
+    whereabouts = factory.LazyAttribute(lambda obj: [ProfileFactory.generate_address(state=obj.state, postal_code=obj.postal_code, from_date=(datetime.today() - timedelta(days=np.random.randint(365, 365 * 5))).strftime('%m-%d-%Y'), to_date=(datetime.today() - timedelta(days=np.random.randint(0, 365))).strftime('%m-%d-%Y')) for _ in range(obj.num_whereabouts)])
+
+    @staticmethod
+    def generate_address(state=None, postal_code=None, from_date=None, to_date=None) -> dict:
+        """
+        Generate an address, optionally constrained to a state or a postal code.
+        """
+        if state and postal_code:
+            raise ValueError("Cannot specify both state and postal code. Please choose one.")
+        elif state:
+            address = random_address.real_random_address_by_state(state)
+        elif postal_code:
+            address = random_address.real_random_address_by_postal_code(postal_code)
+        else:
+            address = random_address.real_random_address()
+
+        # Add dates to address
+        address['from_date'] = from_date
+        address['to_date'] = to_date
+
+        return address
\ No newline at end of file
diff --git a/demos/data/scripts/generator/domains.txt b/demos/data/scripts/generator/domains.txt
new file mode 100644
index 0000000000..b728c28628
--- /dev/null
+++ b/demos/data/scripts/generator/domains.txt
@@ -0,0 +1,98 @@
+@gmail.com
+@yahoo.com
+@hotmail.com
+@aol.com
+@hotmail.co.uk
+@hotmail.fr
+@msn.com
+@yahoo.fr
+@wanadoo.fr
+@orange.fr
+@comcast.net
+@yahoo.co.uk
+@yahoo.com.br
+@yahoo.co.in
+@live.com
+@rediffmail.com
+@free.fr
+@gmx.de
+@web.de
+@yandex.ru
+@ymail.com
+@libero.it
+@outlook.com
+@uol.com.br
+@bol.com.br
+@mail.ru
+@cox.net
+@hotmail.it
+@sbcglobal.net
+@sfr.fr
+@live.fr
+@verizon.net
+@live.co.uk
+@googlemail.com
+@yahoo.es
+@ig.com.br
+@live.nl
+@bigpond.com
+@terra.com.br
+@yahoo.it
+@alice.it
+@rocketmail.com
+@att.net
+@laposte.net
+@facebook.com
+@bellsouth.net
+@yahoo.in
+@hotmail.es
+@charter.net
+@yahoo.ca
+@yahoo.com.au
+@rambler.ru
+@hotmail.de
+@tiscali.it
+@shaw.ca
+@yahoo.co.jp
+@sky.com
+@earthlink.net
+@optonline.net
+@freenet.de
+@t-online.de
+@aliceadsl.fr
+@virgilio.it
+@home.nl
+@qq.com
+@telenet.be
+@me.com
+@yahoo.com.ar
+@tiscali.co.uk
+@yahoo.com.mx
+@voila.fr
+@gmx.net
+@mail.com
+@planet.nl
+@tin.it
+@live.it
+@ntlworld.com
+@arcor.de
+@yahoo.co.id
+@frontiernet.net
+@hetnet.nl
+@live.com.au
+@yahoo.com.sg
+@zonnet.nl
+@club-internet.fr
+@juno.com
+@optusnet.com.au
+@blueyonder.co.uk
+@bluewin.ch
+@skynet.be
+@sympatico.ca
+@windstream.net
+@mac.com
+@centurytel.net
+@chello.nl
+@live.ca
+@aim.com
+@bigpond.net.au
diff --git a/setup.py b/setup.py
index c81db1b09c..7964a7dbd4 100755
--- a/setup.py
+++ b/setup.py
@@ -43,19 +43,26 @@ def unique_flatten_dict(d):
     'jupyter': ['ipython'],
 }
 
+base_extras_data = {
+    'data-gen': ['random-address', 'factory_boy']
+}
+
+
 base_extras_heavy = {
     'umap-learn': ['umap-learn', 'dirty-cat==0.2.0', 'scikit-learn>=1.0'],
 }
 
 # https://github.com/facebookresearch/faiss/issues/1589 for faiss-cpu 1.6.1, #'setuptools==67.4.0' removed
 base_extras_heavy['ai'] = base_extras_heavy['umap-learn'] + ['scipy', 'dgl', 'torch<2', 'sentence-transformers', 'faiss-cpu', 'joblib']
 
-base_extras = {**base_extras_light, **base_extras_heavy}
+
+base_extras = {**base_extras_light, **base_extras_heavy, **base_extras_data}
 
 extras_require = {
     **base_extras_light,
     **base_extras_heavy,
     **dev_extras,
+    **base_extras_data,
 
     #kitchen sink for users -- not recommended
     'all': unique_flatten_dict(base_extras),
@@ -63,6 +70,8 @@ def unique_flatten_dict(d):
     #kitchen sink for contributors, skips ai
     'dev': unique_flatten_dict(base_extras_light) + unique_flatten_dict(dev_extras),
 
+    #for people data synthesizer
+    'data': unique_flatten_dict(base_extras_data),
 }
 
 setup(
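
Usage sketch for reviewers (not part of the diff). A minimal way to exercise the new generator, assuming the optional dependencies are installed (e.g. "pip install graphistry[data-gen]" once this PR lands), the script is run from demos/data/scripts/generator/ so that domains.txt and ProfileGenerator resolve, and graphistry.register(...) has been called before plotting; the parameter values shown are illustrative, not prescriptive:

    import graphistry
    from CrimeNetworkGenerator import PersonNetworkGenerator

    # Build a synthetic network: 4 gangs, 5 dealers per kingpin, 3 users per dealer
    gen = PersonNetworkGenerator(n_kingpins=4, dealers_per_kingpin=5, users_per_dealer=3)
    gen.generate_network()  # populates gen.node_df (people) and gen.edge_df (call logs)

    print(gen.node_df['type'].value_counts())                 # kingpin/dealer/user/normal counts
    print(gen.edge_df[['caller', 'callee', 'call_type']].head())

    g = gen.to_graph()                # full call graph, sized by role, colored by call type
    # g.plot()                        # uncomment to upload and render

    tree = gen.to_tree('Gang Alpha')  # kingpin -> dealer -> user hierarchy for one gang
    # tree.plot()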