2323import  inspect 
2424from  pathlib  import  Path 
2525from  typing  import  Any , Dict , List , Optional 
26+ import  gzip 
2627
2728from  pydantic  import  BaseModel 
2829from  typing_extensions  import  Literal 
30+ import  msgpack 
31+ 
32+ 
def msgpk(cls):
    """Class decorator that adds MessagePack (+ gzip) helpers to a Pydantic model.

    The class is modified in place and the same class object is returned.
    The decorated class is expected to provide ``model_dump`` (used by every
    ``to_*`` method), ``model_validate`` (used by the ``from_*`` constructors)
    and ``model_dump_json`` (used by ``get_compression_ratio``).

    Adds methods:
        - to_msgpack_bytes() -> bytes: Serialize to gzip-compressed MessagePack
        - from_msgpack_bytes(data: bytes) -> cls: Inverse of to_msgpack_bytes
        - to_msgpack_dict() -> dict: Convert to a msgpack-compatible dict
        - from_msgpack_dict(data: dict) -> cls: Create instance from such a dict
        - get_msgpack_size() -> int: Size in bytes of to_msgpack_bytes()
        - get_compression_ratio() -> float: Serialized size relative to JSON size
    """

    def _prepare_for_serialization(obj: Any) -> Any:
        """Recursively replace values MessagePack cannot encode.

        ``Path`` becomes ``str``; ``set``/``frozenset`` become ``list``;
        objects exposing ``model_dump`` are dumped and processed again.
        Dict keys are converted as well as values. Anything else is returned
        unchanged.
        """
        if isinstance(obj, Path):
            return str(obj)
        if isinstance(obj, dict):
            return {
                _prepare_for_serialization(key): _prepare_for_serialization(value)
                for key, value in obj.items()
            }
        if isinstance(obj, list):
            return [_prepare_for_serialization(item) for item in obj]
        if isinstance(obj, tuple):
            return tuple(_prepare_for_serialization(item) for item in obj)
        # frozenset is handled together with set: neither has a MessagePack
        # representation, so both are emitted as arrays.
        if isinstance(obj, (set, frozenset)):
            return [_prepare_for_serialization(item) for item in obj]
        if hasattr(obj, "model_dump"):  # Pydantic model
            return _prepare_for_serialization(obj.model_dump())
        return obj

    def to_msgpack_bytes(self) -> bytes:
        """Serialize the model to compact binary format using MessagePack + gzip."""
        payload = _prepare_for_serialization(self.model_dump())
        packed = msgpack.packb(payload, use_bin_type=True)
        return gzip.compress(packed)

    @classmethod
    def from_msgpack_bytes(cls_obj, data: bytes):
        """Deserialize from MessagePack + gzip binary format.

        ``data`` is gunzipped, unpacked with ``raw=False`` and the result is
        handed to ``model_validate``.
        """
        unpacked = msgpack.unpackb(gzip.decompress(data), raw=False)
        return cls_obj.model_validate(unpacked)

    def to_msgpack_dict(self) -> dict:
        """Convert to msgpack-compatible dictionary format."""
        return _prepare_for_serialization(self.model_dump())

    @classmethod
    def from_msgpack_dict(cls_obj, data: dict):
        """Create instance from msgpack-compatible dictionary."""
        return cls_obj.model_validate(data)

    def get_msgpack_size(self) -> int:
        """Get the size of the gzip-compressed msgpack serialization in bytes."""
        return len(self.to_msgpack_bytes())

    def get_compression_ratio(self) -> float:
        """Get the msgpack+gzip size divided by the UTF-8 JSON size.

        Returns 1.0 when the JSON encoding is empty, to avoid dividing by zero.
        """
        json_size = len(self.model_dump_json().encode("utf-8"))
        if json_size <= 0:
            return 1.0
        return self.get_msgpack_size() / json_size

    # Attach the helpers to the decorated class.
    cls.to_msgpack_bytes = to_msgpack_bytes
    cls.from_msgpack_bytes = from_msgpack_bytes
    cls.to_msgpack_dict = to_msgpack_dict
    cls.from_msgpack_dict = from_msgpack_dict
    cls.get_msgpack_size = get_msgpack_size
    cls.get_compression_ratio = get_compression_ratio

    return cls
29105
30106
31107def  builder (cls ):
@@ -92,26 +168,9 @@ def build(self):
92168
93169
94170@builder  
171+ @msgpk  
95172class  PyImport (BaseModel ):
96-     """Represents a Python import statement. 
97- 
98-     Attributes: 
99-         module (str): The name of the module being imported. 
100-         name (str): The name of the imported entity (e.g., function, class). 
101-         alias (Optional[str]): An optional alias for the imported entity. 
102-         start_line (int): The line number where the import statement starts. 
103-         end_line (int): The line number where the import statement ends. 
104-         start_column (int): The starting column of the import statement. 
105-         end_column (int): The ending column of the import statement. 
106- 
107-     Example: 
108-         - import numpy as np will be represented as: 
109-             PyImport(module="numpy", name="np", alias="np", start_line=1, end_line=1, start_column=0, end_column=16) 
110-         - from math import sqrt will be represented as: 
111-             PyImport(module="math", name="sqrt", alias=None, start_line=2, end_line=2, start_column=0, end_column=20 
112-         - from os.path import join as path_join will be represented as: 
113-             PyImport(module="os.path", name="path_join", alias="join", start_line=3, end_line=3, start_column=0, end_column=30) 
114-     """ 
173+     """Represents a Python import statement.""" 
115174
116175    module : str 
117176    name : str 
@@ -123,18 +182,9 @@ class PyImport(BaseModel):
123182
124183
125184@builder  
185+ @msgpk  
126186class  PyComment (BaseModel ):
127-     """ 
128-     Represents a Python comment. 
129- 
130-     Attributes: 
131-         content (str): The actual comment string (without the leading '#'). 
132-         start_line (int): The line number where the comment starts. 
133-         end_line (int): The line number where the comment ends (same as start_line for single-line comments). 
134-         start_column (int): The starting column of the comment. 
135-         end_column (int): The ending column of the comment. 
136-         is_docstring (bool): Whether this comment is actually a docstring (triple-quoted string). 
137-     """ 
187+     """Represents a Python comment.""" 
138188
139189    content : str 
140190    start_line : int  =  - 1 
@@ -145,20 +195,9 @@ class PyComment(BaseModel):
145195
146196
147197@builder  
198+ @msgpk  
148199class  PySymbol (BaseModel ):
149-     """ 
150-     Represents a symbol used or declared in Python code. 
151- 
152-     Attributes: 
153-         name (str): The name of the symbol (e.g., 'x', 'self.x', 'os.path'). 
154-         scope (Literal['local', 'nonlocal', 'global', 'class', 'module']): The scope where the symbol is accessed. 
155-         kind (Literal['variable', 'parameter', 'attribute', 'function', 'class', 'module']): The kind of symbol. 
156-         type (Optional[str]): Inferred or annotated type, if available. 
157-         qualified_name (Optional[str]): Fully qualified name (e.g., 'self.x', 'os.path.join'). 
158-         is_builtin (bool): Whether this is a Python builtin. 
159-         lineno (int): Line number where the symbol is accessed or declared. 
160-         col_offset (int): Column offset. 
161-     """ 
200+     """Represents a symbol used or declared in Python code.""" 
162201
163202    name : str 
164203    scope : Literal ["local" , "nonlocal" , "global" , "class" , "module" ]
@@ -171,11 +210,9 @@ class PySymbol(BaseModel):
171210
172211
173212@builder  
213+ @msgpk  
174214class  PyVariableDeclaration (BaseModel ):
175-     """Represents a Python variable declaration. 
176- 
177-     Attributes: 
178-     """ 
215+     """Represents a Python variable declaration.""" 
179216
180217    name : str 
181218    type : Optional [str ]
@@ -189,18 +226,9 @@ class PyVariableDeclaration(BaseModel):
189226
190227
191228@builder  
229+ @msgpk  
192230class  PyCallableParameter (BaseModel ):
193-     """Represents a parameter of a Python callable (function/method). 
194- 
195-     Attributes: 
196-         name (str): The name of the parameter. 
197-         type (str): The type of the parameter. 
198-         default_value (str): The default value of the parameter, if any. 
199-         start_line (int): The line number where the parameter is defined. 
200-         end_line (int): The line number where the parameter definition ends. 
201-         start_column (int): The column number where the parameter starts. 
202-         end_column (int): The column number where the parameter ends. 
203-     """ 
231+     """Represents a parameter of a Python callable (function/method).""" 
204232
205233    name : str 
206234    type : Optional [str ] =  None 
@@ -212,10 +240,9 @@ class PyCallableParameter(BaseModel):
212240
213241
214242@builder  
243+ @msgpk  
215244class  PyCallsite (BaseModel ):
216-     """ 
217-     Represents a Python call site (function or method invocation) with contextual metadata. 
218-     """ 
245+     """Represents a Python call site (function or method invocation) with contextual metadata.""" 
219246
220247    method_name : str 
221248    receiver_expr : Optional [str ] =  None 
@@ -231,26 +258,9 @@ class PyCallsite(BaseModel):
231258
232259
233260@builder  
261+ @msgpk  
234262class  PyCallable (BaseModel ):
235-     """Represents a Python callable (function/method). 
236- 
237-     Attributes: 
238-         name (str): The name of the callable. 
239-         signature (str): The fully qualified name of the callable (e.g., module.function_name). 
240-         docstring (PyComment): The docstring of the callable. 
241-         decorators (List[str]): List of decorators applied to the callable. 
242-         parameters (List[PyCallableParameter]): List of parameters for the callable. 
243-         return_type (Optional[str]): The type of the return value, if specified. 
244-         code (str): The actual code of the callable. 
245-         start_line (int): The line number where the callable is defined. 
246-         end_line (int): The line number where the callable definition ends. 
247-         code_start_line (int): The line number where the code block starts. 
248-         accessed_symbols (List[str]): Symbols accessed within the callable. 
249-         call_sites (List[str]): Call sites of this callable. 
250-         is_entrypoint (bool): Whether this callable is an entry point. 
251-         local_variables (List[PyVariableDeclaration]): Local variables within the callable. 
252-         cyclomatic_complexity (int): Cyclomatic complexity of the callable. 
253-     """ 
263+     """Represents a Python callable (function/method).""" 
254264
255265    name : str 
256266    path : str 
@@ -274,16 +284,9 @@ def __hash__(self) -> int:
274284
275285
276286@builder  
287+ @msgpk  
277288class  PyClassAttribute (BaseModel ):
278-     """Represents a Python class attribute. 
279- 
280-     Attributes: 
281-         name (str): The name of the attribute. 
282-         type (str): The type of the attribute. 
283-         docstring (PyComment): The docstring of the attribute. 
284-         start_line (int): The line number where the attribute is defined. 
285-         end_line (int): The line number where the attribute definition ends. 
286-     """ 
289+     """Represents a Python class attribute.""" 
287290
288291    name : str 
289292    type : Optional [str ] =  None 
@@ -293,20 +296,9 @@ class PyClassAttribute(BaseModel):
293296
294297
295298@builder  
299+ @msgpk  
296300class  PyClass (BaseModel ):
297-     """Represents a Python class. 
298- 
299-     Attributes: 
300-         name (str): The name of the class. 
301-         signature (str): The fully qualified name of the class (e.g., module.class_name). 
302-         docstring (PyComment): The docstring of the class. 
303-         base_classes (List[str]): List of base class names. 
304-         methods (Dict[str, PyCallable]): Mapping of method names to their callable representations. 
305-         attributes (Dict[str, PyClassAttribute]): Mapping of attribute names to their variable declarations. 
306-         inner_classes (Dict[str, "PyClass"]): Mapping of inner class names to their class representations. 
307-         start_line (int): The line number where the class definition starts. 
308-         end_line (int): The line number where the class definition ends. 
309-     """ 
301+     """Represents a Python class.""" 
310302
311303    name : str 
312304    signature : str   # e.g., module.class_name 
@@ -325,18 +317,9 @@ def __hash__(self):
325317
326318
327319@builder  
320+ @msgpk  
328321class  PyModule (BaseModel ):
329-     """Represents a Python module. 
330- 
331-     Attributes: 
332-         file_path (str): The file path of the module. 
333-         module_name (str): The name of the module (e.g., module.submodule). 
334-         imports (List[PyImport]): List of import statements in the module. 
335-         comments (List[PyComment]): List of comments in the module. 
336-         classes (Dict[str, PyClass]): Mapping of class names to their class representations. 
337-         functions (Dict[str, PyCallable]): Mapping of function names to their callable representations. 
338-         variables (List[PyVariableDeclaration]): List of variable declarations in the module. 
339-     """ 
322+     """Represents a Python module.""" 
340323
341324    file_path : str 
342325    module_name : str 
@@ -348,13 +331,8 @@ class PyModule(BaseModel):
348331
349332
350333@builder  
334+ @msgpk  
351335class  PyApplication (BaseModel ):
352-     """Represents a Python application. 
353- 
354-     Attributes: 
355-         name (str): The name of the application. 
356-         version (str): The version of the application. 
357-         description (str): A brief description of the application. 
358-     """ 
336+     """Represents a Python application.""" 
359337
360338    symbol_table : dict [Path , PyModule ]
0 commit comments