55
66XS = "http://www.w3.org/2001/XMLSchema"
77NS = {"xs" : XS }
8+ _TYPES_REGISTRY : Dict [str , Dict [str , Any ]] = {}
9+ _ROOT_NODE : Optional [etree ._Element ] = None # to resolve global xs:group by @ref
810
911# ---- Internal model data-classes (serialized to JSON) -----------------------
1012
@@ -29,21 +31,29 @@ class FieldDoc:
@dataclass
class FieldModel:
    """One node of the internal schema model (element, attribute or choice).

    Instances are converted to plain dictionaries via :meth:`to_dict` before
    being serialized to JSON.
    """

    # common
    kind: str  # "element" | "attribute" | "choice"
    name: str
    dtype: str  # xs:<builtin> | QName of named type | "object" for complex
    refType: Optional[str] = None  # set when the element points to a named type (simple/complex)
    minOccurs: int = 1
    maxOccurs: Optional[int] = 1  # None => unbounded
    required: Optional[bool] = None  # for attributes (use="required")
    documentation: Optional[FieldDoc] = None
    facets: Optional[Facets] = None
    children: Optional[List["FieldModel"]] = None  # for complex/object
    attributes: Optional[List["FieldModel"]] = None
    # For a choice, the alternatives are stored in ``children``; each
    # alternative is a complete FieldModel (usually an element).

    def to_dict(self) -> Dict[str, Any]:
        """Return this node as a dictionary with ``None`` values removed.

        ``maxOccurs`` is the one exception: it is always kept, because a
        ``None`` there means "unbounded" and must survive as JSON ``null``.
        Only the top-level keys are filtered; nested dictionaries produced by
        ``asdict`` are passed through as they are.
        """
        return {
            key: value
            for key, value in asdict(self).items()
            if key == "maxOccurs" or value is not None
        }
4757
4858@dataclass
4959class SchemaModel :
@@ -142,13 +152,25 @@ def _parse_complex_type(ct: etree._Element) -> Dict[str, Any]:
142152 # Handle sequence/choice/all and attributes
143153 documentation = _first_doc (ct )
144154 children : List [FieldModel ] = []
145- # Content model (sequence | choice | all)
146- for tag in ("sequence" , "choice" , "all" ):
147- group = ct .find (f"./xs:{ tag } " , namespaces = NS )
148- if group is not None :
149- for el in group .findall ("./xs:element" , namespaces = NS ):
150- children .append (_parse_element (el ))
151- break
155+ # Content model (sequence | choice | all | group), supports complexContent/extension
156+ group = None
157+ # unwrap complexContent/extension if present
158+ cc = ct .find ("./xs:complexContent" , namespaces = NS )
159+ if cc is not None :
160+ ext = cc .find ("./xs:extension" , namespaces = NS )
161+ if ext is not None :
162+ # descend into its model group
163+ for tag in ("sequence" , "choice" , "all" , "group" ):
164+ group = ext .find (f"./xs:{ tag } " , namespaces = NS )
165+ if group is not None :
166+ break
167+ if group is None :
168+ for tag in ("sequence" , "choice" , "all" , "group" ):
169+ group = ct .find (f"./xs:{ tag } " , namespaces = NS )
170+ if group is not None :
171+ break
172+ if group is not None :
173+ children .extend (_parse_model_group (group ))
152174 # attributes (xs:attribute)
153175 attrs : List [FieldModel ] = []
154176 for a in ct .findall ("./xs:attribute" , namespaces = NS ):
@@ -162,6 +184,67 @@ def _parse_complex_type(ct: etree._Element) -> Dict[str, Any]:
162184 result ["documentation" ] = asdict (documentation )
163185 return result
164186
def _parse_model_group(
    group: etree._Element,
    _resolving: frozenset = frozenset(),
) -> List[FieldModel]:
    """
    Recursively parse an xs:sequence | xs:choice | xs:all | xs:group particle.

    * ``xs:group`` with ``@ref`` is resolved against the module-level
      ``_ROOT_NODE`` and replaced by the content of the referenced group.
      When the referenced group consists of a single choice, the
      minOccurs/maxOccurs of the referencing ``xs:group`` are applied to it.
      Unknown references, groups without ``@ref`` and an unset ``_ROOT_NODE``
      yield an empty list.
    * ``xs:choice`` becomes a single ``FieldModel`` of kind ``"choice"`` whose
      ``children`` are the alternatives. Only direct ``xs:element`` children
      are collected as alternatives.
    * ``xs:sequence`` / ``xs:all`` are flattened: elements are appended in
      document order and nested model groups are parsed recursively.

    :param group: the model-group node to parse.
    :param _resolving: internal; names of the global groups currently being
        expanded. Used to stop on circular group references instead of
        recursing until ``RecursionError``. Callers should not pass it.
    :return: the parsed fields, possibly empty.
    """
    # lxml: comments / processing instructions carry a non-string tag.
    if not isinstance(group.tag, str):
        return []
    tag_local = group.tag.split("}")[-1]

    # Resolve xs:group @ref -> inline the referenced global group.
    if tag_local == "group":
        ref = group.get("ref")
        if not ref or _ROOT_NODE is None:
            return []
        gmin, gmax = _occurs(group)  # multiplicity on the referencing <xs:group>
        # strip prefix if any (ns:name -> name)
        ref_name = ref.split(":")[-1]
        if ref_name in _resolving:
            # Circular reference (invalid schema): stop expanding here.
            return []
        # Compare @name in Python rather than interpolating the reference into
        # an XPath string: the value comes from the schema and a quote in it
        # would otherwise break the path expression.
        gdef = next(
            (
                candidate
                for candidate in _ROOT_NODE.iterfind(".//xs:group", namespaces=NS)
                if candidate.get("name") == ref_name
            ),
            None,
        )
        if gdef is None:
            # unknown group -> ignore silently (safe default)
            return []
        # The group definition contains its own sequence/choice/all.
        for t in ("sequence", "choice", "all"):
            inner = gdef.find(f"./xs:{t}", namespaces=NS)
            if inner is None:
                continue
            inner_fms = _parse_model_group(inner, _resolving | {ref_name})
            # If the referenced group is a single CHOICE, apply the
            # occurrences from the referencing node.
            if len(inner_fms) == 1 and inner_fms[0].kind == "choice":
                inner_fms[0].minOccurs = gmin
                inner_fms[0].maxOccurs = gmax  # None -> unbounded; preserved by to_dict()
            return inner_fms
        return []

    if tag_local == "choice":
        # Preserve the multiplicity of the CHOICE group itself.
        mi, ma = _occurs(group)  # ma is None for unbounded
        alts = [
            _parse_element(el)
            for el in group.findall("./xs:element", namespaces=NS)
        ]
        return [
            FieldModel(
                kind="choice",
                name="__choice__",
                dtype="object",
                minOccurs=mi,
                maxOccurs=ma,  # IMPORTANT: do not normalize; keep None for unbounded
                documentation=_first_doc(group),
                children=alts,
            )
        ]

    # sequence / all: walk the child nodes in document order.
    out: List[FieldModel] = []
    for node in group:
        # Skip comments / processing instructions (non-string tag in lxml).
        if not isinstance(node.tag, str):
            continue
        ntag = node.tag.split("}")[-1]
        if ntag == "element":
            out.append(_parse_element(node))
        elif ntag in ("sequence", "choice", "all", "group"):
            out.extend(_parse_model_group(node, _resolving))
        # other particles (e.g. xs:any) are ignored for now
    return out
247+
165248def _occurs (node : etree ._Element ) -> Tuple [int , Optional [int ]]:
166249 mi = node .get ("minOccurs" )
167250 ma = node .get ("maxOccurs" )
@@ -235,6 +318,32 @@ def _parse_element(el: etree._Element) -> FieldModel:
235318 return fm
236319
237320 # plain reference to named type (simple or complex)
321+ if dtype and dtype in _TYPES_REGISTRY :
322+ t = _TYPES_REGISTRY [dtype ]
323+ if t .get ("kind" ) == "simpleType" :
324+ base = t .get ("base" ) or "xs:string"
325+ return FieldModel (
326+ kind = "element" ,
327+ name = name ,
328+ dtype = base ,
329+ refType = dtype ,
330+ minOccurs = minOccurs ,
331+ maxOccurs = maxOccurs if maxOccurs != 1 else 1 ,
332+ documentation = documentation ,
333+ facets = (Facets (** t ["facets" ]) if t .get ("facets" ) else None ),
334+ )
335+ else :
336+ # complexType: keep a shallow node; renderer will resolve from types[dtype]
337+ return FieldModel (
338+ kind = "element" ,
339+ name = name ,
340+ dtype = "object" ,
341+ refType = dtype ,
342+ minOccurs = minOccurs ,
343+ maxOccurs = maxOccurs if maxOccurs != 1 else 1 ,
344+ documentation = documentation ,
345+ )
346+ # fallback: same behaviour as before
238347 return FieldModel (
239348 kind = "element" ,
240349 name = name ,
@@ -251,6 +360,10 @@ def build_internal_model(content: bytes) -> Dict[str, Any]:
251360
252361 # registry of named types
253362 types = _resolve_named_types (root )
363+ global _TYPES_REGISTRY
364+ _TYPES_REGISTRY = types
365+ global _ROOT_NODE
366+ _ROOT_NODE = root
254367
255368 # root elements (top-level xs:element)
256369 roots : List [FieldModel ] = []
0 commit comments