diff --git a/newsfragments/5288.added.md b/newsfragments/5288.added.md new file mode 100644 index 00000000000..2618fb10566 --- /dev/null +++ b/newsfragments/5288.added.md @@ -0,0 +1 @@ +Embedded doc mode using HTML comments to fix attribute order issues. \ No newline at end of file diff --git a/pyo3-macros-backend/src/attributes.rs b/pyo3-macros-backend/src/attributes.rs index af81e7aea69..238b2f7cf77 100644 --- a/pyo3-macros-backend/src/attributes.rs +++ b/pyo3-macros-backend/src/attributes.rs @@ -18,8 +18,10 @@ pub mod kw { syn::custom_keyword!(cancel_handle); syn::custom_keyword!(constructor); syn::custom_keyword!(dict); + syn::custom_keyword!(doc_mode); syn::custom_keyword!(eq); syn::custom_keyword!(eq_int); + syn::custom_keyword!(end_doc_mode); syn::custom_keyword!(extends); syn::custom_keyword!(freelist); syn::custom_keyword!(from_py_with); diff --git a/pyo3-macros-backend/src/method.rs b/pyo3-macros-backend/src/method.rs index 0e8977db101..448ffa53c7d 100644 --- a/pyo3-macros-backend/src/method.rs +++ b/pyo3-macros-backend/src/method.rs @@ -976,7 +976,7 @@ impl<'a> FnSpec<'a> { } /// Forwards to [utils::get_doc] with the text signature of this spec. - pub fn get_doc(&self, attrs: &[syn::Attribute], ctx: &Ctx) -> syn::Result { + pub fn get_doc(&self, attrs: &mut Vec, ctx: &Ctx) -> syn::Result { let text_signature = self .text_signature_call_signature() .map(|sig| format!("{}{}", self.python_name, sig)); diff --git a/pyo3-macros-backend/src/module.rs b/pyo3-macros-backend/src/module.rs index 1888f564df5..5b3d9a8dd7b 100644 --- a/pyo3-macros-backend/src/module.rs +++ b/pyo3-macros-backend/src/module.rs @@ -453,7 +453,7 @@ pub fn pymodule_function_impl( .name .map_or_else(|| ident.unraw(), |name| name.value.0); let vis = &function.vis; - let doc = get_doc(&function.attrs, None, ctx)?; + let doc = get_doc(&mut function.attrs, None, ctx)?; let initialization = module_initialization( &name, diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 714d56ed247..ef2e46a5807 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -250,7 +250,7 @@ pub fn build_py_class( args.options.take_pyo3_options(&mut class.attrs)?; let ctx = &Ctx::new(&args.options.krate, None); - let doc = utils::get_doc(&class.attrs, None, ctx)?; + let doc = utils::get_doc(&mut class.attrs, None, ctx)?; if let Some(lt) = class.generics.lifetimes().next() { bail_spanned!( @@ -446,11 +446,11 @@ fn impl_class( ctx, )?; - let (default_class_geitem, default_class_geitem_method) = - pyclass_class_geitem(&args.options, &syn::parse_quote!(#cls), ctx)?; + let (default_class_getitem, default_class_getitem_method) = + pyclass_class_getitem(&args.options, &syn::parse_quote!(#cls), ctx)?; - if let Some(default_class_geitem_method) = default_class_geitem_method { - default_methods.push(default_class_geitem_method); + if let Some(default_class_getitem_method) = default_class_getitem_method { + default_methods.push(default_class_getitem_method); } let (default_str, default_str_slot) = @@ -484,7 +484,7 @@ fn impl_class( #default_richcmp #default_hash #default_str - #default_class_geitem + #default_class_getitem } }) } @@ -531,7 +531,7 @@ pub fn build_py_enum( bail_spanned!(generic.span() => "enums do not support #[pyclass(generic)]"); } - let doc = utils::get_doc(&enum_.attrs, None, ctx)?; + let doc = utils::get_doc(&mut enum_.attrs, None, ctx)?; let enum_ = PyClassEnum::new(enum_)?; impl_enum(enum_, &args, doc, method_type, ctx) } @@ -1768,7 +1768,7 @@ fn complex_enum_variant_field_getter<'a>( let property_type = crate::pymethod::PropertyType::Function { self_type: &self_type, spec: &spec, - doc: crate::get_doc(&[], None, ctx)?, + doc: PythonDoc::empty(ctx), }; let getter = crate::pymethod::impl_py_getter_def(variant_cls_type, property_type, ctx)?; @@ -2036,7 +2036,7 @@ fn pyclass_hash( } } -fn pyclass_class_geitem( +fn pyclass_class_getitem( options: &PyClassPyO3Options, cls: &syn::Type, ctx: &Ctx, @@ -2045,7 +2045,7 @@ fn pyclass_class_geitem( match options.generic { Some(_) => { let ident = format_ident!("__class_getitem__"); - let mut class_geitem_impl: syn::ImplItemFn = { + let mut class_getitem_impl: syn::ImplItemFn = { parse_quote! { #[classmethod] fn #ident<'py>( @@ -2058,19 +2058,19 @@ fn pyclass_class_geitem( }; let spec = FnSpec::parse( - &mut class_geitem_impl.sig, - &mut class_geitem_impl.attrs, + &mut class_getitem_impl.sig, + &mut class_getitem_impl.attrs, Default::default(), )?; - let class_geitem_method = crate::pymethod::impl_py_method_def( + let class_getitem_method = crate::pymethod::impl_py_method_def( cls, &spec, - &spec.get_doc(&class_geitem_impl.attrs, ctx)?, + &spec.get_doc(&mut class_getitem_impl.attrs, ctx)?, Some(quote!(#pyo3_path::ffi::METH_CLASS)), ctx, )?; - Ok((Some(class_geitem_impl), Some(class_geitem_method))) + Ok((Some(class_getitem_impl), Some(class_getitem_method))) } None => Ok((None, None)), } diff --git a/pyo3-macros-backend/src/pyfunction.rs b/pyo3-macros-backend/src/pyfunction.rs index 303af155acf..96530e14f93 100644 --- a/pyo3-macros-backend/src/pyfunction.rs +++ b/pyo3-macros-backend/src/pyfunction.rs @@ -425,7 +425,7 @@ pub fn impl_wrap_pyfunction( ); } let wrapper = spec.get_wrapper_function(&wrapper_ident, None, ctx)?; - let methoddef = spec.get_methoddef(wrapper_ident, &spec.get_doc(&func.attrs, ctx)?, ctx); + let methoddef = spec.get_methoddef(wrapper_ident, &spec.get_doc(&mut func.attrs, ctx)?, ctx); let wrapped_pyfunction = quote! { // Create a module with the same name as the `#[pyfunction]` - this way `use ` diff --git a/pyo3-macros-backend/src/pyimpl.rs b/pyo3-macros-backend/src/pyimpl.rs index 282a6f35d7b..ae13ce8f910 100644 --- a/pyo3-macros-backend/src/pyimpl.rs +++ b/pyo3-macros-backend/src/pyimpl.rs @@ -139,7 +139,7 @@ pub fn impl_methods( let method = PyMethod::parse(&mut meth.sig, &mut meth.attrs, fun_options)?; #[cfg(feature = "experimental-inspect")] extra_fragments.push(method_introspection_code(&method.spec, ty, ctx)); - match pymethod::gen_py_method(ty, method, &meth.attrs, ctx)? { + match pymethod::gen_py_method(ty, method, &mut meth.attrs, ctx)? { GeneratedPyMethod::Method(MethodAndMethodDef { associated_method, method_def, diff --git a/pyo3-macros-backend/src/pymethod.rs b/pyo3-macros-backend/src/pymethod.rs index eede90f882f..9ab96de7d52 100644 --- a/pyo3-macros-backend/src/pymethod.rs +++ b/pyo3-macros-backend/src/pymethod.rs @@ -191,7 +191,7 @@ pub fn is_proto_method(name: &str) -> bool { pub fn gen_py_method( cls: &syn::Type, method: PyMethod<'_>, - meth_attrs: &[syn::Attribute], + meth_attrs: &mut Vec, ctx: &Ctx, ) -> Result { let spec = &method.spec; @@ -233,50 +233,59 @@ pub fn gen_py_method( } } // ordinary functions (with some specialties) - (_, FnType::Fn(_)) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - None, - ctx, - )?), - (_, FnType::FnClass(_)) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - Some(quote!(#pyo3_path::ffi::METH_CLASS)), - ctx, - )?), - (_, FnType::FnStatic) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - Some(quote!(#pyo3_path::ffi::METH_STATIC)), - ctx, - )?), + (_, FnType::Fn(_)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def(cls, spec, &doc, None, ctx)?) + } + (_, FnType::FnClass(_)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def( + cls, + spec, + &doc, + Some(quote!(#pyo3_path::ffi::METH_CLASS)), + ctx, + )?) + } + (_, FnType::FnStatic) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def( + cls, + spec, + &doc, + Some(quote!(#pyo3_path::ffi::METH_STATIC)), + ctx, + )?) + } // special prototypes (_, FnType::FnNew) | (_, FnType::FnNewClass(_)) => { GeneratedPyMethod::Proto(impl_py_method_def_new(cls, spec, ctx)?) } - (_, FnType::Getter(self_type)) => GeneratedPyMethod::Method(impl_py_getter_def( - cls, - PropertyType::Function { - self_type, - spec, - doc: spec.get_doc(meth_attrs, ctx)?, - }, - ctx, - )?), - (_, FnType::Setter(self_type)) => GeneratedPyMethod::Method(impl_py_setter_def( - cls, - PropertyType::Function { - self_type, - spec, - doc: spec.get_doc(meth_attrs, ctx)?, - }, - ctx, - )?), + (_, FnType::Getter(self_type)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_getter_def( + cls, + PropertyType::Function { + self_type, + spec, + doc, + }, + ctx, + )?) + } + (_, FnType::Setter(self_type)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_setter_def( + cls, + PropertyType::Function { + self_type, + spec, + doc, + }, + ctx, + )?) + } (_, FnType::FnModule(_)) => { unreachable!("methods cannot be FnModule") } @@ -978,12 +987,16 @@ impl PropertyType<'_> { } fn doc(&self, ctx: &Ctx) -> Result> { - match self { + let doc = match self { PropertyType::Descriptor { field, .. } => { - utils::get_doc(&field.attrs, None, ctx).map(Cow::Owned) + // FIXME: due to the clone this will not properly strip Rust documentation, maybe + // need to parse the field and doc earlier in the process? + let mut attrs = field.attrs.clone(); + Cow::Owned(utils::get_doc(&mut attrs, None, ctx)?) } - PropertyType::Function { doc, .. } => Ok(Cow::Borrowed(doc)), - } + PropertyType::Function { doc, .. } => Cow::Borrowed(doc), + }; + Ok(doc) } } diff --git a/pyo3-macros-backend/src/utils.rs b/pyo3-macros-backend/src/utils.rs index 0df0b75e431..7518a200b29 100644 --- a/pyo3-macros-backend/src/utils.rs +++ b/pyo3-macros-backend/src/utils.rs @@ -115,6 +115,13 @@ impl quote::ToTokens for LitCStr { #[derive(Clone)] pub struct PythonDoc(PythonDocKind); +impl PythonDoc { + /// Returns an empty docstring. + pub fn empty(ctx: &Ctx) -> Self { + PythonDoc(PythonDocKind::LitCStr(LitCStr::empty(ctx))) + } +} + #[derive(Clone)] enum PythonDocKind { LitCStr(LitCStr), @@ -123,13 +130,22 @@ enum PythonDocKind { Tokens(TokenStream), } +enum DocParseMode { + /// Currently generating docs for both Python and Rust. + Both, + /// Currently generating docs for Python only. + PythonOnly, + /// Currently generating docs for Rust only. + RustOnly, +} + /// Collects all #[doc = "..."] attributes into a TokenStream evaluating to a null-terminated string. /// /// If this doc is for a callable, the provided `text_signature` can be passed to prepend /// this to the documentation suitable for Python to extract this into the `__text_signature__` /// attribute. pub fn get_doc( - attrs: &[syn::Attribute], + attrs: &mut Vec, mut text_signature: Option, ctx: &Ctx, ) -> syn::Result { @@ -143,40 +159,123 @@ pub fn get_doc( let mut parts = Punctuated::::new(); let mut first = true; let mut current_part = text_signature.unwrap_or_default(); - let mut current_part_span = None; + let mut current_part_span = None; // Track span for error reporting + + let mut mode = DocParseMode::Both; + + let mut to_retain = vec![]; // Collect indices of attributes to retain - for attr in attrs { + for (i, attr) in attrs.iter().enumerate() { if attr.path().is_ident("doc") { if let Ok(nv) = attr.meta.require_name_value() { - current_part_span = match current_part_span { - None => Some(nv.value.span()), - Some(span) => span.join(nv.value.span()), - }; - if !first { - current_part.push('\n'); - } else { - first = false; - } + let include_in_python; + let retain_in_rust; + if let syn::Expr::Lit(syn::ExprLit { lit: syn::Lit::Str(lit_str), .. }) = &nv.value { + // Update span for error reporting + if current_part_span.is_none() { + current_part_span = Some(lit_str.span()); + } + // Strip single left space from literal strings, if needed. - // e.g. `/// Hello world` expands to #[doc = " Hello world"] let doc_line = lit_str.value(); - current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); + let stripped_line = doc_line.strip_prefix(' ').unwrap_or(&doc_line); + let trimmed = stripped_line.trim(); + + // Check if this is a mode switch instruction + if let Some(content) = trimmed + .strip_prefix("")) + { + let content_trimmed = content.trim(); + if content_trimmed.starts_with("pyo3_doc_mode:") { + let value = content_trimmed + .strip_prefix("pyo3_doc_mode:") + .unwrap_or("") + .trim(); + mode = match value { + "python" => DocParseMode::PythonOnly, + "rust" => DocParseMode::RustOnly, + "both" => DocParseMode::Both, + _ => return Err(syn::Error::new( + lit_str.span(), + format!("Invalid doc_mode: '{}'. Expected 'python', 'rust', or 'both'.", value) + )), + }; + // Do not retain mode switch lines in Rust, and skip in Python + continue; + } else if is_likely_pyo3_doc_mode_typo(content_trimmed) { + // Handle potential typos in pyo3_doc_mode prefix + return Err(syn::Error::new( + lit_str.span(), + format!( + "Suspicious comment '{}' - did you mean 'pyo3_doc_mode'? Valid format: ", + content_trimmed + ) + )); + } + // If it's an HTML comment but not pyo3_doc_mode related, + // it will be included based on current mode (no special handling) + } + + // Not a mode switch, decide based on current mode + include_in_python = + matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); + + // Include in Python doc if needed + if include_in_python { + if !first { + current_part.push('\n'); + } else { + first = false; + } + current_part.push_str(stripped_line); + } } else { - // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] - // Reset the string buffer, write that part, and then push this macro part too. - parts.push(quote_spanned!(current_part_span.unwrap_or(Span::call_site()) => #current_part)); - current_part.clear(); - parts.push(nv.value.to_token_stream()); + // This is probably a macro doc, e.g. #[doc = include_str!(...)] + // Decide based on current mode + include_in_python = + matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); + + // Include in Python doc if needed + if include_in_python { + // Reset the string buffer, write that part, and then push this macro part too. + if !current_part.is_empty() { + parts.push(current_part.to_token_stream()); + current_part.clear(); + } + parts.push(nv.value.to_token_stream()); + } + } + + // Collect to retain if needed + if retain_in_rust { + to_retain.push(i); } } + } else { + // Non-doc attributes are always retained + to_retain.push(i); } } + // Retain only the selected attributes + *attrs = to_retain.into_iter().map(|i| attrs[i].clone()).collect(); + + // Check if mode ended in Both; if not, error to enforce "pairing" + if !matches!(mode, DocParseMode::Both) { + return Err(syn::Error::new( + Span::call_site(), + "doc_mode did not end in 'both' mode; consider adding at the end" + )); + } + if !parts.is_empty() { // Doc contained macro pieces - return as `concat!` expression if !current_part.is_empty() { @@ -189,9 +288,8 @@ pub fn get_doc( syn::Ident::new("concat", Span::call_site()).to_tokens(&mut tokens); syn::token::Not(Span::call_site()).to_tokens(&mut tokens); - syn::token::Bracket(Span::call_site()).surround(&mut tokens, |tokens| { + syn::token::Paren(Span::call_site()).surround(&mut tokens, |tokens| { parts.to_tokens(tokens); - syn::token::Comma(Span::call_site()).to_tokens(tokens); }); Ok(PythonDoc(PythonDocKind::Tokens( @@ -199,15 +297,7 @@ pub fn get_doc( ))) } else { // Just a string doc - return directly with nul terminator - let docs = CString::new(current_part).map_err(|e| { - syn::Error::new( - current_part_span.unwrap_or(Span::call_site()), - format!( - "Python doc may not contain nul byte, found nul at position {}", - e.nul_position() - ), - ) - })?; + let docs = CString::new(current_part).unwrap(); Ok(PythonDoc(PythonDocKind::LitCStr(LitCStr::new( docs, current_part_span.unwrap_or(Span::call_site()), @@ -216,6 +306,68 @@ pub fn get_doc( } } +/// Helper function to detect likely typos in pyo3_doc_mode prefix +fn is_likely_pyo3_doc_mode_typo(content: &str) -> bool { + // Simple fuzzy matching for common typos + let potential_typos = [ + "pyo3_doc_mde", + "pyo3_docc_mode", + "pyo3_doc_mod", + "py03_doc_mode", + "pyo3doc_mode", + "pyo3_docmode", + "pyo_doc_mode", + "pyo3_doc_node", + ]; + + potential_typos.iter().any(|&typo| { + content.starts_with(typo) + || (content.len() >= typo.len() - 2 + && simple_edit_distance(content.split(':').next().unwrap_or(""), typo) <= 2) + }) +} + +/// Simple edit distance calculation for typo detection +fn simple_edit_distance(a: &str, b: &str) -> usize { + let a_chars: Vec = a.chars().collect(); + let b_chars: Vec = b.chars().collect(); + let a_len = a_chars.len(); + let b_len = b_chars.len(); + + if a_len == 0 { + return b_len; + } + if b_len == 0 { + return a_len; + } + + let mut matrix = vec![vec![0; b_len + 1]; a_len + 1]; + + // Initialize first row and column + for i in 0..=a_len { + matrix[i][0] = i; + } + for j in 0..=b_len { + matrix[0][j] = j; + } + + // Fill the matrix + for i in 1..=a_len { + for j in 1..=b_len { + let cost = if a_chars[i - 1] == b_chars[j - 1] { + 0 + } else { + 1 + }; + matrix[i][j] = (matrix[i - 1][j] + 1) // deletion + .min(matrix[i][j - 1] + 1) // insertion + .min(matrix[i - 1][j - 1] + cost); // substitution + } + } + + matrix[a_len][b_len] +} + impl quote::ToTokens for PythonDoc { fn to_tokens(&self, tokens: &mut TokenStream) { match &self.0 {