|
| 1 | +<!DOCTYPE html> |
| 2 | +<html lang="en"> |
| 3 | +<head> |
| 4 | + <meta charset="UTF-8" /> |
| 5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| 6 | + <title>tsb — api.extensions: Custom Extension Types</title> |
| 7 | + <style> |
| 8 | + :root { |
| 9 | + --bg: #0d1117; |
| 10 | + --surface: #161b22; |
| 11 | + --border: #30363d; |
| 12 | + --text: #e6edf3; |
| 13 | + --accent: #58a6ff; |
| 14 | + --green: #3fb950; |
| 15 | + --orange: #d29922; |
| 16 | + --red: #f85149; |
| 17 | + --font-mono: "Cascadia Code", "Fira Code", "JetBrains Mono", monospace; |
| 18 | + } |
| 19 | + * { box-sizing: border-box; margin: 0; padding: 0; } |
| 20 | + body { |
| 21 | + background: var(--bg); |
| 22 | + color: var(--text); |
| 23 | + font-family: system-ui, -apple-system, sans-serif; |
| 24 | + line-height: 1.6; |
| 25 | + padding: 2rem; |
| 26 | + max-width: 900px; |
| 27 | + margin: 0 auto; |
| 28 | + } |
| 29 | + a { color: var(--accent); } |
| 30 | + h1 { color: var(--accent); margin-bottom: 0.5rem; } |
| 31 | + h2 { color: var(--text); margin: 1.5rem 0 0.5rem; font-size: 1.1rem; } |
| 32 | + p { margin-bottom: 0.75rem; color: #8b949e; } |
| 33 | + pre { |
| 34 | + background: var(--surface); |
| 35 | + border: 1px solid var(--border); |
| 36 | + border-radius: 6px; |
| 37 | + padding: 1rem; |
| 38 | + font-family: var(--font-mono); |
| 39 | + font-size: 0.85rem; |
| 40 | + overflow-x: auto; |
| 41 | + margin-bottom: 1rem; |
| 42 | + white-space: pre; |
| 43 | + } |
| 44 | + .kw { color: #ff7b72; } |
| 45 | + .str { color: #a5d6ff; } |
| 46 | + .num { color: #79c0ff; } |
| 47 | + .cmt { color: #8b949e; font-style: italic; } |
| 48 | + .type { color: #ffa657; } |
| 49 | + .fn { color: #d2a8ff; } |
| 50 | + .result { |
| 51 | + background: #0d2318; |
| 52 | + border: 1px solid var(--green); |
| 53 | + border-radius: 4px; |
| 54 | + padding: 0.75rem 1rem; |
| 55 | + font-family: var(--font-mono); |
| 56 | + font-size: 0.85rem; |
| 57 | + margin-bottom: 1rem; |
| 58 | + color: var(--green); |
| 59 | + } |
| 60 | + .badge { |
| 61 | + display: inline-block; |
| 62 | + padding: 0.15rem 0.5rem; |
| 63 | + border-radius: 4px; |
| 64 | + font-size: 0.75rem; |
| 65 | + font-weight: 600; |
| 66 | + background: #1f3a1f; |
| 67 | + color: var(--green); |
| 68 | + margin-left: 0.5rem; |
| 69 | + vertical-align: middle; |
| 70 | + } |
| 71 | + table { |
| 72 | + border-collapse: collapse; |
| 73 | + width: 100%; |
| 74 | + margin-bottom: 1.5rem; |
| 75 | + font-size: 0.85rem; |
| 76 | + } |
| 77 | + th, td { |
| 78 | + text-align: left; |
| 79 | + padding: 0.5rem 0.75rem; |
| 80 | + border: 1px solid var(--border); |
| 81 | + } |
| 82 | + th { background: var(--surface); color: var(--accent); } |
| 83 | + .nav { margin-bottom: 1.5rem; font-size: 0.85rem; } |
| 84 | + </style> |
| 85 | +</head> |
| 86 | +<body> |
| 87 | + |
| 88 | +<div class="nav"><a href="index.html">← tsb playground</a></div> |
| 89 | + |
| 90 | +<h1>api.extensions <span class="badge">new in pandas 0.23</span></h1> |
| 91 | +<p> |
| 92 | + The <code>api.extensions</code> namespace lets you build custom array types and dtypes |
| 93 | + that integrate with tsb DataFrames and Series — mirroring <code>pandas.api.extensions</code>. |
| 94 | +</p> |
| 95 | + |
| 96 | +<h2>Overview</h2> |
| 97 | +<table> |
| 98 | + <tr><th>Symbol</th><th>Mirrors</th><th>Description</th></tr> |
| 99 | + <tr><td><code>ExtensionDtype</code></td><td><code>pandas.api.extensions.ExtensionDtype</code></td><td>Abstract base class for custom dtypes</td></tr> |
| 100 | + <tr><td><code>ExtensionArray</code></td><td><code>pandas.api.extensions.ExtensionArray</code></td><td>Abstract base class for custom 1-D arrays</td></tr> |
| 101 | + <tr><td><code>registerExtensionDtype(cls)</code></td><td><code>register_extension_dtype</code></td><td>Register a dtype so it can be resolved from a string</td></tr> |
| 102 | + <tr><td><code>constructExtensionDtypeFromString(s)</code></td><td>internal pandas helper</td><td>Resolve a string to a registered extension dtype</td></tr> |
| 103 | + <tr><td><code>registerSeriesAccessor(name, cls)</code></td><td><code>register_series_accessor</code></td><td>Register a custom accessor on Series</td></tr> |
| 104 | + <tr><td><code>registerDataFrameAccessor(name, cls)</code></td><td><code>register_dataframe_accessor</code></td><td>Register a custom accessor on DataFrame</td></tr> |
| 105 | + <tr><td><code>registerIndexAccessor(name, cls)</code></td><td><code>register_index_accessor</code></td><td>Register a custom accessor on Index</td></tr> |
| 106 | + <tr><td><code>getRegisteredAccessors(target)</code></td><td>—</td><td>Return all registered accessors for a target</td></tr> |
| 107 | +</table> |
| 108 | + |
| 109 | +<h2>1 — Custom ExtensionDtype</h2> |
| 110 | +<p> |
| 111 | + Subclass <code>ExtensionDtype</code> to define a new dtype. |
| 112 | + Implement <code>name</code>, <code>type</code>, <code>kind</code>, and |
| 113 | + optionally <code>construct_from_string</code> so the dtype can be resolved |
| 114 | + from a plain string. |
| 115 | +</p> |
| 116 | +<pre><span class="kw">import</span> { <span class="type">ExtensionDtype</span> } <span class="kw">from</span> <span class="str">"tsb"</span>; |
| 117 | + |
| 118 | +<span class="kw">class</span> <span class="type">IPDtype</span> <span class="kw">extends</span> <span class="type">ExtensionDtype</span> { |
| 119 | + <span class="kw">get</span> <span class="fn">name</span>() { <span class="kw">return</span> <span class="str">"ip"</span>; } |
| 120 | + <span class="kw">get</span> <span class="fn">type</span>() { <span class="kw">return</span> <span class="type">String</span>; } |
| 121 | + <span class="kw">get</span> <span class="fn">kind</span>() { <span class="kw">return</span> <span class="str">"O"</span>; } |
| 122 | + |
| 123 | + <span class="kw">static override</span> <span class="fn">construct_from_string</span>(s: <span class="type">string</span>): <span class="type">IPDtype</span> | <span class="kw">null</span> { |
| 124 | + <span class="kw">return</span> s === <span class="str">"ip"</span> ? <span class="kw">new</span> <span class="type">IPDtype</span>() : <span class="kw">null</span>; |
| 125 | + } |
| 126 | +} |
| 127 | + |
| 128 | +<span class="kw">const</span> d = <span class="kw">new</span> <span class="type">IPDtype</span>(); |
| 129 | +console.log(d.name); <span class="cmt">// "ip"</span> |
| 130 | +console.log(d.kind); <span class="cmt">// "O"</span> |
| 131 | +console.log(d.isNumeric); <span class="cmt">// false</span> |
| 132 | +console.log(String(d)); <span class="cmt">// "ip"</span></pre> |
| 133 | +<div class="result"> |
| 134 | +name = "ip"<br> |
| 135 | +kind = "O"<br> |
| 136 | +isNumeric = false<br> |
| 137 | +toString = "ip" |
| 138 | +</div> |
| 139 | + |
| 140 | +<h2>2 — Custom ExtensionArray</h2> |
| 141 | +<p> |
| 142 | + Subclass <code>ExtensionArray</code> to hold a column of your custom elements. |
| 143 | + At a minimum, implement <code>dtype</code>, <code>length</code>, <code>getItem</code>, |
| 144 | + and <code>slice</code>. The default <code>isna</code> and <code>toArray</code> |
| 145 | + implementations call <code>getItem</code> repeatedly — override them for performance. |
| 146 | +</p> |
| 147 | +<pre><span class="kw">import</span> { <span class="type">ExtensionArray</span> } <span class="kw">from</span> <span class="str">"tsb"</span>; |
| 148 | + |
| 149 | +<span class="kw">class</span> <span class="type">IPArray</span> <span class="kw">extends</span> <span class="type">ExtensionArray</span> { |
| 150 | + <span class="kw">readonly</span> _data: (<span class="type">string</span> | <span class="kw">null</span>)[]; |
| 151 | + |
| 152 | + <span class="fn">constructor</span>(data: (<span class="type">string</span> | <span class="kw">null</span>)[]) { |
| 153 | + <span class="kw">super</span>(); |
| 154 | + <span class="kw">this</span>._data = data; |
| 155 | + } |
| 156 | + |
| 157 | + <span class="kw">get</span> <span class="fn">dtype</span>() { <span class="kw">return new</span> <span class="type">IPDtype</span>(); } |
| 158 | + <span class="kw">get</span> <span class="fn">length</span>() { <span class="kw">return this</span>._data.length; } |
| 159 | + |
| 160 | + <span class="fn">getItem</span>(i: <span class="type">number</span>): <span class="type">string</span> | <span class="kw">null</span> { |
| 161 | + <span class="kw">const</span> idx = i &lt; <span class="num">0</span> ? <span class="kw">this</span>._data.length + i : i; |
| 162 | + <span class="kw">return this</span>._data[idx] ?? <span class="kw">null</span>; |
| 163 | + } |
| 164 | + |
| 165 | + <span class="fn">slice</span>(start: <span class="type">number</span>, stop: <span class="type">number</span>): <span class="type">IPArray</span> { |
| 166 | + <span class="kw">return new</span> <span class="type">IPArray</span>(<span class="kw">this</span>._data.slice(start, stop)); |
| 167 | + } |
| 168 | +} |
| 169 | + |
| 170 | +<span class="kw">const</span> arr = <span class="kw">new</span> <span class="type">IPArray</span>([<span class="str">"1.1.1.1"</span>, <span class="kw">null</span>, <span class="str">"8.8.8.8"</span>]); |
| 171 | +console.log(arr.length); <span class="cmt">// 3</span> |
| 172 | +console.log(arr.getItem(<span class="num">0</span>)); <span class="cmt">// "1.1.1.1"</span> |
| 173 | +console.log(arr.getItem(<span class="num">-1</span>)); <span class="cmt">// "8.8.8.8"</span> |
| 174 | +console.log(arr.isna()); <span class="cmt">// [false, true, false]</span> |
| 175 | +console.log(arr.toArray()); <span class="cmt">// ["1.1.1.1", null, "8.8.8.8"]</span></pre> |
| 176 | +<div class="result"> |
| 177 | +length = 3<br> |
| 178 | +getItem(0) = "1.1.1.1"<br> |
| 179 | +getItem(-1) = "8.8.8.8"<br> |
| 180 | +isna() = [false, true, false]<br> |
| 181 | +toArray() = ["1.1.1.1", null, "8.8.8.8"] |
| 182 | +</div> |
| 183 | + |
| 184 | +<h2>3 — Register a dtype</h2> |
| 185 | +<p> |
| 186 | + Call <code>registerExtensionDtype</code> to make a dtype resolvable by name. |
| 187 | + Then use <code>constructExtensionDtypeFromString</code> to look it up — this |
| 188 | + is what tsb uses internally when you pass a dtype string. |
| 189 | +</p> |
| 190 | +<pre><span class="kw">import</span> { |
| 191 | + <span class="fn">registerExtensionDtype</span>, |
| 192 | + <span class="fn">constructExtensionDtypeFromString</span>, |
| 193 | +} <span class="kw">from</span> <span class="str">"tsb"</span>; |
| 194 | + |
| 195 | +<span class="fn">registerExtensionDtype</span>(<span class="type">IPDtype</span>); |
| 196 | + |
| 197 | +<span class="kw">const</span> dtype = <span class="fn">constructExtensionDtypeFromString</span>(<span class="str">"ip"</span>); |
| 198 | +console.log(dtype?.name); <span class="cmt">// "ip"</span> |
| 199 | +console.log(dtype <span class="kw">instanceof</span> <span class="type">IPDtype</span>); <span class="cmt">// true</span> |
| 200 | + |
| 201 | +<span class="fn">constructExtensionDtypeFromString</span>(<span class="str">"unknown"</span>); <span class="cmt">// null</span></pre> |
| 202 | +<div class="result"> |
| 203 | +dtype.name = "ip"<br> |
| 204 | +dtype instanceof IPDtype = true<br> |
| 205 | +constructExtensionDtypeFromString("unknown") = null |
| 206 | +</div> |
| 207 | + |
| 208 | +<h2>4 — Register custom accessors</h2> |
| 209 | +<p> |
| 210 | + Use <code>registerSeriesAccessor</code>, <code>registerDataFrameAccessor</code>, |
| 211 | + or <code>registerIndexAccessor</code> to attach a custom accessor class to tsb objects. |
| 212 | + Call <code>getRegisteredAccessors("series")</code> to retrieve all registered |
| 213 | + accessors for a given target. |
| 214 | +</p> |
| 215 | +<pre><span class="kw">import</span> { |
| 216 | + <span class="fn">registerSeriesAccessor</span>, |
| 217 | + <span class="fn">getRegisteredAccessors</span>, |
| 218 | +} <span class="kw">from</span> <span class="str">"tsb"</span>; |
| 219 | + |
| 220 | +<span class="kw">class</span> <span class="type">GeoAccessor</span> { |
| 221 | + <span class="fn">constructor</span>(<span class="kw">private readonly</span> _series: <span class="type">unknown</span>) {} |
| 222 | + <span class="fn">centroid</span>() { <span class="kw">return</span> [<span class="num">0</span>, <span class="num">0</span>]; } |
| 223 | +} |
| 224 | + |
| 225 | +<span class="fn">registerSeriesAccessor</span>(<span class="str">"geo"</span>, <span class="type">GeoAccessor</span>); |
| 226 | + |
| 227 | +<span class="kw">const</span> accessors = <span class="fn">getRegisteredAccessors</span>(<span class="str">"series"</span>); |
| 228 | +<span class="kw">const</span> <span class="type">Cls</span> = accessors.get(<span class="str">"geo"</span>)!; |
| 229 | +<span class="kw">const</span> acc = <span class="kw">new</span> <span class="type">Cls</span>(mySeries); |
| 230 | +<span class="cmt">// acc.centroid() → [0, 0]</span></pre> |
| 231 | +<div class="result"> |
| 232 | +accessors.has("geo") = true<br> |
| 233 | +acc.centroid() = [0, 0] |
| 234 | +</div> |
| 235 | + |
| 236 | +<h2>5 — Accessing via <code>api.extensions</code></h2> |
| 237 | +<p> |
| 238 | + All the above is also available through the unified <code>api</code> namespace: |
| 239 | +</p> |
| 240 | +<pre><span class="kw">import</span> { api } <span class="kw">from</span> <span class="str">"tsb"</span>; |
| 241 | + |
| 242 | +api.extensions.registerExtensionDtype(<span class="type">IPDtype</span>); |
| 243 | +api.extensions.constructExtensionDtypeFromString(<span class="str">"ip"</span>); <span class="cmt">// IPDtype instance</span> |
| 244 | +api.extensions.registerSeriesAccessor(<span class="str">"geo"</span>, <span class="type">GeoAccessor</span>); |
| 245 | +api.extensions.getRegisteredAccessors(<span class="str">"series"</span>).get(<span class="str">"geo"</span>); <span class="cmt">// GeoAccessor</span></pre> |
| 246 | + |
| 247 | +<h2>API reference</h2> |
| 248 | +<table> |
| 249 | + <tr><th>Method / Class</th><th>Signature</th><th>Description</th></tr> |
| 250 | + <tr><td><code>ExtensionDtype</code></td><td>abstract class</td><td>Base for custom dtypes. Implement <code>name</code>, <code>type</code>, <code>kind</code>.</td></tr> |
| 251 | + <tr><td><code>ExtensionArray</code></td><td>abstract class</td><td>Base for custom arrays. Implement <code>dtype</code>, <code>length</code>, <code>getItem</code>, <code>slice</code>.</td></tr> |
| 252 | + <tr><td><code>registerExtensionDtype(cls)</code></td><td><code>(cls: typeof ExtensionDtype) → void</code></td><td>Register a dtype subclass by name.</td></tr> |
| 253 | + <tr><td><code>constructExtensionDtypeFromString(s)</code></td><td><code>(s: string) → ExtensionDtype | null</code></td><td>Resolve a string to a registered dtype.</td></tr> |
| 254 | + <tr><td><code>registerSeriesAccessor(name, cls)</code></td><td><code>(name: string, cls: new(obj) → unknown) → void</code></td><td>Register accessor on Series.</td></tr> |
| 255 | + <tr><td><code>registerDataFrameAccessor(name, cls)</code></td><td><code>(name: string, cls: new(obj) → unknown) → void</code></td><td>Register accessor on DataFrame.</td></tr> |
| 256 | + <tr><td><code>registerIndexAccessor(name, cls)</code></td><td><code>(name: string, cls: new(obj) → unknown) → void</code></td><td>Register accessor on Index.</td></tr> |
| 257 | + <tr><td><code>getRegisteredAccessors(target)</code></td><td><code>("series" | "dataframe" | "index") → ReadonlyMap</code></td><td>Get all registered accessors for a target.</td></tr> |
| 258 | +</table> |
| 259 | + |
| 260 | +</body> |
| 261 | +</html> |
0 commit comments