Skip to content

Commit 806eff2

Browse files
authored
Merge pull request #15 from githubnext/autoloop/build-tsb-pandas-typescript-migration-bdf4100e5aade1f0
[Autoloop] [Autoloop: build-tsb-pandas-typescript-migration]
2 parents 83b61c9 + 310e0aa commit 806eff2

6 files changed

Lines changed: 1482 additions & 0 deletions

File tree

src/core/dtype.ts

Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
/**
2+
* Dtype system — immutable singleton descriptors for all pandas-equivalent dtypes.
3+
*
4+
* Mirrors pandas' dtype hierarchy: numeric (int, uint, float), bool, string,
5+
* object, datetime, timedelta, and category. Each Dtype is a flyweight (cached
6+
* singleton keyed by name) so identity comparisons (`===`) work correctly.
7+
*/
8+
9+
import type { DtypeName, Scalar } from "../types.ts";
10+
11+
/** Kinds that describe plain numbers: signed integer, unsigned integer, floating point. */
type NumericKind = "int" | "uint" | "float";

/** Kinds that describe points in time and durations. */
type TemporalKind = "datetime" | "timedelta";

/** Broad family ("kind") that a dtype is classified into. */
export type DtypeKind =
  | NumericKind
  | "bool"
  | "string"
  | "object"
  | TemporalKind
  | "category";

/** Width of one element in bytes; 0 stands for variable / unknown. */
export type ItemSize = 0 | 1 | 2 | 4 | 8;
25+
26+
/** Cache of already-built dtypes, keyed by name, so each name maps to one instance. */
const _registry = new Map<DtypeName, Dtype>();

/** Running state of the single pass that `Dtype.inferFrom` makes over its input. */
interface InferFlags {
  /** Becomes true as soon as one non-null, non-undefined value is seen. */
  sawValue: boolean;
  allBool: boolean;
  allInt: boolean;
  allFloat: boolean;
  allDate: boolean;
  allString: boolean;
}

/**
 * An immutable descriptor for a data type.
 *
 * Instances are cached per name, so two lookups of the same name return the
 * same object and can be compared with `===`. The constructor is private:
 * use `Dtype.from(name)` or the named static constants.
 *
 * @example
 * ```ts
 * const dt = Dtype.float64;
 * dt.isNumeric; // true
 * dt.itemsize; // 8
 * Dtype.from("float64") === dt; // true — singletons
 * ```
 */
export class Dtype {
  readonly name: DtypeName;
  readonly kind: DtypeKind;
  readonly itemsize: ItemSize;

  private constructor(name: DtypeName, kind: DtypeKind, itemsize: ItemSize) {
    this.name = name;
    this.kind = kind;
    this.itemsize = itemsize;
  }

  // ─── singleton factory ──────────────────────────────────────────

  /**
   * Return the cached singleton for `name`, building it on first use.
   *
   * @throws TypeError if `name` is not a known dtype name (only reachable
   *   when the static type is bypassed, e.g. with untyped input).
   */
  static from(name: DtypeName): Dtype {
    const cached = _registry.get(name);
    if (cached !== undefined) {
      return cached;
    }
    const dt = Dtype.build(name);
    _registry.set(name, dt);
    return dt;
  }

  /** Construct a fresh descriptor for `name`; callers go through `from` for caching. */
  private static build(name: DtypeName): Dtype {
    switch (name) {
      case "int8":
        return new Dtype("int8", "int", 1);
      case "int16":
        return new Dtype("int16", "int", 2);
      case "int32":
        return new Dtype("int32", "int", 4);
      case "int64":
        return new Dtype("int64", "int", 8);
      case "uint8":
        return new Dtype("uint8", "uint", 1);
      case "uint16":
        return new Dtype("uint16", "uint", 2);
      case "uint32":
        return new Dtype("uint32", "uint", 4);
      case "uint64":
        return new Dtype("uint64", "uint", 8);
      case "float32":
        return new Dtype("float32", "float", 4);
      case "float64":
        return new Dtype("float64", "float", 8);
      case "bool":
        return new Dtype("bool", "bool", 1);
      case "string":
        return new Dtype("string", "string", 0);
      case "object":
        return new Dtype("object", "object", 0);
      case "datetime":
        return new Dtype("datetime", "datetime", 8);
      case "timedelta":
        return new Dtype("timedelta", "timedelta", 8);
      case "category":
        return new Dtype("category", "category", 0);
      default:
        // Fail loudly instead of returning `undefined` typed as `Dtype`.
        throw new TypeError(`Unknown dtype name: ${String(name)}`);
    }
  }

  // ─── named singletons ───────────────────────────────────────────

  static readonly int8 = Dtype.from("int8");
  static readonly int16 = Dtype.from("int16");
  static readonly int32 = Dtype.from("int32");
  static readonly int64 = Dtype.from("int64");
  static readonly uint8 = Dtype.from("uint8");
  static readonly uint16 = Dtype.from("uint16");
  static readonly uint32 = Dtype.from("uint32");
  static readonly uint64 = Dtype.from("uint64");
  static readonly float32 = Dtype.from("float32");
  static readonly float64 = Dtype.from("float64");
  static readonly bool = Dtype.from("bool");
  static readonly string = Dtype.from("string");
  static readonly object = Dtype.from("object");
  static readonly datetime = Dtype.from("datetime");
  static readonly timedelta = Dtype.from("timedelta");
  static readonly category = Dtype.from("category");

  /**
   * Numeric dtypes in the order `commonType` tries them: narrowest first,
   * integers before floats, so the first dtype both operands can be cast to
   * is also the smallest one. Must be declared after the named singletons.
   */
  private static readonly promotionCandidates: readonly Dtype[] = [
    Dtype.int8,
    Dtype.uint8,
    Dtype.int16,
    Dtype.uint16,
    Dtype.int32,
    Dtype.uint32,
    Dtype.int64,
    Dtype.uint64,
    Dtype.float32,
    Dtype.float64,
  ];

  // ─── type predicates ────────────────────────────────────────────

  /** True for signed integer, unsigned integer and float kinds. */
  get isNumeric(): boolean {
    return this.kind === "int" || this.kind === "uint" || this.kind === "float";
  }

  /** True for signed and unsigned integer kinds. */
  get isInteger(): boolean {
    return this.kind === "int" || this.kind === "uint";
  }

  get isSignedInteger(): boolean {
    return this.kind === "int";
  }

  get isUnsignedInteger(): boolean {
    return this.kind === "uint";
  }

  get isFloat(): boolean {
    return this.kind === "float";
  }

  get isBool(): boolean {
    return this.kind === "bool";
  }

  get isString(): boolean {
    return this.kind === "string";
  }

  get isDatetime(): boolean {
    return this.kind === "datetime";
  }

  get isTimedelta(): boolean {
    return this.kind === "timedelta";
  }

  get isCategory(): boolean {
    return this.kind === "category";
  }

  get isObject(): boolean {
    return this.kind === "object";
  }

  // ─── casting / promotion ────────────────────────────────────────

  /**
   * True when values of `this` dtype can be safely cast to `target`.
   *
   * Numeric pairs follow numpy's "safe" casting rules; in addition
   * bool → any numeric and string → object are allowed. Every other
   * pair of distinct dtypes is rejected.
   *
   * @param target - The dtype to cast to.
   * @returns Whether the cast is considered safe.
   */
  canCastTo(target: Dtype): boolean {
    if (this === target) {
      return true;
    }
    if (this.isNumeric && target.isNumeric) {
      return Dtype.isSafeNumericCast(this, target);
    }
    // bool → any numeric is safe.
    if (this.isBool && target.isNumeric) {
      return true;
    }
    // string → object is safe.
    if (this.isString && target.isObject) {
      return true;
    }
    return false;
  }

  /** Safe-cast decision for two numeric dtypes (see `canCastTo`). */
  private static isSafeNumericCast(from: Dtype, to: Dtype): boolean {
    // Integer → float: float64 accepts every integer width (numpy convention);
    // float32 only accepts integers that are strictly narrower than itself.
    const fitsFloat = to === Dtype.float64 || to.itemsize > from.itemsize;
    switch (from.kind) {
      case "int":
        if (to.kind === "int") {
          return to.itemsize >= from.itemsize;
        }
        // Signed → unsigned would drop negative values.
        return to.kind === "float" && fitsFloat;
      case "uint":
        if (to.kind === "uint") {
          return to.itemsize >= from.itemsize;
        }
        if (to.kind === "int") {
          // Needs one extra bit of headroom, hence a strictly wider target.
          return to.itemsize > from.itemsize;
        }
        return to.kind === "float" && fitsFloat;
      case "float":
        // Float → integer always truncates; only widening floats is safe.
        return to.kind === "float" && to.itemsize >= from.itemsize;
      default:
        return false;
    }
  }

  /**
   * Return the smallest dtype that both `a` and `b` can be safely cast to.
   *
   * If one side already casts to the other, that other side is the answer.
   * Two numeric dtypes with no direct cast (e.g. int8 and uint8) promote to
   * the narrowest numeric dtype accepting both, ending at float64. Any other
   * combination falls back to `object`.
   *
   * @param a - First dtype.
   * @param b - Second dtype.
   * @returns The common dtype.
   */
  static commonType(a: Dtype, b: Dtype): Dtype {
    if (a === b) {
      return a;
    }
    // Covers bool + numeric as well, since bool casts to every numeric dtype.
    if (a.canCastTo(b)) {
      return b;
    }
    if (b.canCastTo(a)) {
      return a;
    }
    if (a.isNumeric && b.isNumeric) {
      for (const candidate of Dtype.promotionCandidates) {
        if (a.canCastTo(candidate) && b.canCastTo(candidate)) {
          return candidate;
        }
      }
      // Every numeric dtype casts to float64, so this is only a type-level fallback.
      return Dtype.float64;
    }
    // Anything else → object.
    return Dtype.object;
  }

  // ─── inference ──────────────────────────────────────────────────

  /**
   * Infer the most specific dtype from an array of scalar values.
   *
   * `null` and `undefined` entries are treated as missing and skipped.
   *
   * Rules (in priority order):
   *  1. Empty array, or only missing values → float64.
   *  2. All booleans → bool.
   *  3. All integers (number without fractional part, no NaN/Inf) → int64.
   *  4. All numbers (including NaN/Inf) → float64.
   *  5. All Date objects → datetime.
   *  6. All strings → string.
   *  7. Otherwise → object.
   *
   * Booleans do not disqualify rules 3 and 4, so a mix of booleans and
   * numbers is inferred as int64 / float64.
   *
   * @param values - The values to inspect.
   * @returns The inferred dtype.
   */
  static inferFrom(values: readonly Scalar[]): Dtype {
    const flags = Dtype.scanFlags(values);
    if (!flags.sawValue) {
      // Nothing to learn from: without this guard every flag would still be
      // true and an all-missing array would be reported as bool.
      return Dtype.float64;
    }
    return Dtype.flagsToDtype(flags);
  }

  /** Walk `values` once and record which "all X" hypotheses survive. */
  private static scanFlags(values: readonly Scalar[]): InferFlags {
    const flags: InferFlags = {
      sawValue: false,
      allBool: true,
      allInt: true,
      allFloat: true,
      allDate: true,
      allString: true,
    };
    for (const v of values) {
      if (v === null || v === undefined) {
        continue;
      }
      flags.sawValue = true;
      Dtype.updateFlags(flags, v);
    }
    return flags;
  }

  /** Clear every hypothesis in `flags` that the single value `v` contradicts. */
  private static updateFlags(flags: InferFlags, v: NonNullable<Scalar>): void {
    if (typeof v === "boolean") {
      // Booleans deliberately leave allInt / allFloat untouched.
      flags.allString = false;
      flags.allDate = false;
      return;
    }
    flags.allBool = false;
    if (typeof v === "number") {
      flags.allString = false;
      flags.allDate = false;
      // Number.isInteger is false for NaN and ±Infinity as well as fractions.
      if (!Number.isInteger(v)) {
        flags.allInt = false;
      }
      return;
    }
    // Everything below is non-numeric.
    flags.allInt = false;
    flags.allFloat = false;
    if (v instanceof Date) {
      flags.allString = false;
      return;
    }
    flags.allDate = false;
    if (typeof v !== "string") {
      flags.allString = false;
    }
  }

  /** Map the surviving hypotheses to a dtype, most specific first. */
  private static flagsToDtype(f: InferFlags): Dtype {
    if (f.allBool) {
      return Dtype.bool;
    }
    if (f.allInt) {
      return Dtype.int64;
    }
    if (f.allFloat) {
      return Dtype.float64;
    }
    if (f.allDate) {
      return Dtype.datetime;
    }
    if (f.allString) {
      return Dtype.string;
    }
    return Dtype.object;
  }

  // ─── misc ────────────────────────────────────────────────────────

  /** The dtype's name, e.g. `"float64"`. */
  toString(): string {
    return this.name;
  }

  /** Equality: dtypes are singletons, so reference equality suffices. */
  equals(other: Dtype): boolean {
    return this === other;
  }
}

src/core/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
export { Index } from "./base-index.ts";
22
export type { IndexOptions } from "./base-index.ts";
33
export { RangeIndex } from "./range-index.ts";
4+
export { Dtype } from "./dtype.ts";
5+
export type { DtypeKind, ItemSize } from "./dtype.ts";
6+
export { Series } from "./series.ts";
7+
export type { SeriesOptions } from "./series.ts";

0 commit comments

Comments
 (0)