Skip to content

Commit 7f28cb8

Browse files
committed
feat(bidi-combinators): Combinator library for bidirectional parsing
1 parent 8632109 commit 7f28cb8

File tree

14 files changed

+430
-0
lines changed

14 files changed

+430
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ A repository of Fathom-related binary parsing experiments.
44

55
## Language projects
66

7+
- [**bidi-combinators**](./lang-bidi-combinators/):
8+
A library of bidirectional parser combinators.
79
- [**ll1-combinators**](./lang-ll1-combinators):
810
Linear-time parser combinators based on Neel Krishnaswami’s blog post.
911
- [**ll1-dsl**](./lang-ll1-dsl):

dune-project

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828

2929
; Language experiments
3030

31+
(package
32+
(name bidi-combinators)
33+
(depends
34+
sized-numbers))
35+
3136
(package
3237
(name ll1-combinators)
3338
(depends

lang-bidi-combinators/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Bidirectional combinators
2+
3+
A library of bidirectional parser combinators.
4+
5+
## Resources
6+
7+
- “Composing Bidirectional Programs Monadically”
8+
[[doi:10.1007/978-3-030-17184-1_6](https://doi.org/10.1007/978-3-030-17184-1_6)]
9+
- Codec: Easy bidirectional serialization in Haskell
10+
[[hackage:codec](https://hackage.haskell.org/package/codec>)]
11+
[[github:chpatrick/codec](https://github.com/chpatrick/codec>)]

lang-bidi-combinators/decoder.ml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
type 'a t = bytes -> int -> ('a * int) option
2+
3+
let ( let+ ) x f = Option.map f x
4+
let ( let* ) = Option.bind
5+
6+
let pure (type a) (x : a) : a t =
7+
fun _ pos -> Some (x, pos)
8+
9+
let map (type a b) (f : a -> b) (x : a t) : b t =
10+
fun buf pos ->
11+
let+ x, pos = x buf pos in
12+
(f x, pos)
13+
14+
let both (type a b) (x : a t) (y : b t) : (a * b) t =
15+
fun buf pos ->
16+
let* (x, pos) = x buf pos in
17+
let+ (y, pos) = y buf pos in
18+
((x, y), pos)
19+
20+
let apply (type a b) (f : (a -> b) t) (x : a t) : b t =
21+
both f x |> map (fun (f, x) -> f x)
22+
23+
let bind (type a b) (x : a t) (y : a -> b t) : b t =
24+
fun buf pos ->
25+
let* (x, pos) = x buf pos in
26+
(y x) buf pos
27+
28+
let fail (type a) : a t =
29+
fun _ _ -> None
30+
31+
module Syntax = struct
32+
33+
let ( <$> ) = map
34+
let ( <*> ) = apply
35+
36+
let ( let+ ) x f = map f x
37+
let ( and+ ) = both
38+
let ( let* ) = bind
39+
let ( and* ) = both
40+
41+
end

lang-bidi-combinators/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(library
2+
(name bidi_combinators)
3+
(public_name bidi-combinators)
4+
(libraries
5+
sized-numbers))

lang-bidi-combinators/encoder.ml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
(** The type of an encoder, parameterized by:
2+
3+
- ['ctx]: the context of the encoder
4+
- ['a]: the type of the decoded value (used for dependent encoders)
5+
*)
6+
type ('ctx, 'a) t = Buffer.t -> 'ctx -> 'a option
7+
8+
let ( let+ ) x f = Option.map f x
9+
let ( let* ) = Option.bind
10+
11+
let pure (type ctx a) (x : a) : (ctx, a) t =
12+
fun _ _ -> Some x
13+
14+
let map (type ctx a b) (f : a -> b) (x : (ctx, a) t) : (ctx, b) t =
15+
fun buf c ->
16+
let+ x = x buf c in
17+
f x
18+
19+
let both (type ctx a b) (x : (ctx, a) t) (y : (ctx, b) t) : (ctx, a * b) t =
20+
fun buf c ->
21+
let* x = x buf c in
22+
let+ y = y buf c in
23+
(x, y)
24+
25+
let apply (type ctx a b) (f : (ctx, a -> b) t) (x : (ctx, a) t) : (ctx, b) t =
26+
both f x |> map (fun (f, x) -> f x)
27+
28+
let bind (type ctx a b) (x : (ctx, a) t) (y : a -> (ctx, b) t) : (ctx, b) t =
29+
fun buf c ->
30+
let* x = x buf c in
31+
(y x) buf c
32+
33+
let fail (type ctx a) : (ctx, a) t =
34+
fun _ _ ->
35+
failwith "encoding fail format"
36+
37+
let comap (type ctx ctx' a) (f : ctx' -> ctx) (x : (ctx, a) t) : (ctx', a) t =
38+
fun buf c ->
39+
x buf (f c)
40+
41+
module Syntax = struct
42+
43+
let ( <$> ) = map
44+
let ( <*> ) = apply
45+
46+
let ( @= ) = comap
47+
48+
let ( let+ ) x f = map f x
49+
let ( and+ ) = both
50+
let ( let* ) = bind
51+
let ( and* ) = both
52+
53+
end

lang-bidi-combinators/examples/dune

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
(library
2+
(name examples)
3+
(libraries
4+
bidi-combinators))
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
open Bidi_combinators
2+
3+
type t = {
4+
width : int;
5+
height : int;
6+
data : int32 array;
7+
}
8+
9+
let width x = x.width
10+
let height x = x.height
11+
let data x = x.data
12+
13+
let format : (t, t) Format.t =
14+
let open Format.Syntax in
15+
16+
let* width = width @= Format.int16_be
17+
and+ height = height @= Format.int16_be in
18+
let+ data = data @= Format.Array.repeat_len Format.int32_be (width * height) in
19+
20+
{ width; height; data }
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
open Bidi_combinators
2+
3+
module Header = struct
4+
5+
type t = {
6+
width : int;
7+
height : int;
8+
}
9+
10+
let width x = x.width
11+
let height x = x.height
12+
13+
let format : (t, t) Format.t =
14+
let open Format.Syntax in
15+
16+
let+ width = width @= Format.int16_be
17+
and+ height = height @= Format.int16_be in
18+
19+
{ width; height }
20+
21+
end
22+
23+
type t = {
24+
header : Header.t;
25+
data : int32 array;
26+
}
27+
28+
let header x = x.header
29+
let data x = x.data
30+
31+
let format =
32+
let open Format.Syntax in
33+
34+
let* header = header @= Header.format in
35+
let+ data = data @= Format.Array.repeat_len Format.int32_be (header.width * header.height) in
36+
37+
{ header; data }

lang-bidi-combinators/format.ml

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
(** The type of a format, parameterized by:
2+
3+
- ['ctx]: the context of the encoder
4+
- ['a]: the type of the decoded value (used for dependent encoders)
5+
*)
6+
type ('ctx, 'a) t = {
7+
decode : 'a Decoder.t;
8+
encode : ('ctx, 'a) Encoder.t;
9+
}
10+
11+
let ( let+ ) x f = Option.map f x
12+
let ( let* ) = Option.bind
13+
14+
let pure (type ctx a) (x : a) : (ctx, a) t = {
15+
decode = Decoder.pure x;
16+
encode = Encoder.pure x;
17+
}
18+
19+
let map (type ctx a b) (f : a -> b) (x : (ctx, a) t) : (ctx, b) t = {
20+
decode = Decoder.map f x.decode;
21+
encode = Encoder.map f x.encode;
22+
}
23+
24+
let both (type ctx a b) (x : (ctx, a) t) (y : (ctx, b) t) : (ctx, a * b) t = {
25+
decode = Decoder.both x.decode y.decode;
26+
encode = Encoder.both x.encode y.encode;
27+
}
28+
29+
let apply (type ctx a b) (f : (ctx, a -> b) t) (x : (ctx, a) t) : (ctx, b) t =
30+
both f x |> map (fun (f, x) -> f x)
31+
32+
let bind (type ctx a b) (x : (ctx, a) t) (y : a -> (ctx, b) t) : (ctx, b) t = {
33+
decode = Decoder.bind x.decode (fun x -> (y x).decode);
34+
encode = Encoder.bind x.encode (fun x -> (y x).encode);
35+
}
36+
37+
let fail (type ctx a) : (ctx, a) t = {
38+
decode = Decoder.fail;
39+
encode = Encoder.fail;
40+
}
41+
42+
let comap (type ctx ctx' a) (f : ctx' -> ctx) (x : (ctx, a) t) : (ctx', a) t =
43+
{ x with encode = Encoder.comap f x.encode }
44+
45+
module Syntax = struct
46+
47+
let ( <$> ) = map
48+
let ( <*> ) = apply
49+
50+
let ( @= ) = comap
51+
52+
let ( let+ ) x f = map f x
53+
let ( and+ ) = both
54+
let ( let* ) = bind
55+
let ( and* ) = both
56+
57+
end
58+
59+
open Sized_numbers
60+
61+
(* TODO: Unsigned formats *)
62+
63+
let int8 : (int, int) t = (* FIXME: int8 *)
64+
let decode buf pos = if pos < Bytes.length buf then Some (Bytes.get_int8 buf pos, pos + 1) else None
65+
and encode buf c = Buffer.add_int8 buf c; Some c in
66+
{ decode; encode }
67+
68+
let int16_be : (int, int) t = (* FIXME: int16 *)
69+
let open Syntax in
70+
71+
let+ b0 = (fun x -> x lsl 8) <$> (fun x -> x lsr 8) @= int8
72+
and+ b1 = int8 in
73+
74+
b0 lor b1
75+
76+
let int16_le : (int, int) t = (* FIXME: int16 *)
77+
let open Syntax in
78+
79+
let+ b0 = int8
80+
and+ b1 = (fun x -> x lsl 8) <$> (fun x -> x lsr 8) @= int8 in
81+
82+
b0 lor b1
83+
84+
let int32_be : (int32, int32) t =
85+
let open Syntax in
86+
let open Int32.O in
87+
88+
let+ b0 = Int32.(fun x -> (of_int x) lsl 24) <$> Int32.(fun x -> to_int (x lsr 24)) @= int8
89+
and+ b1 = Int32.(fun x -> (of_int x) lsl 16) <$> Int32.(fun x -> to_int (x lsr 16)) @= int8
90+
and+ b2 = Int32.(fun x -> (of_int x) lsl 8) <$> Int32.(fun x -> to_int (x lsr 8)) @= int8
91+
and+ b3 = Int32.of_int <$> Int32.to_int @= int8 in
92+
93+
b0 lor b1 lor b2 lor b3
94+
95+
let int32_le : (int32, int32) t =
96+
let open Syntax in
97+
let open Int32.O in
98+
99+
let+ b0 = Int32.of_int <$> Int32.to_int @= int8
100+
and+ b1 = Int32.(fun x -> (of_int x) lsl 8) <$> Int32.(fun x -> to_int (x lsr 8)) @= int8
101+
and+ b2 = Int32.(fun x -> (of_int x) lsl 16) <$> Int32.(fun x -> to_int (x lsr 16)) @= int8
102+
and+ b3 = Int32.(fun x -> (of_int x) lsl 24) <$> Int32.(fun x -> to_int (x lsr 24)) @= int8 in
103+
104+
b0 lor b1 lor b2 lor b3
105+
106+
let int64_be : (int64, int64) t =
107+
let open Syntax in
108+
let open Int64.O in
109+
110+
let+ b0 = Int64.(fun x -> (of_int x) lsl 56) <$> Int64.(fun x -> to_int (x lsr 56)) @= int8
111+
and+ b1 = Int64.(fun x -> (of_int x) lsl 48) <$> Int64.(fun x -> to_int (x lsr 48)) @= int8
112+
and+ b2 = Int64.(fun x -> (of_int x) lsl 40) <$> Int64.(fun x -> to_int (x lsr 40)) @= int8
113+
and+ b3 = Int64.(fun x -> (of_int x) lsl 32) <$> Int64.(fun x -> to_int (x lsr 32)) @= int8
114+
and+ b4 = Int64.(fun x -> (of_int x) lsl 24) <$> Int64.(fun x -> to_int (x lsr 24)) @= int8
115+
and+ b5 = Int64.(fun x -> (of_int x) lsl 16) <$> Int64.(fun x -> to_int (x lsr 16)) @= int8
116+
and+ b6 = Int64.(fun x -> (of_int x) lsl 8) <$> Int64.(fun x -> to_int (x lsr 8)) @= int8
117+
and+ b7 = Int64.of_int <$> Int64.to_int @= int8 in
118+
119+
b0 lor b1 lor b2 lor b3 lor b4 lor b5 lor b6 lor b7
120+
121+
let int64_le : (int64, int64) t =
122+
let open Syntax in
123+
let open Int64.O in
124+
125+
let+ b0 = Int64.of_int <$> Int64.to_int @= int8
126+
and+ b1 = Int64.(fun x -> (of_int x) lsl 8) <$> Int64.(fun x -> to_int (x lsr 8)) @= int8
127+
and+ b2 = Int64.(fun x -> (of_int x) lsl 16) <$> Int64.(fun x -> to_int (x lsr 16)) @= int8
128+
and+ b3 = Int64.(fun x -> (of_int x) lsl 24) <$> Int64.(fun x -> to_int (x lsr 24)) @= int8
129+
and+ b4 = Int64.(fun x -> (of_int x) lsl 32) <$> Int64.(fun x -> to_int (x lsr 32)) @= int8
130+
and+ b5 = Int64.(fun x -> (of_int x) lsl 40) <$> Int64.(fun x -> to_int (x lsr 40)) @= int8
131+
and+ b6 = Int64.(fun x -> (of_int x) lsl 48) <$> Int64.(fun x -> to_int (x lsr 48)) @= int8
132+
and+ b7 = Int64.(fun x -> (of_int x) lsl 56) <$> Int64.(fun x -> to_int (x lsr 56)) @= int8 in
133+
134+
b0 lor b1 lor b2 lor b3 lor b4 lor b5 lor b6 lor b7
135+
136+
137+
module List = struct
138+
139+
open Syntax
140+
141+
let repeat_len (type a) (elem : (a, a) t) (len : int) : (a list, a list) t =
142+
let rec repeat_len i =
143+
if i < len then
144+
(* FIXME: List.hd and List.tl fail on the empty list*)
145+
let* x = List.hd @= elem in
146+
let+ xs = List.tl @= repeat_len (i + 1) in
147+
x :: xs
148+
else
149+
pure []
150+
in
151+
repeat_len 0
152+
153+
end
154+
155+
module Array = struct
156+
157+
let repeat_len (type a) (elem : (a, a) t) (len : int) : (a array, a array) t =
158+
(* FIXME: No clue if this actually works - write some tests! *)
159+
let decode buf pos =
160+
if len <= 0 then Some ([||], pos) else
161+
let* (x, pos) = elem.decode buf pos in
162+
let xs = Array.make len x in
163+
let rec go i buf pos =
164+
if i < len then
165+
let* (x, pos) = elem.decode buf pos in
166+
Array.set xs i x;
167+
(go [@tailcall]) (i + 1) buf pos
168+
else
169+
Some (xs, pos)
170+
in
171+
go 1 buf pos
172+
173+
and encode =
174+
let rec go i buf xs =
175+
if i <= Array.length xs then
176+
let* _ = elem.encode buf (Array.get xs i) in
177+
(go [@tailcall]) (i + 1) buf xs
178+
else
179+
Some xs
180+
in
181+
go 0
182+
in
183+
184+
{ decode; encode }
185+
186+
end

0 commit comments

Comments
 (0)