diff --git a/analyzer/protobuf.spicy b/analyzer/protobuf.spicy
index bcaef43..68723cc 100644
--- a/analyzer/protobuf.spicy
+++ b/analyzer/protobuf.spicy
@@ -2,25 +2,83 @@ module protobuf;
 
 import zeek;
 
-# TODO: Our example here models a simple example packet format of static size:
-# 19 payload bytes, followed by the protocol number for the next layer, for
-# which the data then follows subsequently. (This is just what our test trace
-# happens to contain). Adapt as suitable.
 public type Packet = unit {
-    payload: bytes &size=19;
-    protocol: uint16; # next-layer protocol, values need to be mapped to analyzers in Zeek scriptland
+    payload: Message;
 
     on %done {
         # Feed into Zeek's next-layer packet analysis.
-        zeek::forward_packet(self.protocol);
+        zeek::forward_packet(self.payload);
     }
 };
 
-type WireType = enum {
-    VARINT = 0x00,
-    I64 = 0x01,
-    LEN = 0x02,
-    SGROUP = 0x03, # deprecated
-    EGROUP = 0x04, # deprecated
-    I32 = 0x05
+# message := (tag value)*
+# A message is encoded as a sequence of zero or more pairs of tags and values.
+type Message = unit {
+    # TODO
+};
+
+# tag := (field << 3) | wire_type
+# A tag is a combination of a wire_type, stored in the least significant three bits,
+# and the field number that is defined in the .proto file.
+# Encoded as uint32 varint.
+type Tag = unit {
+    # TODO
+};
+
+# value := varint      for wire_type == VARINT,
+#          i32         for wire_type == I32,
+#          i64         for wire_type == I64,
+#          len-prefix  for wire_type == LEN,
+#          (empty)     for wire_type == SGROUP or EGROUP
+# A value is stored differently depending on the wire_type specified in the tag.
+type Value = unit {
+    # TODO
+};
+
+# varint := int32 | int64 | uint32 | uint64 | bool | enum | sint32 | sint64
+# Encoded as varints (sintN are ZigZag-encoded first).
+type VarInt = unit {
+    # TODO
+};
+
+# i32 := sfixed32 | fixed32 | float
+# Encoded as 4-byte little-endian.
+# memcpy of the equivalent C types (u?int32_t, float)
+type I32 = unit {
+    # TODO
+};
+
+# i64 := sfixed64 | fixed64 | double
+# Encoded as 8-byte little-endian.
+# memcpy of the equivalent C types (u?int64_t, double)
+type I64 = unit {
+    # TODO
+};
+
+# len-prefix := size (message | string | bytes | packed)
+# A length-prefixed value is stored as a length (encoded as a varint), and then one
+# of the listed data types.
+# Size encoded as int32 varint
+type LenPrefix = unit {
+    # TODO
+};
+
+# string := valid UTF-8 string (e.g. ASCII)
+# As described, a string must use UTF-8 character encoding. A string cannot exceed 2GB.
+type String = unit {
+    # TODO
+};
+
+# bytes := any sequence of 8-bit bytes
+# As described, bytes can store custom data types, up to 2GB in size.
+type Bytes = unit {
+    # TODO
+};
+
+# packed := varint* | i32* | i64*
+# Use the packed data type when you are storing consecutive values of the type described
+# in the protocol definition. The tag is dropped for values after the first, which
+# amortizes the costs of tags to one per field, rather than per element.
+type Packed = unit {
+    # TODO
 };