Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better safeguard against incompatible builtin handles between dialects #15961

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
337 changes: 189 additions & 148 deletions libyul/backends/evm/EVMDialect.cpp
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@

#include <range/v3/algorithm/all_of.hpp>
#include <range/v3/view/enumerate.hpp>
#include <range/v3/view/map.hpp>

#include <regex>
#include <utility>
@@ -195,91 +196,123 @@ std::set<std::string, std::less<>> createReservedIdentifiers(langutil::EVMVersio
return reserved;
}

std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVersion _evmVersion, std::optional<uint8_t> _eofVersion, bool _objectAccess)
class Builtins
{
public:
explicit Builtins(langutil::EVMVersion const& _evmVersion, std::optional<uint8_t> const& _eofVersion):
m_evmVersion(_evmVersion),
m_eofVersion(_eofVersion)
{}

// Exclude prevrandao as builtin for VMs before paris and difficulty for VMs after paris.
auto prevRandaoException = [&](std::string const& _instrName) -> bool
{
return (_instrName == "prevrandao" && _evmVersion < langutil::EVMVersion::paris()) || (_instrName == "difficulty" && _evmVersion >= langutil::EVMVersion::paris());
};

std::vector<std::optional<BuiltinFunctionForEVM>> builtins;
for (auto const& instr: evmasm::c_instructions)
void addInstruction(std::string const& _name, evmasm::Instruction const& _opcode)
{
std::string name = toLower(instr.first);
auto const opcode = instr.second;
auto const lowerCaseName = toLower(_name);
// Exclude prevrandao as builtin for VMs before paris and difficulty for VMs after paris.
auto prevRandaoException = [&](std::string_view const _instrName) -> bool
{
return (_instrName == "prevrandao" && m_evmVersion < langutil::EVMVersion::paris()) || (_instrName == "difficulty" && m_evmVersion >= langutil::EVMVersion::paris());
};

if (
!(opcode >= evmasm::Instruction::DUP1 && opcode <= evmasm::Instruction::DUP16) &&
!(opcode >= evmasm::Instruction::SWAP1 && opcode <= evmasm::Instruction::SWAP16) &&
!evmasm::isPushInstruction(opcode) &&
opcode != evmasm::Instruction::JUMP &&
opcode != evmasm::Instruction::JUMPI &&
opcode != evmasm::Instruction::JUMPDEST &&
opcode != evmasm::Instruction::DATALOADN &&
opcode != evmasm::Instruction::EOFCREATE &&
opcode != evmasm::Instruction::RETURNCONTRACT &&
opcode != evmasm::Instruction::RJUMP &&
opcode != evmasm::Instruction::RJUMPI &&
opcode != evmasm::Instruction::CALLF &&
opcode != evmasm::Instruction::JUMPF &&
opcode != evmasm::Instruction::DUPN &&
opcode != evmasm::Instruction::SWAPN &&
opcode != evmasm::Instruction::RETF &&
_evmVersion.hasOpcode(opcode, _eofVersion) &&
!prevRandaoException(name)
!(_opcode >= evmasm::Instruction::DUP1 && _opcode <= evmasm::Instruction::DUP16) &&
!(_opcode >= evmasm::Instruction::SWAP1 && _opcode <= evmasm::Instruction::SWAP16) &&
!isPushInstruction(_opcode) &&
_opcode != evmasm::Instruction::JUMP &&
_opcode != evmasm::Instruction::JUMPI &&
_opcode != evmasm::Instruction::JUMPDEST &&
_opcode != evmasm::Instruction::DATALOADN &&
_opcode != evmasm::Instruction::EOFCREATE &&
_opcode != evmasm::Instruction::RETURNCONTRACT &&
_opcode != evmasm::Instruction::RJUMP &&
_opcode != evmasm::Instruction::RJUMPI &&
_opcode != evmasm::Instruction::CALLF &&
_opcode != evmasm::Instruction::JUMPF &&
_opcode != evmasm::Instruction::DUPN &&
_opcode != evmasm::Instruction::SWAPN &&
_opcode != evmasm::Instruction::RETF &&
Comment on lines +217 to +232
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For builtins based on opcodes, this part of the condition is the declarative part. I'd move it to a separate function that returns a boolean. Then when adding a new opcode we'd only really modify that function.

In fact, I'd actually create more than one function. I think we should make it more semantic by having smaller functions expressing specific properties the excluded opcodes satisfy:

  • All the DUP/SWAP/PUSH variants: low-level stack manipulation opcodes.
    • We should also use SemanticInformation::isSwapInstruction()/SemanticInformation::isDupInstruction() here.
  • All the JUMP variants and RETF/CALLF: low-level control flow opcodes
  • DATALOADN, EOFCREATE, RETURNCONTRACT: these are actually exposed, but not directly. We have builtins replacing them. We can define a separate function for them, but a nicer alternative would be to be able to declare for a builtin that it replaces an instruction and have the definition logic exclude that instruction automatically.

Executing these functions and EVMVersion::hasOpcode() would then be a fixed part of the definition logic and would not have to be adjusted. It would be the equivalent of checking the scope of a builtin.

m_evmVersion.hasOpcode(_opcode, m_eofVersion) &&
!prevRandaoException(lowerCaseName)
)
builtins.emplace_back(createEVMFunction(_evmVersion, name, opcode));
m_builtinFunctions.emplace_back(createEVMFunction(m_evmVersion, lowerCaseName, _opcode));
else
builtins.emplace_back(std::nullopt);
m_builtinFunctions.emplace_back(std::nullopt);
}

auto const createIfObjectAccess = [_objectAccess](
/// When adding new builtin functions, it is imperative to not change the order between dialect versions and/or
/// object access modifiers. Blanks are added to be able to uphold this property.
/// Changing the order affects the generated `BuiltinHandle`s, which makes different dialect versions inherently
/// incompatible.
void addIfObjectAccess(
bool const _objectAccess,
std::string const& _name,
size_t _params,
size_t _returns,
size_t const _params,
size_t const _returns,
SideEffects _sideEffects,
ControlFlowSideEffects _controlFlowSideEffects,
std::vector<std::optional<LiteralKind>> _literalArguments,
std::function<void(FunctionCall const&, AbstractAssembly&, BuiltinContext&)> _generateCode
) -> std::optional<BuiltinFunctionForEVM>
)
{
if (!_objectAccess)
return std::nullopt;
return createFunction(_name, _params, _returns, _sideEffects, _controlFlowSideEffects, std::move(_literalArguments), std::move(_generateCode));
};
if (_objectAccess)
m_builtinFunctions.emplace_back(
createFunction(
_name,
_params,
_returns,
_sideEffects,
_controlFlowSideEffects,
std::move(_literalArguments),
std::move(_generateCode)
)
);
else
m_builtinFunctions.emplace_back(std::nullopt);
}

std::vector<std::optional<BuiltinFunctionForEVM>> const& functions() const
{
return m_builtinFunctions;
}

private:
langutil::EVMVersion m_evmVersion;
std::optional<uint8_t> m_eofVersion;
std::vector<std::optional<BuiltinFunctionForEVM>> m_builtinFunctions;
};

/// Make sure to only add builtins in a way that is consistent over EVM versions. If the order depends on the
/// EVM version - which can easily happen using conditionals -, different dialects' builtin handles
/// become inherently incompatible.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about it, the compatibility between dialects with and without object access should be more prominently documented in the header. I'd mention it for strictAssemblyForEVMObjects() and maybe also for the whole EVMDialect class.

Copy link
Member

@cameel cameel Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH we guarantee so much compatibility between them that I'm not sure we should even be calling these distinct dialects. Move like flavors of the same dialect. It seems like an orthogonal property that could apply to any dialect.

Builtins createBuiltins(langutil::EVMVersion _evmVersion, std::optional<uint8_t> _eofVersion, bool _objectAccess)
{
Builtins builtins(_evmVersion, _eofVersion);
for (auto const& [name, opcode]: evmasm::c_instructions)
builtins.addInstruction(name, opcode);

builtins.emplace_back(createIfObjectAccess("linkersymbol", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
builtins.addIfObjectAccess(_objectAccess, "linkersymbol", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext&
) {
yulAssert(_call.arguments.size() == 1, "");
Expression const& arg = _call.arguments.front();
_assembly.appendLinkerSymbol(formatLiteral(std::get<Literal>(arg)));
}));
builtins.emplace_back(createIfObjectAccess(
"memoryguard",
1,
1,
SideEffects{},
ControlFlowSideEffects{},
{LiteralKind::Number},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext&
) {
yulAssert(_call.arguments.size() == 1, "");
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal, "");
_assembly.appendConstant(literal->value.value());
})
);
});

builtins.addIfObjectAccess(_objectAccess, "memoryguard", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::Number}, [](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext&
) {
yulAssert(_call.arguments.size() == 1, "");
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal, "");
_assembly.appendConstant(literal->value.value());
});

if (!_eofVersion.has_value())
{
builtins.emplace_back(createIfObjectAccess("datasize", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
builtins.addIfObjectAccess(_objectAccess, "datasize", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
FunctionCall const& _call,
Comment on lines 313 to 316
Copy link
Member

@cameel cameel Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that now some conditions are handled here and some in Builtins still bothers me a bit. I think it would be best if we made the whole thing more declarative. I.e. have one place that declares the builtins along with their scopes (i.e. EVM/EOF version range and state of object access). Then have another place that loops over all builtins that decides whether to actually define them based on their scopes.

The definition loop would then be the single place that decides whether the opcode should be skipped and whether to leave an empty spot in m_functions.

This would be a more comprehensive solution for the problem, because we would no longer touch that definition loop when adding new opcodes. We would only touch declarations and scopes without the risk of accidentally breaking the logic.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, not sure "scope" is the best way to call it, but I could not think of anything better so far. Could be just "conditions", but that sounds very non-specific.

AbstractAssembly& _assembly,
BuiltinContext& _context
@@ -299,8 +332,8 @@ std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVe
yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">.");
_assembly.appendDataSize(subIdPath);
}
}));
builtins.emplace_back(createIfObjectAccess("dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
});
builtins.addIfObjectAccess(_objectAccess, "dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, [](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext& _context
@@ -320,8 +353,9 @@ std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVe
yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">.");
_assembly.appendDataOffset(subIdPath);
}
}));
builtins.emplace_back(createIfObjectAccess(
});
builtins.addIfObjectAccess(
_objectAccess,
"datacopy",
3,
0,
@@ -335,8 +369,9 @@ std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVe
) {
_assembly.appendInstruction(evmasm::Instruction::CODECOPY);
}
));
builtins.emplace_back(createIfObjectAccess(
);
builtins.addIfObjectAccess(
_objectAccess,
"setimmutable",
3,
0,
@@ -362,8 +397,9 @@ std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVe
auto const identifier = (formatLiteral(std::get<Literal>(_call.arguments[1])));
_assembly.appendImmutableAssignment(identifier);
}
));
builtins.emplace_back(createIfObjectAccess(
);
builtins.addIfObjectAccess(
_objectAccess,
"loadimmutable",
1,
1,
@@ -378,82 +414,82 @@ std::vector<std::optional<BuiltinFunctionForEVM>> createBuiltins(langutil::EVMVe
yulAssert(_call.arguments.size() == 1, "");
_assembly.appendImmutable(formatLiteral(std::get<Literal>(_call.arguments.front())));
}
));
);
}
else // EOF context
{
if (_objectAccess)
{
builtins.emplace_back(createFunction(
"auxdataloadn",
1,
1,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN),
{LiteralKind::Number},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext&
) {
yulAssert(_call.arguments.size() == 1);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal, "");
yulAssert(literal->value.value() <= std::numeric_limits<uint16_t>::max());
_assembly.appendAuxDataLoadN(static_cast<uint16_t>(literal->value.value()));
}
));

builtins.emplace_back(createFunction(
"eofcreate",
5,
1,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE),
{LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext& context
) {
yulAssert(_call.arguments.size() == 5);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
auto const formattedLiteral = formatLiteral(*literal);
yulAssert(!util::contains(formattedLiteral, '.'));
auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral);
yulAssert(containerID != nullptr);
yulAssert(*containerID <= std::numeric_limits<AbstractAssembly::ContainerID>::max());
_assembly.appendEOFCreate(static_cast<AbstractAssembly::ContainerID>(*containerID));
}
));

builtins.emplace_back(createFunction(
"returncontract",
3,
0,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT),
{LiteralKind::String, std::nullopt, std::nullopt},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext& context
) {
yulAssert(_call.arguments.size() == 3);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal);
auto const formattedLiteral = formatLiteral(*literal);
yulAssert(!util::contains(formattedLiteral, '.'));
auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral);
yulAssert(containerID != nullptr);
yulAssert(*containerID <= std::numeric_limits<AbstractAssembly::ContainerID>::max());
_assembly.appendReturnContract(static_cast<AbstractAssembly::ContainerID>(*containerID));
}
));
}
builtins.addIfObjectAccess(
_objectAccess,
"auxdataloadn",
1,
1,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN),
{LiteralKind::Number},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext&
) {
yulAssert(_call.arguments.size() == 1);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal, "");
yulAssert(literal->value.value() <= std::numeric_limits<uint16_t>::max());
_assembly.appendAuxDataLoadN(static_cast<uint16_t>(literal->value.value()));
}
);

builtins.addIfObjectAccess(
_objectAccess,
"eofcreate",
5,
1,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE),
{LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext& context
) {
yulAssert(_call.arguments.size() == 5);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
auto const formattedLiteral = formatLiteral(*literal);
yulAssert(!util::contains(formattedLiteral, '.'));
auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral);
yulAssert(containerID != nullptr);
yulAssert(*containerID <= std::numeric_limits<AbstractAssembly::ContainerID>::max());
_assembly.appendEOFCreate(static_cast<AbstractAssembly::ContainerID>(*containerID));
}
);

builtins.addIfObjectAccess(
_objectAccess,
"returncontract",
3,
0,
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT),
ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT),
{LiteralKind::String, std::nullopt, std::nullopt},
[](
FunctionCall const& _call,
AbstractAssembly& _assembly,
BuiltinContext& context
) {
yulAssert(_call.arguments.size() == 3);
Literal const* literal = std::get_if<Literal>(&_call.arguments.front());
yulAssert(literal);
auto const formattedLiteral = formatLiteral(*literal);
yulAssert(!util::contains(formattedLiteral, '.'));
auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral);
yulAssert(containerID != nullptr);
yulAssert(*containerID <= std::numeric_limits<AbstractAssembly::ContainerID>::max());
_assembly.appendReturnContract(static_cast<AbstractAssembly::ContainerID>(*containerID));
}
);
}
yulAssert(
ranges::all_of(builtins, [](std::optional<BuiltinFunctionForEVM> const& _builtinFunction){
ranges::all_of(builtins.functions(), [](std::optional<BuiltinFunctionForEVM> const& _builtinFunction){
return !_builtinFunction || _builtinFunction->name.substr(0, "verbatim_"s.size()) != "verbatim_";
}),
"Builtin functions besides verbatim should not start with the verbatim_ prefix."
@@ -474,22 +510,22 @@ EVMDialect::EVMDialect(langutil::EVMVersion _evmVersion, std::optional<uint8_t>
m_objectAccess(_objectAccess),
m_evmVersion(_evmVersion),
m_eofVersion(_eofVersion),
m_functions(createBuiltins(_evmVersion, _eofVersion, _objectAccess)),
m_functions(createBuiltins(_evmVersion, _eofVersion, _objectAccess).functions()),
m_reserved(createReservedIdentifiers(_evmVersion, _eofVersion))
{
for (auto const& [index, maybeBuiltin]: m_functions | ranges::views::enumerate)
if (maybeBuiltin)
// ids are offset by the maximum number of verbatim functions
m_builtinFunctionsByName[maybeBuiltin->name] = BuiltinHandle{index + verbatimIDOffset};

m_discardFunction = findBuiltin("pop");
m_equalityFunction = findBuiltin("eq");
m_booleanNegationFunction = findBuiltin("iszero");
m_memoryStoreFunction = findBuiltin("mstore");
m_memoryLoadFunction = findBuiltin("mload");
m_storageStoreFunction = findBuiltin("sstore");
m_storageLoadFunction = findBuiltin("sload");
m_hashFunction = findBuiltin("keccak256");
m_discardFunction = EVMDialect::findBuiltin("pop");
m_equalityFunction = EVMDialect::findBuiltin("eq");
m_booleanNegationFunction = EVMDialect::findBuiltin("iszero");
m_memoryStoreFunction = EVMDialect::findBuiltin("mstore");
m_memoryLoadFunction = EVMDialect::findBuiltin("mload");
m_storageStoreFunction = EVMDialect::findBuiltin("sstore");
m_storageLoadFunction = EVMDialect::findBuiltin("sload");
m_hashFunction = EVMDialect::findBuiltin("keccak256");

m_auxiliaryBuiltinHandles.add = EVMDialect::findBuiltin("add");
m_auxiliaryBuiltinHandles.exp = EVMDialect::findBuiltin("exp");
@@ -581,6 +617,11 @@ SideEffects EVMDialect::sideEffectsOfInstruction(evmasm::Instruction _instructio
};
}

std::set<std::string_view> EVMDialect::builtinFunctionNames() const
{
return ranges::views::keys(m_builtinFunctionsByName) | ranges::to<std::set>;
}

BuiltinFunctionForEVM EVMDialect::createVerbatimFunctionFromHandle(BuiltinHandle const& _handle)
{
return std::apply(createVerbatimFunction, verbatimIndexToArgsAndRets(_handle.id));
2 changes: 2 additions & 0 deletions libyul/backends/evm/EVMDialect.h
Original file line number Diff line number Diff line change
@@ -113,6 +113,8 @@ class EVMDialect: public Dialect
static size_t constexpr verbatimMaxInputSlots = 100;
static size_t constexpr verbatimMaxOutputSlots = 100;

std::set<std::string_view> builtinFunctionNames() const;

protected:
static bool constexpr isVerbatimHandle(BuiltinHandle const& _handle) { return _handle.id < verbatimIDOffset; }
static BuiltinFunctionForEVM createVerbatimFunctionFromHandle(BuiltinHandle const& _handle);
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -145,6 +145,8 @@ set(libyul_sources
libyul/ControlFlowSideEffectsTest.h
libyul/EVMCodeTransformTest.cpp
libyul/EVMCodeTransformTest.h
libyul/EVMDialectCompatibility.h
libyul/EVMDialectCompatibility.cpp
libyul/FunctionSideEffects.cpp
libyul/FunctionSideEffects.h
libyul/Inliner.cpp
82 changes: 82 additions & 0 deletions test/libyul/EVMDialectCompatibility.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0

#include <test/libyul/EVMDialectCompatibility.h>

#include <boost/test/data/test_case.hpp>
#include <boost/test/data/monomorphic.hpp>

namespace bdata = boost::unit_test::data;

using namespace solidity;
using namespace solidity::yul;
using namespace solidity::yul::test;

BOOST_AUTO_TEST_SUITE(EVMDialectCompatibility)

BOOST_DATA_TEST_CASE(
builtin_function_handle_compatibility_non_eof,
bdata::make(generateEVMDialectConfigurationsToTest(std::nullopt)),
evmDialectConfigurationToTest
)
{
auto const& dialectToTestAgainst = evmDialectConfigurationToTest.dialect();
// no object access for current
{
auto const& currentDialect = EVMDialect::strictAssemblyForEVM({}, std::nullopt);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd keep it more explicit to make places where we hard-code current EVM version easier to spot.

Suggested change
auto const& currentDialect = EVMDialect::strictAssemblyForEVM({}, std::nullopt);
auto const& currentDialect = EVMDialect::strictAssemblyForEVM(EVMVersion{}, std::nullopt);

BTW, we should probably add that EVMVersion::current() helper. But just internally, without exposing it to the user.

for (auto const& builtinFunctionName: currentDialect.builtinFunctionNames())
requireBuiltinCompatibility(currentDialect, dialectToTestAgainst, builtinFunctionName);
}
// object access for current
{
auto const& currentDialect = EVMDialect::strictAssemblyForEVMObjects({}, std::nullopt);
for (auto const& builtinFunctionName: currentDialect.builtinFunctionNames())
requireBuiltinCompatibility(currentDialect, dialectToTestAgainst, builtinFunctionName);
}
}

BOOST_DATA_TEST_CASE(
builtin_function_handle_compatibility_eof,
bdata::monomorphic::grid(
bdata::make(generateEVMDialectConfigurationsToTest(std::nullopt)) + bdata::make(generateEVMDialectConfigurationsToTest(1)),
Comment on lines +54 to +56
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to actually be testing both EOF and non-EOF despite the name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And a much more relevant difference is that it's using the latest rather than the default EVM version. In not so distant future both current and latest will have EOF, but the fact that current is not always the latest will remain. If you replace withEOF check with withEOF && evmVersion.supportsEOF() it will be more future-proof. You can then unify the tests and make the source EVM version just another dimension.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I think we should have test case where we cover the "inline" -> "with objects" conversion for all dialects. Converting between distinct dialects is nice to have but these pairs are the most relevant, because for them we actually do perform such a conversion in practice.

bdata::make({false, true})
),
evmDialectConfigurationToTest,
withEOF
)
{
auto const& dialectToTestAgainst = evmDialectConfigurationToTest.dialect();
langutil::EVMVersion latestEVMVersion = langutil::EVMVersion::allVersions().back();
std::optional<uint8_t> eofVersion = std::nullopt;
if (withEOF)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's likely that we'll forget to update this if we ever add a EOF version. We should define allEOFVersions() (analogous to EVMVersion::allVersions()), so that we can just iterate over them.

eofVersion = 1;
// no object access for latest
{
auto const& latestDialect = EVMDialect::strictAssemblyForEVM(latestEVMVersion, eofVersion);
for (auto const& builtinFunctionName: latestDialect.builtinFunctionNames())
requireBuiltinCompatibility(latestDialect, dialectToTestAgainst, builtinFunctionName);
}
// object access for latest
{
auto const& latestDialect = EVMDialect::strictAssemblyForEVMObjects(latestEVMVersion, eofVersion);
for (auto const& builtinFunctionName: latestDialect.builtinFunctionNames())
requireBuiltinCompatibility(latestDialect, dialectToTestAgainst, builtinFunctionName);
}
}

BOOST_AUTO_TEST_SUITE_END()
78 changes: 78 additions & 0 deletions test/libyul/EVMDialectCompatibility.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
This file is part of solidity.
solidity is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
solidity is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with solidity. If not, see <http://www.gnu.org/licenses/>.
*/
// SPDX-License-Identifier: GPL-3.0

#pragma once

#include <libyul/backends/evm/EVMDialect.h>

#include <boost/test/unit_test.hpp>

#include <fmt/format.h>

#include <cstdint>
#include <optional>
#include <vector>


namespace solidity::yul::test
{

inline void requireBuiltinCompatibility(EVMDialect const& _dialect1, EVMDialect const& _dialect2, std::string_view const _builtin)
{
if (auto const currentHandle = _dialect1.findBuiltin(_builtin))
if (auto const handle = _dialect2.findBuiltin(_builtin))
Comment on lines +35 to +38
Copy link
Member

@cameel cameel Mar 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will only ensure that builtins available in both dialects have matching IDs, which is necessary but not sufficient for full compatibility. We also need to ensure that the dialect with objects has all the builtins present in the one without.

BOOST_REQUIRE_MESSAGE(
*handle == *currentHandle,
fmt::format("Failed for \"{}\" with IDs {} =/= {}.", _builtin, currentHandle->id, handle->id)
);
}

struct EVMDialectConfigurationToTest
{
EVMDialect const& dialect() const
{
return objectAccess ? EVMDialect::strictAssemblyForEVMObjects(evmVersion, eofVersion) : EVMDialect::strictAssemblyForEVM(evmVersion, eofVersion);
}

friend std::ostream& operator<<(std::ostream& _out, EVMDialectConfigurationToTest const& _config)
{
_out << fmt::format(
"EVMConfigurationToTest[{}, eof={}, objectAccess={}]",
_config.evmVersion.name(),
_config.eofVersion.has_value() ? std::to_string(*_config.eofVersion) : "null",
_config.objectAccess
);
return _out;
}

langutil::EVMVersion evmVersion;
std::optional<uint8_t> eofVersion;
bool objectAccess;
};

inline std::vector<EVMDialectConfigurationToTest> generateEVMDialectConfigurationsToTest(std::optional<uint8_t> _eofVersion)
{
std::vector<EVMDialectConfigurationToTest> configs;
for (bool providesObjectAccess: {false, true})
for (auto evmVersion: langutil::EVMVersion::allVersions())
if (!_eofVersion || evmVersion >= langutil::EVMVersion::firstWithEOF())
configs.push_back(EVMDialectConfigurationToTest{evmVersion, _eofVersion, providesObjectAccess});
return configs;
}

}