Skip to content
This repository has been archived by the owner on Feb 17, 2025. It is now read-only.

Parallelize gate argument outside the expression tree. #19

Merged
merged 2 commits into from
Jul 30, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -77,18 +77,18 @@ namespace nil {
constexpr static const std::size_t argument_size = 1;

static inline void build_variable_value_map(
const math::expression<polynomial_dfs_variable_type>& expr,
const math::expression<variable_type>& expr,
const plonk_polynomial_dfs_table<FieldType> &assignments,
std::shared_ptr<math::evaluation_domain<FieldType>> domain,
std::size_t extended_domain_size,
std::unordered_map<polynomial_dfs_variable_type, polynomial_dfs_type>& variable_values_out) {
std::unordered_map<variable_type, polynomial_dfs_type>& variable_values_out) {

std::unordered_map<polynomial_dfs_variable_type, size_t> variable_counts;
std::unordered_map<variable_type, size_t> variable_counts;

std::vector<polynomial_dfs_variable_type> variables;
std::vector<variable_type> variables;

math::expression_for_each_variable_visitor<polynomial_dfs_variable_type> visitor(
[&variable_counts, &variables, &variable_values_out](const polynomial_dfs_variable_type& var) {
math::expression_for_each_variable_visitor<variable_type> visitor(
[&variable_counts, &variables, &variable_values_out](const variable_type& var) {
// Create the structure of the map so we can change the values later.
if (variable_counts[var] == 0) {
variables.push_back(var);
@@ -107,12 +107,14 @@ namespace nil {

parallel_for(0, variables.size(),
[&variables, &variable_values_out, &assignments, &domain, &extended_domain, extended_domain_size](std::size_t i) {
const auto& var = variables[i];
// We may have variable values in required sizes in some cases.
if (variable_values_out[var].size() == extended_domain_size)
return;
const variable_type& var = variables[i];

polynomial_dfs_type assignment = assignments.get_variable_value(var, domain);
// Convert the variable to polynomial_dfs variable type.
polynomial_dfs_variable_type var_dfs(var.index, var.rotation, var.relative,
static_cast<typename polynomial_dfs_variable_type::column_type>(
static_cast<std::uint8_t>(var.type)));

polynomial_dfs_type assignment = assignments.get_variable_value(var_dfs, domain);

// In parallel version we always resize the assignment poly, it's better for parallelization.
// if (count > 1) {
@@ -152,11 +154,7 @@ namespace nil {
degree_limits.push_back(max_degree / 2);
extended_domain_sizes.push_back(max_domain_size / 2);

std::vector<math::expression<polynomial_dfs_variable_type>> expressions(extended_domain_sizes.size());

// Only in parallel version we store the subexpressions of each expression and ignore the cache.
std::vector<std::vector<math::expression<polynomial_dfs_variable_type>>> subexpressions(extended_domain_sizes.size());

std::vector<math::expression<variable_type>> expressions(extended_domain_sizes.size());
auto theta_acc = FieldType::value_type::one();

// Every constraint has variable type 'variable_type', but we want it to use
@@ -170,28 +168,10 @@ namespace nil {
const auto& gates = constraint_system.gates();

for (const auto& gate: gates) {
std::vector<math::expression<polynomial_dfs_variable_type>> gate_results(extended_domain_sizes.size());

// We will split gates into parts especially for zkEVM circuit, since there is only 1 large gate with
// 683 constraints. Will split it into 24 parts, ~32 constraints each.
// This will mean our code will multiply by selector 16 times, instead of just once. But this is
// much better than losing parallelization. We do not want to re-write the whole code to try to parallelize
// each gate computation separately. This will not harm circuits with smaller number of terms much.
std::vector<math::expression<polynomial_dfs_variable_type>> gate_parts(extended_domain_sizes.size());
std::vector<std::size_t> gate_parts_constaint_counts(extended_domain_sizes.size());


// This parameter can be tuned based on the circuit and the number of cores of the server on which the proofs
// are generated. On the current zkEVM circuit this value is optimal based on experiments.
const std::size_t constraint_limit = 16;


auto selector = polynomial_dfs_variable_type(
gate.selector_index, 0, false, polynomial_dfs_variable_type::column_type::selector);

std::vector<math::expression<variable_type>> gate_results(extended_domain_sizes.size());
for (std::size_t constraint_idx = 0; constraint_idx < gate.constraints.size(); ++constraint_idx) {
const auto& constraint = gate.constraints[constraint_idx];
auto next_term = converter.convert(constraint) * value_type_to_polynomial_dfs(theta_acc);
auto next_term = constraint * theta_acc;

theta_acc *= theta;
// +1 stands for the selector multiplication.
@@ -200,57 +180,44 @@ namespace nil {
// Whatever the degree of term is, add it to the maximal degree expression.
if (degree_limits[i] >= constraint_degree || i == 0) {
gate_results[i] += next_term;
gate_parts[i] += next_term;
gate_parts_constaint_counts[i]++;

// If we already have constraint_limit constraints in the gate_parts[i], add it to the 'subexpressions'.
if (gate_parts_constaint_counts[i] == constraint_limit) {
subexpressions[i].push_back(gate_parts[i] * selector);
gate_parts[i] = math::expression<polynomial_dfs_variable_type>();
gate_parts_constaint_counts[i] = 0;
}
break;
}

}
}

auto selector = variable_type(
gate.selector_index, 0, false, variable_type::column_type::selector);
for (size_t i = 0; i < extended_domain_sizes.size(); ++i) {
// Only in parallel version we store the subexpressions of each expression and ignore the cache.
expressions[i] += gate_results[i] * selector;
if (gate_parts_constaint_counts[i] != 0)
subexpressions[i].push_back(gate_parts[i] * selector);
}
}

std::array<polynomial_dfs_type, argument_size> F;

std::vector<polynomial_dfs_type> F_0_parts(extended_domain_sizes.size());
parallel_for(0, extended_domain_sizes.size(),
[&subexpressions, &extended_domain_sizes, &F_0_parts, &original_domain, &column_polynomials, &expressions](std::size_t i) {
std::unordered_map<polynomial_dfs_variable_type, polynomial_dfs_type> variable_values;
F[0] = polynomial_dfs_type::zero();
for (std::size_t i = 0; i < extended_domain_sizes.size(); ++i) {
std::unordered_map<variable_type, polynomial_dfs_type> variable_values;

build_variable_value_map(expressions[i], column_polynomials, original_domain,
extended_domain_sizes[i], variable_values);

std::vector<polynomial_dfs_type> subvalues(subexpressions[i].size());
parallel_for(0, subexpressions[i].size(),
[&subexpressions, &variable_values, &extended_domain_sizes, &subvalues, i](std::size_t subexpression_index) {
// Only in parallel version we store the subexpressions of each expression and ignore the cache,
// not using "cached_expression_evaluator".
math::expression_evaluator<polynomial_dfs_variable_type> evaluator(
subexpressions[i][subexpression_index],
[&assignments=variable_values, domain_size=extended_domain_sizes[i]]
(const polynomial_dfs_variable_type &var) -> const polynomial_dfs_type& {
return assignments[var];
});
subvalues[subexpression_index] = evaluator.evaluate();
}, ThreadPool::PoolLevel::HIGH);
polynomial_dfs_type result(extended_domain_sizes[i] - 1, extended_domain_sizes[i]);
wait_for_all(parallel_run_in_chunks<void>(
extended_domain_sizes[i],
[&variable_values, &extended_domain_sizes, &result, &expressions, i]
(std::size_t begin, std::size_t end) {
for (std::size_t j = begin; j < end; ++j) {
math::expression_evaluator<variable_type> evaluator(
expressions[i],
[&assignments=variable_values, j]
(const variable_type &var) -> const typename FieldType::value_type& {
return assignments[var][j];
});
result[j] = evaluator.evaluate();
}
}, ThreadPool::PoolLevel::HIGH));

F_0_parts[i] = polynomial_sum<FieldType>(std::move(subvalues));
}, ThreadPool::PoolLevel::LASTPOOL);

F[0] += polynomial_sum<FieldType>(std::move(F_0_parts));
F[0] += result;
};
F[0] *= mask_polynomial;
return F;
}