Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelize gate argument outside the expression tree. #19

Merged
merged 2 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,18 @@ namespace nil {
constexpr static const std::size_t argument_size = 1;

static inline void build_variable_value_map(
const math::expression<polynomial_dfs_variable_type>& expr,
const math::expression<variable_type>& expr,
const plonk_polynomial_dfs_table<FieldType> &assignments,
std::shared_ptr<math::evaluation_domain<FieldType>> domain,
std::size_t extended_domain_size,
std::unordered_map<polynomial_dfs_variable_type, polynomial_dfs_type>& variable_values_out) {
std::unordered_map<variable_type, polynomial_dfs_type>& variable_values_out) {

std::unordered_map<polynomial_dfs_variable_type, size_t> variable_counts;
std::unordered_map<variable_type, size_t> variable_counts;

std::vector<polynomial_dfs_variable_type> variables;
std::vector<variable_type> variables;

math::expression_for_each_variable_visitor<polynomial_dfs_variable_type> visitor(
[&variable_counts, &variables, &variable_values_out](const polynomial_dfs_variable_type& var) {
math::expression_for_each_variable_visitor<variable_type> visitor(
[&variable_counts, &variables, &variable_values_out](const variable_type& var) {
// Create the structure of the map so we can change the values later.
if (variable_counts[var] == 0) {
variables.push_back(var);
Expand All @@ -107,12 +107,14 @@ namespace nil {

parallel_for(0, variables.size(),
[&variables, &variable_values_out, &assignments, &domain, &extended_domain, extended_domain_size](std::size_t i) {
const auto& var = variables[i];
// We may have variable values in required sizes in some cases.
if (variable_values_out[var].size() == extended_domain_size)
return;
const variable_type& var = variables[i];

polynomial_dfs_type assignment = assignments.get_variable_value(var, domain);
// Convert the variable to polynomial_dfs variable type.
polynomial_dfs_variable_type var_dfs(var.index, var.rotation, var.relative,
static_cast<typename polynomial_dfs_variable_type::column_type>(
static_cast<std::uint8_t>(var.type)));

polynomial_dfs_type assignment = assignments.get_variable_value(var_dfs, domain);

// In parallel version we always resize the assignment poly, it's better for parallelization.
// if (count > 1) {
Expand Down Expand Up @@ -152,11 +154,7 @@ namespace nil {
degree_limits.push_back(max_degree / 2);
extended_domain_sizes.push_back(max_domain_size / 2);

std::vector<math::expression<polynomial_dfs_variable_type>> expressions(extended_domain_sizes.size());

// Only in parallel version we store the subexpressions of each expression and ignore the cache.
std::vector<std::vector<math::expression<polynomial_dfs_variable_type>>> subexpressions(extended_domain_sizes.size());

std::vector<math::expression<variable_type>> expressions(extended_domain_sizes.size());
auto theta_acc = FieldType::value_type::one();

// Every constraint has variable type 'variable_type', but we want it to use
Expand All @@ -170,28 +168,10 @@ namespace nil {
const auto& gates = constraint_system.gates();

for (const auto& gate: gates) {
std::vector<math::expression<polynomial_dfs_variable_type>> gate_results(extended_domain_sizes.size());

// We will split gates into parts especially for zkEVM circuit, since there is only 1 large gate with
// 683 constraints. Will split it into 24 parts, ~32 constraints each.
// This will mean our code will multiply by selector 16 times, instead of just once. But this is
// much better than losing parallelization. We do not want to re-write the whole code to try to parallelize
// each gate computation separately. This will not harm circuits with smaller number of terms much.
std::vector<math::expression<polynomial_dfs_variable_type>> gate_parts(extended_domain_sizes.size());
std::vector<std::size_t> gate_parts_constaint_counts(extended_domain_sizes.size());


// This parameter can be tuned based on the circuit and the number of cores of the server on which the proofs
// are generated. On the current zkEVM circuit this value is optimal based on experiments.
const std::size_t constraint_limit = 16;


auto selector = polynomial_dfs_variable_type(
gate.selector_index, 0, false, polynomial_dfs_variable_type::column_type::selector);

std::vector<math::expression<variable_type>> gate_results(extended_domain_sizes.size());
for (std::size_t constraint_idx = 0; constraint_idx < gate.constraints.size(); ++constraint_idx) {
const auto& constraint = gate.constraints[constraint_idx];
auto next_term = converter.convert(constraint) * value_type_to_polynomial_dfs(theta_acc);
auto next_term = constraint * theta_acc;

theta_acc *= theta;
// +1 stands for the selector multiplication.
Expand All @@ -200,57 +180,46 @@ namespace nil {
// Whatever the degree of term is, add it to the maximal degree expression.
if (degree_limits[i] >= constraint_degree || i == 0) {
gate_results[i] += next_term;
gate_parts[i] += next_term;
gate_parts_constaint_counts[i]++;

// If we already have constraint_limit constraints in the gate_parts[i], add it to the 'subexpressions'.
if (gate_parts_constaint_counts[i] == constraint_limit) {
subexpressions[i].push_back(gate_parts[i] * selector);
gate_parts[i] = math::expression<polynomial_dfs_variable_type>();
gate_parts_constaint_counts[i] = 0;
}
break;
}

}
}

auto selector = variable_type(
gate.selector_index, 0, false, variable_type::column_type::selector);
for (size_t i = 0; i < extended_domain_sizes.size(); ++i) {
// Only in parallel version we store the subexpressions of each expression and ignore the cache.
expressions[i] += gate_results[i] * selector;
if (gate_parts_constaint_counts[i] != 0)
subexpressions[i].push_back(gate_parts[i] * selector);
}
}

std::array<polynomial_dfs_type, argument_size> F;

std::vector<polynomial_dfs_type> F_0_parts(extended_domain_sizes.size());
parallel_for(0, extended_domain_sizes.size(),
[&subexpressions, &extended_domain_sizes, &F_0_parts, &original_domain, &column_polynomials, &expressions](std::size_t i) {
std::unordered_map<polynomial_dfs_variable_type, polynomial_dfs_type> variable_values;
F[0] = polynomial_dfs_type::zero();
for (std::size_t i = 0; i < extended_domain_sizes.size(); ++i) {
std::unordered_map<variable_type, polynomial_dfs_type> variable_values;

build_variable_value_map(expressions[i], column_polynomials, original_domain,
extended_domain_sizes[i], variable_values);

std::vector<polynomial_dfs_type> subvalues(subexpressions[i].size());
parallel_for(0, subexpressions[i].size(),
[&subexpressions, &variable_values, &extended_domain_sizes, &subvalues, i](std::size_t subexpression_index) {
// Only in parallel version we store the subexpressions of each expression and ignore the cache,
// not using "cached_expression_evaluator".
math::expression_evaluator<polynomial_dfs_variable_type> evaluator(
subexpressions[i][subexpression_index],
[&assignments=variable_values, domain_size=extended_domain_sizes[i]]
(const polynomial_dfs_variable_type &var) -> const polynomial_dfs_type& {
return assignments[var];
});
subvalues[subexpression_index] = evaluator.evaluate();
}, ThreadPool::PoolLevel::HIGH);
polynomial_dfs_type result(extended_domain_sizes[i] - 1, extended_domain_sizes[i]);
wait_for_all(parallel_run_in_chunks<void>(
extended_domain_sizes[i],
[&variable_values, &extended_domain_sizes, &result, &expressions, i]
(std::size_t begin, std::size_t end) {
for (std::size_t j = begin; j < end; ++j) {
// Don't use cache here. In practice it's slower to maintain the cache
// than to re-compute the subexpression value when value type is field element.
math::expression_evaluator<variable_type> evaluator(
expressions[i],
[&assignments=variable_values, j]
(const variable_type &var) -> const typename FieldType::value_type& {
return assignments[var][j];
});
result[j] = evaluator.evaluate();
}
}, ThreadPool::PoolLevel::HIGH));

F_0_parts[i] = polynomial_sum<FieldType>(std::move(subvalues));
}, ThreadPool::PoolLevel::LASTPOOL);

F[0] += polynomial_sum<FieldType>(std::move(F_0_parts));
F[0] += result;
};
F[0] *= mask_polynomial;
return F;
}
Expand Down
Loading
Loading