Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug: Fix compatibility running ecFSEOF with GECKO3 models #301

Merged
merged 13 commits into from
May 11, 2023
155 changes: 80 additions & 75 deletions src/geckomat/utilities/ecFSEOF/ecFlux_scanning.m
Original file line number Diff line number Diff line change
@@ -1,141 +1,146 @@
function FC = ecFlux_scanning(model,target,C_source,alpha,tol,filterG)
function FC = ecFlux_scanning(ecModel,target,cSource,alpha,tolerance,filterG)
%ecFlux_scanning
%
% model (struct) ecModel with total protein pool constraint.
% the model should come with growth pseudoreaction as
% an objective to maximize.
% target (string) Rxn ID for the production target reaction,
% a exchange reaction is recommended.
% cSource (string) Rxn ID for the main carbon source uptake
% reaction
% alpha (dobule) scalling factor for production yield
% for enforced objective limits
% tol (double, optional) numerical tolerance for fixing
% bounds
% filterG (logical) TRUE if genes K_scores results should
% be filtered according to the alpha vector distribution
%
% Usage: FC = compare_substrate(model,target,C_source,alpha,tol,filterG)
% Input:
% ecModel an ecModel in GECKO 3 format (with ecModel.ec structure).
% rxnTarget rxn ID for the production target reaction, a exchange
% reaction is recommended.
% cSource rxn ID for the main carbon source uptake reaction.
% alpha scalling factor for production yield for enforced objective
% limits
% tolerance numerical tolerance for fixing bounds.
% (Optional, defaul 1E-4)
% filterG logical value. TRUE if genes K_scores results should be
% filtered according to the alpha vector distribution
% (Optional, defaul false)
%
% Usage:
% FC = ecFlux_scanning(model,target,cSource,alpha,tolerance,filterG)

if nargin < 6 || isempty(filterG)
filterG = false;
end

if nargin<6
filterG = false;
if nargin < 5 || isempty(tolerance)
tolerance = 1E-4;
end

%Simulate WT (100% growth):
cd ..
FC.flux_WT = simulateGrowth(model,target,C_source,1,1);
%Simulate forced (X% growth and the rest towards product) based on yield:
% Simulate WT (100% growth):
[~, FC.flux_WT] = getFluxTarget(ecModel,target,cSource);

% Simulate forced (X% growth and the rest towards product) based on yield:
FC.alpha = alpha;
%initialize fluxes and K_scores matrices
v_matrix = zeros(length(model.rxns),length(alpha));
k_matrix = zeros(length(model.rxns),length(alpha));

% Initialize fluxes and K_scores matrices
v_matrix = zeros(length(ecModel.rxns),length(alpha));
k_matrix = zeros(length(ecModel.rxns),length(alpha));
for i = 1:length(alpha)
%disp(['Iteration #' num2str(i)])
FC.flux_MAX = simulateGrowth(model,target,C_source,1,alpha(i));
[~, FC.flux_MAX] = getFluxTarget(ecModel,target,cSource,alpha(i));
v_matrix(:,i) = FC.flux_MAX;
k_matrix(:,i) = FC.flux_MAX./FC.flux_WT;
end
cd ecFSEOF
%Take out rxns with no grRule:
withGR = ~cellfun(@isempty,model.grRules);
%Generate rxn equations:
rxnEqs = constructEquations(model,model.rxns(withGR),true);

% Take out rxns with no grRule:
withGR = ~cellfun(@isempty,ecModel.grRules);

% Generate rxn equations:
rxnEqs = constructEquations(ecModel,ecModel.rxns(withGR),true);
v_matrix = v_matrix(withGR,:);
k_matrix = k_matrix(withGR,:);
rxnGeneM = model.rxnGeneMat(withGR,:);
FC.rxns = [model.rxns(withGR),model.rxnNames(withGR),model.grRules(withGR),rxnEqs];
rxnGeneM = ecModel.rxnGeneMat(withGR,:);
FC.rxns = [ecModel.rxns(withGR),ecModel.rxnNames(withGR),ecModel.grRules(withGR),rxnEqs];

%Filter out rxns that are always zero -> k=0/0=NaN:
% Filter out rxns that are always zero -> k=0/0=NaN:
non_nan = sum(~isnan(k_matrix),2) > 0;
v_matrix = v_matrix(non_nan,:);
k_matrix = k_matrix(non_nan,:);
rxnGeneM = rxnGeneM(non_nan,:);
FC.rxns = FC.rxns(non_nan,:);

%Replace remaining NaNs with 1s:
% Replace remaining NaNs with 1s:
k_matrix(isnan(k_matrix)) = 1;
%Replace any Inf value with 1000 (maximum value is ~700):

% Replace any Inf value with 1000 (maximum value is ~700):
k_matrix(k_matrix>1000) = 1000;

%Filter out values that are inconsistent at different alphas:
always_down = sum(k_matrix <= (1-tol),2) == length(alpha);
always_up = sum(k_matrix >= (1+tol),2) == length(alpha);
%Identify those reactions with mixed patterns
% Filter out values that are inconsistent at different alphas:
always_down = sum(k_matrix <= 1,2) == length(alpha);
always_up = sum(k_matrix >= 1,2) == length(alpha);

% Identify those reactions with mixed patterns
incons_rxns = always_down + always_up == 0;
%Identify genes that are linked to "inconsistent rxns"

% Identify genes that are linked to "inconsistent rxns"
incons_genes = sum(rxnGeneM(incons_rxns,:),1) > 0;
%Finally, inconsistent reactions are those that are not conected
%to "inconsistent genes" from the original "inconsistent rxns" set

% Finally, inconsistent reactions are those that are not conected
% to "inconsistent genes" from the original "inconsistent rxns" set
incons_rxns = sum(rxnGeneM(:,incons_genes),2) > 0;
%Keep results for the consistent rxns exclusively

% Keep results for the consistent rxns exclusively
v_matrix = v_matrix(~incons_rxns,:);
k_matrix = k_matrix(~incons_rxns,:);
rxnGeneM = rxnGeneM(~incons_rxns,:);
FC.rxns = FC.rxns(~incons_rxns,:);
%Get median k score across steps

% Get median k score across steps
FC.k_rxns = mean(k_matrix,2);
%Remove arm_rxns from the list of rxns with K_score>1
armUP = startsWith(FC.rxns(:,1),'arm_') & FC.k_rxns>1;
FC.k_rxns = FC.k_rxns(~armUP,:);
FC.v_matrix = v_matrix(~armUP,:);
FC.k_matrix = k_matrix(~armUP,:);
rxnGeneM = rxnGeneM(~armUP,:);
FC.rxns = FC.rxns(~armUP,:);

%Order from highest to lowest median k_score (across alphas)

% Order from highest to lowest median k_score (across alphas)
[~,order] = sort(FC.k_rxns,'descend');
FC.k_rxns = FC.k_rxns(order,:);
FC.v_matrix = v_matrix(order,:);
FC.k_matrix = k_matrix(order,:);
rxnGeneM = rxnGeneM(order,:);
FC.rxns = FC.rxns(order,:);
%Create list of remaining genes and filter out any inconsistent score:
%Just those genes that are connected to the remaining rxns are
FC.genes = model.genes(sum(rxnGeneM,1) > 0);
FC.geneNames = model.geneShortNames(sum(rxnGeneM,1) > 0);

% Create list of remaining genes and filter out any inconsistent score:
% Just those genes that are connected to the remaining rxns are
FC.genes = ecModel.genes(sum(rxnGeneM,1) > 0);
FC.geneNames = ecModel.geneShortNames(sum(rxnGeneM,1) > 0);
FC.k_genes = zeros(size(FC.genes));
cons_genes = false(size(FC.genes));
rxnGeneM = rxnGeneM(:,sum(rxnGeneM,1) > 0);

for i = 1:length(FC.genes)
%Extract all the K_scores (from rxns across alphas) conected to
%each remaining gene
% Extract all the K_scores (from rxns across alphas) conected to
% each remaining gene
k_set = FC.k_rxns(rxnGeneM(:,i) > 0);
%Check the kind of control that gene i-th exerts over its reactions
always_down = sum(k_set <= (1-tol)) == length(k_set);
always_up = sum(k_set >= (1+tol)) == length(k_set);
%Evaluate if gene is always exerting either a positive or negative
%control
% Check the kind of control that gene i-th exerts over its reactions
always_down = sum(k_set <= (1-tolerance)) == length(k_set);
always_up = sum(k_set >= (1+tolerance)) == length(k_set);
% Evaluate if gene is always exerting either a positive or negative
% control
cons_genes(i) = always_down + always_up == 1;
FC.k_genes(i) = mean(k_set);
end
%Keep "consistent genes"
% Keep "consistent genes"
FC.genes = FC.genes(cons_genes);
FC.geneNames = FC.geneNames(cons_genes);
FC.k_genes = FC.k_genes(cons_genes);
rxnGeneM = rxnGeneM(:,cons_genes);

if filterG
%Filter any value between mean(alpha) and 1:
unchanged = (FC.k_genes >= mean(alpha) - tol) + (FC.k_genes <= 1 + tol) == 2;
FC.genes = FC.genes(~unchanged);
FC.geneNames = FC.geneNames(~unchanged);
FC.k_genes = FC.k_genes(~unchanged);
% Filter any value between mean(alpha) and 1:
unchanged = (FC.k_genes >= mean(alpha) - tolerance) + (FC.k_genes <= 1 + tolerance) == 2;
FC.genes = FC.genes(~unchanged);
FC.geneNames = FC.geneNames(~unchanged);
FC.k_genes = FC.k_genes(~unchanged);
rxnGeneM = rxnGeneM(:,~unchanged);
%Update results for rxns-related fields (remove remaining reactions
%without any associated gene in rxnGeneM)
% Update results for rxns-related fields (remove remaining reactions
% without any associated gene in rxnGeneM)
toKeep = (sum(rxnGeneM,2) > 0);
FC.k_rxns = FC.k_rxns(toKeep,:);
FC.v_matrix = v_matrix(toKeep,:);
FC.k_matrix = k_matrix(toKeep,:);
FC.rxns = FC.rxns(toKeep,:);
end

%Order from highest to lowest k:
% Order from highest to lowest k:
[~,order] = sort(FC.k_genes,'descend');
FC.genes = FC.genes(order,:);
FC.geneNames = FC.geneNames(order,:);
FC.k_genes = FC.k_genes(order,:);

end
end
132 changes: 77 additions & 55 deletions src/geckomat/utilities/ecFSEOF/run_ecFSEOF.m
Original file line number Diff line number Diff line change
@@ -1,53 +1,72 @@
function results = run_ecFSEOF(model,rxnTarget,cSource,alphaLims,Nsteps,file1,file2)
%
% run_ecFSEOF
function results = run_ecFSEOF(ecModel,rxnTarget,cSource,alphaLims,Nsteps,file_genes,file_rxns,modelAdapter)
%run_ecFSEOF
%
% Function that runs Flux-scanning with Enforced Objective Function
% for a specified production target.
%
% model (struct) ecModel with total protein pool constraint.
% rxnTarget (string) Rxn ID for the production target reaction,
% a exchange reaction is recommended.
% cSource (string) Rxn ID for the main carbon source uptake
% reaction (make sure that the correct directionality is
% indicated).
% alphaLims (vector) Minimum and maximum biomass yield [gDw/mmol Csource]
% for enforced objective limits
% Nsteps (integer) Number of steps for suboptimal objective
% in FSEOF
% file1 (string) opt, file name for an output .txt tab-separated
% file for the results at the genes level
% file2 (string) opt, file name for an output .txt tab-separated
% file for the results at the reactions level
% Function that runs Flux-scanning with Enforced Objective Function
% for a specified production target.
%
% Usage: run_ecFSEOF(ecModel,rxnTarget,cSource,alphaLims,Nsteps)
% Input:
% ecModel an ecModel in GECKO 3 format (with ecModel.ec structure).
% rxnTarget rxn ID for the production target reaction, a exchange
% reaction is recommended.
% cSource rxn ID for the main carbon source uptake reaction.
% alphaLims vector of Minimum and maximum biomass scalling factors for
% enforced objective limits (e.g. [0.5 1]). Max value: 1.
% Nsteps number of steps for suboptimal objective in FSEOF.
% (Optional, default 16)
% file_genes file name for results output at the genes level.
% (Optional, default output in the model-specific 'output'
% sub-folder taken from modelAdapter,
% e.g. GECKO/userData/ecYeastGEM/output/ecFSEOF_genes.tsv)
% file_rxns file name for results output at the reactions level.
% (Optional, default output in the model-specific 'output'
% sub-folder taken from modelAdapter,
% e.g. GECKO/userData/ecYeastGEM/output/ecFSEOF_rxns.tsv)
% modelAdapter a loaded model adapter. (Optional, will otherwise use
% the default model adapter)
%
% Usage:
% results = run_ecFSEOF(ecModel,rxnTarget,cSource,alphaLims)
%

if nargin < 8 || isempty(modelAdapter)
modelAdapter = ModelAdapterManager.getDefaultAdapter();
if isempty(modelAdapter)
error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
end
end
params = modelAdapter.getParameters();

if nargin < 7 || isempty(file_rxns)
file_rxns = fullfile(params.path,'output','ecFSEOF_rxns.tsv');
end

if nargin < 7
file2 = [];
if nargin < 6
file1 = [];
end
if nargin < 6 || isempty(file_genes)
file_genes = fullfile(params.path,'output','ecFSEOF_genes.tsv');
end
%Check model format, if COBRA model then convert to RAVEN format
if isfield('model','rules') && ~isfield('model','metComps')
model = ravenCobraWrapper(model);

if nargin < 5 || isempty(Nsteps)
Nsteps = 16;
end
%Define alpha vector for suboptimal enforced objective values

% Define alpha vector for suboptimal enforced objective values
alphaV = alphaLims(1):((alphaLims(2)-alphaLims(1))/(Nsteps-1)):alphaLims(2);
%Standardize grRules and rxnGeneMat in model
[grRules,rxnGeneMat] = standardizeGrRules(model,true);
model.grRules = grRules;
model.rxnGeneMat = rxnGeneMat;
%run FSEOF analysis
results = ecFlux_scanning(model,rxnTarget,cSource,alphaV,1E-4,true);
%Create gene table:

% Standardize grRules and rxnGeneMat in model
[grRules,rxnGeneMat] = standardizeGrRules(ecModel,true);
ecModel.grRules = grRules;
ecModel.rxnGeneMat = rxnGeneMat;

% run FSEOF analysis
results = ecFlux_scanning(ecModel,rxnTarget,cSource,alphaV,1E-4,true);

% Create gene table:
results.geneTable = cell(length(results.genes),3);
results.geneTable(:,1) = results.genes;
results.geneTable(:,2) = results.geneNames;
results.geneTable(:,3) = num2cell(results.k_genes);
%Create rxns table (exclude enzyme usage reactions):
toKeep = find(~startsWith(results.rxns(:,1),'draw_prot_'));

% Create rxns table (exclude enzyme usage reactions):
toKeep = find(~startsWith(results.rxns(:,1),'usage_prot_'));
results.k_rxns = results.k_rxns(toKeep);
results.k_matrix = results.k_matrix(toKeep,:);
results.v_matrix = results.v_matrix(toKeep,:);
Expand All @@ -57,24 +76,27 @@
results.rxnsTable(:,3) = num2cell(results.k_rxns);
results.rxnsTable(:,4) = results.rxns(toKeep,3);
results.rxnsTable(:,5) = results.rxns(toKeep,4);
try
if ~isempty(file1)
varNames = {'gene_IDs' 'gene_names' 'K_score'};
T = cell2table(results.geneTable,'VariableNames',varNames);
writetable(T,file1,'Delimiter','\t','QuoteStrings',false)
end
if ~isempty(file2)
varNames = {'rxn_IDs' 'rxnNames' 'K_score' 'grRules' 'rxn_formula'};
T = cell2table(results.rxnsTable,'VariableNames',varNames);
writetable(T,file2,'Delimiter','\t','QuoteStrings',false)
end
catch
warning('Output files directory not found');
end
%Remove redundant output fields

writetable(cell2table(results.geneTable, ...
'VariableNames', {'gene_IDs' 'gene_names' 'K_score'}), ...
file_genes, ...
'FileType', 'text', ...
'Delimiter', '\t', ...
'QuoteStrings', false);

writetable(cell2table(results.rxnsTable, ...
'VariableNames', {'rxn_IDs' 'rxnNames' 'K_score' 'grRules' 'rxn_formula'}), ...
file_rxns, ...
'FileType', 'text', ...
'Delimiter', '\t', ...
'QuoteStrings', false);

disp(['ecFSEOF results stored at: ' newline fileparts(file_genes)]);

% Remove redundant output fields
results = rmfield(results,'k_rxns');
results = rmfield(results,'rxns');
results = rmfield(results,'genes');
results = rmfield(results,'geneNames');
results = rmfield(results,'k_genes');
end
end
Loading