flowr-analysis · EagleoutIce · May 19, 2025 · May 16, 2025 · May 16, 2025 · May 16, 2025
diff --git a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts
@@ -13,6 +13,7 @@ import { dataflowLogger } from '../../../../logger';
 import { ReferenceType } from '../../../../environments/identifier';
 
 export const UnnamedFunctionCallPrefix = 'unnamed-function-call-';
+export const UnnamedFunctionCallOrigin = 'unnamed';
 
 export function processUnnamedFunctionCall<OtherInfo>(functionCall: RUnnamedFunctionCall<OtherInfo & ParentInformation>, data: DataflowProcessorInformation<OtherInfo & ParentInformation>): DataflowInformation {
 	const calledFunction = processDataflowFor(functionCall.calledFunction, data);
@@ -49,7 +50,7 @@ export function processUnnamedFunctionCall<OtherInfo>(functionCall: RUnnamedFunc
 		onlyBuiltin: false,
 		cds:         data.controlDependencies,
 		args:        callArgs, // same reference
-		origin:      ['unnamed']
+		origin:      [UnnamedFunctionCallOrigin]
 	});
 
 	let inIds = remainingReadInArgs;

diff --git a/src/documentation/print-cfg-wiki.ts b/src/documentation/print-cfg-wiki.ts
@@ -33,6 +33,8 @@ import type { DataflowInformation } from '../dataflow/info';
 import type { DataflowGraphVertexValue } from '../dataflow/graph/vertex';
 import { SemanticCfgGuidedVisitor } from '../control-flow/semantic-cfg-guided-visitor';
 import { NewIssueUrl } from './doc-util/doc-issue';
+import { EdgeType, edgeTypeToName } from '../dataflow/graph/edge';
+import { guard } from '../util/assert';
 
 const CfgLongExample = `f <- function(a, b = 3) {
  if(a > b) {
@@ -184,6 +186,7 @@ For readability, we structure this wiki page into various segments:
 	- [Simple Traversal](#cfg-simple-traversal)
 	- [Diffing and Testing](#cfg-diff-and-test)
 	- [Sophisticated CFG Traversal](#cfg-traversal)
+	- [Working With Exit Points](#cfg-exit-points)
 
 
 ${section('Initial Overview', 2, 'cfg-overview')}
@@ -534,7 +537,50 @@ ${await (async() => {
 })()}
 
 
+${section('Working With Exit Points', 3, 'cfg-exit-points')}
+
+With the [Dataflow Graph](${FlowrWikiBaseRef}/Dataflow%20Graph) you already get a \`${edgeTypeToName(EdgeType.Returns)}\` edge that tells you what a function call returns 
+(given that this function call does neither transform nor create a value).
+But the control flow perspective gives you more! Given a simple addition like \`x + 1\`, the CFG looks like this:
+
+${await (async function() {
+	const cfg = await getCfg(shell, 'x + 1');
+	const [plusVertexId, plusVertex] = [...cfg.info.graph.vertices()].filter(([n]) => recoverName(n, cfg.ast.idMap) === '+')[0];
+	guard(plusVertex.type === CfgVertexType.Expression);
+	const numOfExits
+		= plusVertex.end?.length ?? 0;
+	guard(plusVertex.end && numOfExits === 1);
+
+	return `${await printCFGCode(shell, 'x + 1', { showCode: true, prefix: 'flowchart RL\n' })}
+
+Looking at the binary operation vertex for \`+\` (with id \`${plusVertexId}\`) we see that it is linked to a single exit ("end marker") point: \`${plusVertex.end[0]}\`.
+Checking this vertex essentially reveals all exit points of the expression &mdash; in this case, this simply refers to the operands of the addition.
+However, the idea transfers to more complex expressions as well...
+	`;
+})()}
+
+${details('Example: Exit Points for an if', await (async function() {
+	const expr = 'if(u) 3 else 2';
+	const cfg = await getCfg(shell, expr);
+	const [ifVertexId, ifVertex] = [...cfg.info.graph.vertices()].filter(([n]) => recoverName(n, cfg.ast.idMap) === 'if')[0];
+	guard(ifVertex.type === CfgVertexType.Statement);
+	const numOfExits
+			= ifVertex.end?.length ?? 0;
+	guard(ifVertex.end && numOfExits === 1);
+
+	return `${await printCFGCode(shell, expr, { showCode: true, prefix: 'flowchart RL\n' })}
+
+Looking at the if vertex (with id \`${ifVertexId}\`) we see that it is again linked to a single exit point: \`${ifVertex.end[0]}\`.
+Yet, now this exit vertex is linked to the two branches of the if statement (the \`then\` and \`else\` branch).
+	`;
+})())}
 
+Hence, you may rely on the corresponding exit point(s) to identify all exits of a given expression (in a way, these exit-points are merely super-sinks trying to ensure the hammock graph property).
+
+${block({
+	type:    'WARNING',
+	content: 'Using basic blocks, this works just the same. However, please keep in mind that the corresponding exit markers do not have to (and for control statements usually will not) be part of the same basic block.'
+})}
 
 `;
 }

diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts
@@ -16,21 +16,21 @@ import type { ExplanationParameters, SubExplanationParameters } from './data/dfg
 import { getAllEdges, getAllVertices } from './data/dfg/doc-data-dfg-util';
 import { getReplCommand } from './doc-util/doc-cli-option';
 import type { MermaidTypeReport } from './doc-util/doc-types';
-import { getDocumentationForType , shortLink , getTypesFromFolderAsMermaid, printHierarchy } from './doc-util/doc-types';
+import { getDocumentationForType, getTypesFromFolderAsMermaid, printHierarchy, shortLink } from './doc-util/doc-types';
 import { block, details, section } from './doc-util/doc-structure';
 import { codeBlock } from './doc-util/doc-code';
 import path from 'path';
 import { lastJoin, prefixLines } from './doc-util/doc-general';
 import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
-import { recoverContent , recoverName } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
+import { recoverContent, recoverName } from '../r-bridge/lang-4.x/ast/model/processing/node-id';
 import { ReferenceType } from '../dataflow/environments/identifier';
 import { EmptyArgument } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
 import {
-	resolveByName, resolveIdToValue,
+	resolveByName,
+	resolveIdToValue,
 	resolvesToBuiltInConstant,
 	resolveValueOfVariable
 } from '../dataflow/environments/resolve-by-name';
-import { defaultEnv } from '../../test/functionality/_helper/dataflow/environment-builder';
 import { createDataflowPipeline, DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines';
 import type { PipelineOutput } from '../core/steps/pipeline/pipeline';
 import { autoGenHeader } from './doc-util/doc-auto-gen';
@@ -42,6 +42,12 @@ import { printNormalizedAstForCode } from './doc-util/doc-normalized-ast';
 import type { RFunctionDefinition } from '../r-bridge/lang-4.x/ast/model/nodes/r-function-definition';
 import { getOriginInDfg } from '../dataflow/origin/dfg-get-origin';
 import { getValueOfArgument } from '../queries/catalog/call-context-query/identify-link-to-last-call-relation';
+import { NewIssueUrl } from './doc-util/doc-issue';
+import {
+	UnnamedFunctionCallOrigin,
+	UnnamedFunctionCallPrefix
+} from '../dataflow/internal/process/functions/call/unnamed-call-handling';
+import { defaultEnv } from '../../test/functionality/_helper/dataflow/environment-builder';
 
 async function subExplanation(shell: RShell, { description, code, expectedSubgraph }: SubExplanationParameters): Promise<string> {
 	expectedSubgraph = await verifyExpectedSubgraph(shell, code, expectedSubgraph);
@@ -89,7 +95,7 @@ async function explanation(
 <a id='${name.toLowerCase().replaceAll(' ', '-')}'> </a>
 ### ${index}) ${name}
 
-Type: \`${type}\`
+Type: \`${type}\` (this is the numeric value of the bit-flag encountered when looking at the serialized vertex type)
 
 ${await subExplanation(shell, { name, description, code, expectedSubgraph })}
 
@@ -429,6 +435,21 @@ ${details('Example: Anonymous Function Call (given directly)', await printDfGrap
 
 ${details('Example: Anonymous Function Call (given indirectly)', await printDfGraphForCode(shell, 'foo <- function() return(function() 3)\nfoo()()', { mark: new Set([12, '12->4']) }))}
 
+${block({
+	type:    'NOTE',
+	content: `Now you might be asking yourself how to differentiate anonymous and named functions and what you have to keep in mind when working with them?
+
+Unnamed functions have an array of signatures which you can use to identify them. 
+But in short - the \`origin\` attribute of the ${shortLink('DataflowGraphVertexFunctionCall', vertexType.info)} is \`${UnnamedFunctionCallOrigin}\`.
+Please be aware that unnamed functions still have a \`name\` property to give them a unique identifier that can be used for debugging and reference.
+This name _always_ starts with \`${UnnamedFunctionCallPrefix}\`.
+
+To identify these calls please do not rely on the [Normalized AST](${FlowrWikiBaseRef}/Normalized%20AST). An expression like \`1 + 1\` will be correctly
+identified as a syntactical binary operation. Yet, from a dataflow/semantic perspective this is equivalent to \`\` \`+\`(1, 1) \`\` (which is a named function call and marked as such in the dataflow graph).
+To know which function is called, please rely on the ${linkEdgeName(EdgeType.Calls)} edge.
+	`
+})}
+
 Another interesting case is a function with **side effects**, most prominently with the super-assignment \`<<-\`.
 In this case, you may encounter the ${linkEdgeName(EdgeType.SideEffectOnCall)} as exemplified below.
 ${details('Example: Function Call with a Side-Effect', await printDfGraphForCode(shell, 'f <- function() x <<- 3\n f()', { mark: new Set([8, '1->8']) }))}
@@ -689,11 +710,44 @@ However, nested definitions can carry it (in the nested case, \`x\` is defined b
 
 	edgeExplanations.set(EdgeType.Returns, [{
 		shell,
-		name:             'Returns Edge',
-		type:             EdgeType.Returns,
-		description:      'Link the [function call](#function-call-vertex) to the exit points of the target definition (this may incorporate the call-context).',
+		name:        'Returns Edge',
+		type:        EdgeType.Returns,
+		description: `Link the [function call](#function-call-vertex) to the exit points of the target definition (this may incorporate the call-context).
+As you can see in the example, this happens for user-defined functions (like \`foo\`) as well as for built-in functions (like \`<-\`).
+However, these edges are specific to scenarios in which flowR knows that a specific element is returned. 
+For contrast, compare this to a use of, for example, \`+\`:
+
+${details('Example: No returns edge for +', await printDfGraphForCode(shell,  '1 + 1'))}
+
+Here, we do not get a ${linkEdgeName(EdgeType.Returns)} edge as this function call creates a new value based on its arguments.
+In these scenarios you should rely on the \`args\` property of the ${shortLink('DataflowGraphVertexFunctionCall', vertexType.info)} 
+and use the arguments to calculate what you need to know. Alternatively, you can track the ${linkEdgeName(EdgeType.Argument)} edges.
+
+In general, the ${linkEdgeName(EdgeType.Returns)} edge already does most of the heavy lifting for you, by respecting control flow influences and
+(as long as flowR is able to detect it) dead code.
+
+${details('Example: Tricky Returns', 
+	`We show the _simplified_ DFG for simplicity and highlight all ${linkEdgeName(EdgeType.Returns)} edges involved in tracking the return of a call to \`f\` (as ${linkEdgeName(EdgeType.Returns)} are never transitive and must hence be followed):\n` + 
+	await printDfGraphForCode(shell,  'f <- function() { if(u) { return(3); 2 } else 42 }\nf()', { 
+		simplified: true,
+		mark:       new Set(['19->15', '15->14', '14->12', '14->11', '11->9', '9->7'])
+	})
+			+ '\n\n Note, that the `2` should be completely absent from the dataflow graph (recognized as dead code).'
+)}
+<br/>
+
+${block({
+	type:    'NOTE',
+	content: `You might find it an inconvenience that there is no ${linkEdgeName(EdgeType.Returns)} edge for _every_ function call. 
+If there is a particular function for which you think flowR should be able to detect the return, please open a [new issue](${NewIssueUrl}).
+Yet the problem of flowR not tracking returns for functions that create new/transform existing values is a fundamental design decision &mdash; if this irritates you ~~you may be eligible for compensation~~, you may be interested in an
+alternative with the [Control Flow Graph](${FlowrWikiBaseRef}/Control%20Flow%20Graph#cfg-exit-points) which not just tracks all possible execution orders of the program,
+but also the exit points of _all_ function calls. 
+`
+})}
+		`,
 		code:             'foo <- function() x\nfoo()',
-		expectedSubgraph: emptyGraph().returns('2@foo', '1@x')
+		expectedSubgraph: emptyGraph().returns('2@foo', '1@x').returns('1@<-', '1@foo').argument('1@<-', '1@foo')
 	}, []]);
 
 
@@ -914,10 +968,18 @@ ${prefixLines(codeBlock('ts', `const name = ${recoverName.name}(id, graph.idMap)
 
 ${section('Vertices', 2, 'vertices')}
 
+1. ${getAllVertices().map(
+	([k,v], index) => `[\`${k}\`](#${index + 1}-${v.toLowerCase().replace(/\s/g, '-')}-vertex)`
+).join('\n1. ')}
+
 ${await getVertexExplanations(shell, vertexType)}
 
 ${section('Edges', 2, 'edges')}
 
+1. ${getAllEdges().map(
+	([k, v], index) => `[\`${k}\` (${v})](#${index + 1}-${k.toLowerCase().replace(/\s/g, '-')}-edge)`
+).join('\n1. ')}
+
 ${await getEdgesExplanations(shell, vertexType)}
 
 ${section('Control Dependencies', 2, 'control-dependencies')}

diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts
@@ -7,12 +7,8 @@ import type { DataflowFunctionFlowInformation, DataflowGraph, FunctionArgument }
 import { isNamedArgument, isPositionalArgument } from '../../dataflow/graph/graph';
 import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
 import { normalizeIdToNumberIfPossible } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
-import type {
-	IdentifierDefinition,
-	IdentifierReference } from '../../dataflow/environments/identifier';
-import {
-	ReferenceTypeReverseMapping
-} from '../../dataflow/environments/identifier';
+import type { IdentifierDefinition, IdentifierReference } from '../../dataflow/environments/identifier';
+import { ReferenceTypeReverseMapping } from '../../dataflow/environments/identifier';
 import { EmptyArgument } from '../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
 import type { EdgeType } from '../../dataflow/graph/edge';
 import { edgeTypeToName, splitEdgeTypes } from '../../dataflow/graph/edge';
@@ -74,9 +70,9 @@ function subflowToMermaid(nodeId: NodeId, exitPoints: readonly NodeId[], subflow
 		// get parent
 		const idMap = mermaid.rootGraph.idMap;
 		const node = idMap?.get(nodeId);
-		const nodeLexeme = node?.info.fullLexeme ?? node?.lexeme ?? '??';
+		const nodeLexeme = node?.info.fullLexeme ?? node?.lexeme ?? 'function';
 		const location = node?.location?.[0] ? ` (L. ${node?.location?.[0]})` : '';
-		mermaid.nodeLines.push(`\nsubgraph "${subflowId}" ["${escapeMarkdown(nodeLexeme ?? 'function')}${location}"]`);
+		mermaid.nodeLines.push(`\nsubgraph "${subflowId}" ["${escapeMarkdown(nodeLexeme)}${location}"]`);
 	} else {
 		mermaid.nodeLines.push(`\nsubgraph "${subflowId}" [function ${nodeId}]`);
 	}