Skip to content

Commit c4aae96

Browse files
authored
rubysrc2cpg: fixing call-related .code, .line and .column properties (#2813)
* Switching RUBY's node builder to take ParserRuleContexts instead of TerminalNodes
* Fixing call-related .code, .line and .column properties
1 parent 7767514 commit c4aae96

File tree

2 files changed

+66
-39
lines changed

2 files changed

+66
-39
lines changed

joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,13 @@ package io.joern.rubysrc2cpg.astcreation
22
import io.joern.rubysrc2cpg.parser.RubyParser._
33
import io.joern.rubysrc2cpg.parser.{RubyLexer, RubyParser}
44
import io.joern.x2cpg.Ast.storeInDiffGraph
5-
import io.joern.x2cpg.datastructures.Global
65
import io.joern.x2cpg.Defines.DynamicCallUnknownFullName
6+
import io.joern.x2cpg.datastructures.Global
77
import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder}
8-
import io.shiftleft.codepropertygraph.generated.{
9-
ControlStructureTypes,
10-
DispatchTypes,
11-
ModifierTypes,
12-
NodeTypes,
13-
Operators
14-
}
158
import io.shiftleft.codepropertygraph.generated.nodes._
9+
import io.shiftleft.codepropertygraph.generated._
1610
import org.antlr.v4.runtime.tree.TerminalNode
17-
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, Token}
11+
import org.antlr.v4.runtime.{CharStreams, CommonTokenStream, ParserRuleContext, Token}
1812
import org.slf4j.LoggerFactory
1913
import overflowdb.BatchedUpdate
2014

@@ -25,7 +19,7 @@ import scala.jdk.CollectionConverters._
2519

2620
class AstCreator(filename: String, global: Global)
2721
extends AstCreatorBase(filename)
28-
with AstNodeBuilder[TerminalNode, AstCreator] {
22+
with AstNodeBuilder[ParserRuleContext, AstCreator] {
2923

3024
object Defines {
3125
val Any: String = "ANY"
@@ -83,13 +77,13 @@ class AstCreator(filename: String, global: Global)
8377
}
8478

8579
private def createIdentiferWithScope(
86-
node: TerminalNode,
80+
ctx: ParserRuleContext,
8781
name: String,
8882
code: String,
8983
typeFullName: String,
9084
dynamicTypeHints: Seq[String]
9185
): NewIdentifier = {
92-
val newNode = identifierNode(node, name, code, typeFullName, dynamicTypeHints)
86+
val newNode = identifierNode(ctx, name, code, typeFullName, dynamicTypeHints)
9387
setIdentiferInScope(newNode)
9488
newNode
9589
}
@@ -146,10 +140,10 @@ class AstCreator(filename: String, global: Global)
146140
diffGraph
147141
}
148142

149-
protected def line(node: TerminalNode): Option[Integer] = Option(node.getSymbol.getLine)
150-
protected def column(node: TerminalNode): Option[Integer] = Option(node.getSymbol.getCharPositionInLine)
151-
protected def lineEnd(node: TerminalNode): Option[Integer] = None
152-
protected def columnEnd(node: TerminalNode): Option[Integer] = None
143+
/** Line number reported by the start token of `ctx`, wrapped in an `Option`. */
protected def line(ctx: ParserRuleContext): Option[Integer] = {
  val startToken = ctx.getStart
  Option(startToken.getLine)
}
144+
/** Character position within its line reported by the start token of `ctx`, wrapped in an `Option`. */
protected def column(ctx: ParserRuleContext): Option[Integer] = {
  val startToken = ctx.getStart
  Option(startToken.getCharPositionInLine)
}
145+
/** Line number reported by the stop token of `ctx`, or `None` when the context has no stop token.
  *
  * `ctx.getStop` can be `null` (e.g. a context that matched no tokens at the very start of the input), so the token
  * itself is wrapped in `Option` before it is dereferenced; wrapping only the resulting line number would still
  * throw a `NullPointerException`.
  */
protected def lineEnd(ctx: ParserRuleContext): Option[Integer] =
  Option(ctx.getStop).map(stopToken => Integer.valueOf(stopToken.getLine))
146+
/** Character position within its line reported by the stop token of `ctx`, or `None` when the context has no stop
  * token.
  *
  * `ctx.getStop` can be `null` (e.g. a context that matched no tokens at the very start of the input), so the token
  * itself is wrapped in `Option` before it is dereferenced; wrapping only the resulting position would still throw a
  * `NullPointerException`.
  */
protected def columnEnd(ctx: ParserRuleContext): Option[Integer] =
  Option(ctx.getStop).map(stopToken => Integer.valueOf(stopToken.getCharPositionInLine))
153147

154148
private def registerType(typ: String): String = {
155149
if (typ != Defines.Any) {
@@ -161,7 +155,7 @@ class AstCreator(filename: String, global: Global)
161155
val terminalNode = ctx.children.asScala.map(_.asInstanceOf[TerminalNode]).head
162156
val token = terminalNode.getSymbol
163157
val variableName = token.getText
164-
val node = createIdentiferWithScope(terminalNode, variableName, variableName, Defines.Any, List[String]())
158+
val node = createIdentiferWithScope(ctx, variableName, variableName, Defines.Any, List[String]())
165159
setIdentiferInScope(node)
166160
Seq(Ast(node))
167161
}
@@ -195,7 +189,7 @@ class AstCreator(filename: String, global: Global)
195189
}
196190
val varSymbol = localVar.getSymbol()
197191
val node =
198-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
192+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
199193
val yAst = Ast(node)
200194

201195
val callNode = NewCall()
@@ -747,8 +741,8 @@ class AstCreator(filename: String, global: Global)
747741
val primaryAst = astForPrimaryContext(ctx.primary())
748742
val localVar = ctx.CONSTANT_IDENTIFIER()
749743
val varSymbol = localVar.getSymbol()
750-
val node = createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
751-
val constAst = Ast(node)
744+
val node = createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
745+
val constAst = Ast(node)
752746

753747
val callNode = NewCall()
754748
.name(ctx.COLON2().getText)
@@ -1078,13 +1072,13 @@ class AstCreator(filename: String, global: Global)
10781072
}
10791073

10801074
def astForInvocationWithBlockOnlyPrimaryContext(ctx: InvocationWithBlockOnlyPrimaryContext): Seq[Ast] = {
1081-
val methodIdAst = astForMethodIdentifierContext(ctx.methodIdentifier())
1075+
val methodIdAst = astForMethodIdentifierContext(ctx.methodIdentifier(), ctx.getText)
10821076
val blockAst = astForBlockContext(ctx.block())
10831077
blockAst ++ methodIdAst
10841078
}
10851079

10861080
def astForInvocationWithParenthesesPrimaryContext(ctx: InvocationWithParenthesesPrimaryContext): Seq[Ast] = {
1087-
val methodIdAst = astForMethodIdentifierContext(ctx.methodIdentifier())
1081+
val methodIdAst = astForMethodIdentifierContext(ctx.methodIdentifier(), ctx.getText)
10881082
val parenAst = astForArgumentsWithParenthesesContext(ctx.argumentsWithParentheses())
10891083
val callNode = methodIdAst.head.nodes.filter(_.isInstanceOf[NewCall]).head.asInstanceOf[NewCall]
10901084
callNode.name(getActualMethodName(callNode.name))
@@ -1156,10 +1150,14 @@ class AstCreator(filename: String, global: Global)
11561150
}
11571151

11581152
def astForLiteralPrimaryContext(ctx: LiteralPrimaryContext): Seq[Ast] = {
1153+
val lineStart = line(ctx.literal())
1154+
val columnStart = column(ctx.literal())
11591155
if (ctx.literal().numericLiteral() != null) {
11601156
val text = ctx.getText
11611157
val node = NewLiteral()
11621158
.code(text)
1159+
.lineNumber(lineStart)
1160+
.columnNumber(columnStart)
11631161
.typeFullName(Defines.Number)
11641162
.dynamicTypeHintFullName(List(Defines.Number))
11651163
registerType(Defines.Number)
@@ -1168,13 +1166,17 @@ class AstCreator(filename: String, global: Global)
11681166
val text = ctx.getText
11691167
val node = NewLiteral()
11701168
.code(text)
1169+
.lineNumber(lineStart)
1170+
.columnNumber(columnStart)
11711171
.typeFullName(Defines.String)
11721172
.dynamicTypeHintFullName(List(Defines.String))
11731173
Seq(Ast(node))
11741174
} else if (ctx.literal().DOUBLE_QUOTED_STRING_CHARACTER_SEQUENCE() != null) {
11751175
val text = ctx.literal().DOUBLE_QUOTED_STRING_CHARACTER_SEQUENCE().getText
11761176
val node = NewLiteral()
11771177
.code(text)
1178+
.lineNumber(lineStart)
1179+
.columnNumber(columnStart)
11781180
.typeFullName(Defines.String)
11791181
.dynamicTypeHintFullName(List(Defines.String))
11801182
registerType(Defines.String)
@@ -1190,7 +1192,7 @@ class AstCreator(filename: String, global: Global)
11901192
astForDefinedMethodNameContext(ctx.definedMethodName())
11911193
}
11921194

1193-
def astForCallNode(localIdentifier: TerminalNode): Seq[Ast] = {
1195+
def astForCallNode(localIdentifier: TerminalNode, code: String): Seq[Ast] = {
11941196
val column = localIdentifier.getSymbol().getCharPositionInLine()
11951197
val line = localIdentifier.getSymbol().getLine()
11961198
val name = getActualMethodName(localIdentifier.getText)
@@ -1201,44 +1203,44 @@ class AstCreator(filename: String, global: Global)
12011203
.signature(localIdentifier.getText())
12021204
.typeFullName(DynamicCallUnknownFullName)
12031205
.dispatchType(DispatchTypes.STATIC_DISPATCH)
1204-
.code(localIdentifier.getText())
1206+
.code(code)
12051207
.lineNumber(line)
12061208
.columnNumber(column)
12071209
Seq(callAst(callNode))
12081210
}
12091211

12101212
def astForMethodOnlyIdentifier(ctx: MethodOnlyIdentifierContext): Seq[Ast] = {
12111213
if (ctx.LOCAL_VARIABLE_IDENTIFIER() != null) {
1212-
astForCallNode(ctx.LOCAL_VARIABLE_IDENTIFIER())
1214+
astForCallNode(ctx.LOCAL_VARIABLE_IDENTIFIER(), ctx.getText)
12131215
} else if (ctx.CONSTANT_IDENTIFIER() != null) {
1214-
astForCallNode(ctx.CONSTANT_IDENTIFIER())
1216+
astForCallNode(ctx.CONSTANT_IDENTIFIER(), ctx.getText)
12151217
} else {
12161218
Seq(Ast())
12171219
}
12181220
}
12191221

1220-
def astForMethodIdentifierContext(ctx: MethodIdentifierContext): Seq[Ast] = {
1222+
def astForMethodIdentifierContext(ctx: MethodIdentifierContext, code: String): Seq[Ast] = {
12211223
if (ctx.methodOnlyIdentifier() != null) {
12221224
astForMethodOnlyIdentifier(ctx.methodOnlyIdentifier())
12231225
} else if (ctx.LOCAL_VARIABLE_IDENTIFIER() != null) {
12241226
val localVar = ctx.LOCAL_VARIABLE_IDENTIFIER()
12251227
val varSymbol = localVar.getSymbol()
12261228
if (lookupIdentiferInScope(varSymbol.getText)) {
12271229
val node =
1228-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
1230+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
12291231
Seq(Ast(node))
12301232
} else {
1231-
astForCallNode(localVar)
1233+
astForCallNode(localVar, code)
12321234
}
12331235
} else if (ctx.CONSTANT_IDENTIFIER() != null) {
12341236
val localVar = ctx.CONSTANT_IDENTIFIER()
12351237
val varSymbol = localVar.getSymbol()
12361238
if (lookupIdentiferInScope(varSymbol.getText)) {
12371239
val node =
1238-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
1240+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
12391241
Seq(Ast(node))
12401242
} else {
1241-
astForCallNode(localVar)
1243+
astForCallNode(localVar, code)
12421244
}
12431245
} else {
12441246
Seq(Ast())
@@ -1264,7 +1266,7 @@ class AstCreator(filename: String, global: Global)
12641266

12651267
def astForMethodNameContext(ctx: MethodNameContext): Seq[Ast] = {
12661268
if (ctx.methodIdentifier() != null) {
1267-
astForMethodIdentifierContext(ctx.methodIdentifier())
1269+
astForMethodIdentifierContext(ctx.methodIdentifier(), ctx.getText)
12681270
} else if (ctx.operatorMethodName() != null) {
12691271
astForOperatorMethodNameContext(ctx.operatorMethodName())
12701272
} else if (ctx.keyword() != null) {
@@ -1295,13 +1297,13 @@ class AstCreator(filename: String, global: Global)
12951297
val localVar = ctx.LOCAL_VARIABLE_IDENTIFIER()
12961298
val varSymbol = localVar.getSymbol()
12971299
val node =
1298-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
1300+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
12991301
Seq(Ast(node))
13001302
} else if (ctx.CONSTANT_IDENTIFIER() != null) {
13011303
val localVar = ctx.CONSTANT_IDENTIFIER()
13021304
val varSymbol = localVar.getSymbol()
13031305
val node =
1304-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
1306+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
13051307
Seq(Ast(node))
13061308
} else {
13071309
Seq(Ast())
@@ -1376,7 +1378,7 @@ class AstCreator(filename: String, global: Global)
13761378
localVarList
13771379
.map(localVar => {
13781380
val varSymbol = localVar.getSymbol()
1379-
createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, Seq[String](Defines.Any))
1381+
createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, Seq[String](Defines.Any))
13801382
val param = NewMethodParameterIn()
13811383
.name(varSymbol.getText)
13821384
.code(varSymbol.getText)
@@ -1557,7 +1559,7 @@ class AstCreator(filename: String, global: Global)
15571559
def astForSimpleScopedConstantReferencePrimaryContext(ctx: SimpleScopedConstantReferencePrimaryContext): Seq[Ast] = {
15581560
val localVar = ctx.CONSTANT_IDENTIFIER()
15591561
val varSymbol = localVar.getSymbol()
1560-
val node = createIdentiferWithScope(localVar, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
1562+
val node = createIdentiferWithScope(ctx, varSymbol.getText, varSymbol.getText, Defines.Any, List(Defines.Any))
15611563

15621564
val callNode = NewCall()
15631565
.name(ctx.COLON2().getText)
@@ -1590,7 +1592,7 @@ class AstCreator(filename: String, global: Global)
15901592
case ctx: RubyParser.ArgsAndDoBlockAndMethodIdCommandWithDoBlockContext =>
15911593
val argsAsts = astForArgumentsWithoutParenthesesContext(ctx.argumentsWithoutParentheses())
15921594
val doBlockAsts = astForDoBlockContext(ctx.doBlock())
1593-
val methodIdAsts = astForMethodIdentifierContext(ctx.methodIdentifier())
1595+
val methodIdAsts = astForMethodIdentifierContext(ctx.methodIdentifier(), ctx.getText)
15941596
methodIdAsts ++ argsAsts ++ doBlockAsts
15951597
case ctx: RubyParser.PrimaryMethodArgsDoBlockCommandWithDoBlockContext =>
15961598
val argsAsts = astForArgumentsWithoutParenthesesContext(ctx.argumentsWithoutParentheses())
@@ -1790,7 +1792,7 @@ class AstCreator(filename: String, global: Global)
17901792
else return Seq(Ast())
17911793
}
17921794

1793-
val astNode = createIdentiferWithScope(node, ctx.getText, ctx.getText, Defines.Any, List(Defines.Any))
1795+
val astNode = createIdentiferWithScope(ctx, ctx.getText, ctx.getText, Defines.Any, List(Defines.Any))
17941796
Seq(Ast(astNode))
17951797
}
17961798

@@ -1906,7 +1908,7 @@ class AstCreator(filename: String, global: Global)
19061908
} else if (ctx.YIELD() != null) {
19071909
astForArgumentsWithoutParenthesesContext(ctx.argumentsWithoutParentheses())
19081910
} else if (ctx.methodIdentifier() != null) {
1909-
val methodIdentifierAsts = astForMethodIdentifierContext(ctx.methodIdentifier())
1911+
val methodIdentifierAsts = astForMethodIdentifierContext(ctx.methodIdentifier(), ctx.getText)
19101912
methodNameAsIdentiferQ.enqueue(methodIdentifierAsts.head)
19111913
val argsAsts = astForArgumentsWithoutParenthesesContext(ctx.argumentsWithoutParentheses())
19121914

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package io.joern.rubysrc2cpg.passes.ast
2+
3+
import io.joern.rubysrc2cpg.testfixtures.RubyCode2CpgFixture
4+
import io.shiftleft.semanticcpg.language._
5+
6+
class SimpleAstCreationPassTest extends RubyCode2CpgFixture {
7+
8+
"AST generation for simple fragments" should {
9+
10+
"have correct structure for a single command call" in {
11+
val cpg = code("""puts 123""")
12+
13+
val List(commandCall) = cpg.call.l
14+
val List(arg) = commandCall.argument.isLiteral.l
15+
16+
commandCall.code shouldBe "puts 123"
17+
commandCall.lineNumber shouldBe Some(1)
18+
19+
arg.code shouldBe "123"
20+
arg.lineNumber shouldBe Some(1)
21+
arg.columnNumber shouldBe Some(5)
22+
}
23+
}
24+
25+
}

0 commit comments

Comments
 (0)