From 940784b7c28f12c4d27b9786e9f37fda259adb8d Mon Sep 17 00:00:00 2001 From: Paul Galbraith Date: Sun, 23 Sep 2018 08:58:10 -0400 Subject: [PATCH] Remove endless loop detection to address https://github.com/linkedin/URL-Detector/issues/15. So the obvious downside is the risk of actually getting into an endless loop but I think the risk is worth it. If the code is solid then this is not possible, so I'd rather address those cases as (if?) they come up, instead of causing false failures like the case in this issue. --- .../urls/detection/InputTextReader.java | 44 ------------------- .../linkedin/urls/detection/UrlDetector.java | 9 ---- .../urls/detection/TestInputTextReader.java | 9 ---- .../urls/detection/TestUriDetection.java | 36 +++++++++++++++ 4 files changed, 36 insertions(+), 62 deletions(-) diff --git a/src/main/java/com/linkedin/urls/detection/InputTextReader.java b/src/main/java/com/linkedin/urls/detection/InputTextReader.java index c58e0ea..0ac13cf 100644 --- a/src/main/java/com/linkedin/urls/detection/InputTextReader.java +++ b/src/main/java/com/linkedin/urls/detection/InputTextReader.java @@ -14,11 +14,6 @@ */ public class InputTextReader { - /** - * The number of times something can be backtracked is this multiplier times the length of the string. - */ - protected static final int MAX_BACKTRACK_MULTIPLIER = 10; - /** * The content to read. */ @@ -29,16 +24,6 @@ public class InputTextReader { */ private int _index = 0; - /** - * Contains the amount of characters that were backtracked. This is used for performance analysis. - */ - private int _backtracked = 0; - - /** - * When detecting for exceeding the backtrack limit, make sure the text is at least 20 characters. - */ - private final static int MINIMUM_BACKTRACK_LENGTH = 20; - /** * Creates a new instance of the InputTextReader using the content to read. * @param content The content to read. 
@@ -102,47 +87,18 @@ public int getPosition() { return _index; } - /** - * Gets the total number of characters that were backtracked when reading. - */ - public int getBacktrackedCount() { - return _backtracked; - } - /** * Moves the index to the specified position. * @param position The position to set the index to. */ public void seek(int position) { - int backtrackLength = Math.max(_index - position, 0); - _backtracked += backtrackLength; _index = position; - checkBacktrackLoop(backtrackLength); } /** * Goes back a single character. */ public void goBack() { - _backtracked++; _index--; - checkBacktrackLoop(1); - } - - private void checkBacktrackLoop(int backtrackLength) { - if (_backtracked > (_content.length * MAX_BACKTRACK_MULTIPLIER)) { - if (backtrackLength < MINIMUM_BACKTRACK_LENGTH) { - backtrackLength = MINIMUM_BACKTRACK_LENGTH; - } - - int start = Math.max(_index, 0); - if (start + backtrackLength > _content.length) { - backtrackLength = _content.length - start; - } - - String badText = new String(_content, start, backtrackLength); - throw new NegativeArraySizeException("Backtracked max amount of characters. Endless loop detected. Bad Text: '" - + badText + "'"); - } } } diff --git a/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 142020d..5445d87 100644 --- a/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -125,15 +125,6 @@ public UrlDetector(String content, UrlDetectorOptions options) { _options = options; } - /** - * Gets the number of characters that were backtracked while reading the input. This is useful for performance - * measurement. - * @return The count of characters that were backtracked while reading. - */ - public int getBacktracked() { - return _reader.getBacktrackedCount(); - } - /** * Detects the urls and returns a list of detected url strings. * @return A list with detected urls. 
diff --git a/src/test/java/com/linkedin/urls/detection/TestInputTextReader.java b/src/test/java/com/linkedin/urls/detection/TestInputTextReader.java index e0b0ec9..8a43426 100644 --- a/src/test/java/com/linkedin/urls/detection/TestInputTextReader.java +++ b/src/test/java/com/linkedin/urls/detection/TestInputTextReader.java @@ -59,13 +59,4 @@ public void testSeek() { reader.seek(1); Assert.assertEquals(reader.read(), CONTENT.charAt(1)); } - - @Test(expectedExceptions = NegativeArraySizeException.class, expectedExceptionsMessageRegExp = ".*" + CONTENT + ".*") - public void testEndlessLoopDetection() { - InputTextReader reader = new InputTextReader(CONTENT); - for (int i = 0; i < InputTextReader.MAX_BACKTRACK_MULTIPLIER + 1; i++) { - reader.seek(CONTENT.length()); - reader.seek(0); - } - } } diff --git a/src/test/java/com/linkedin/urls/detection/TestUriDetection.java b/src/test/java/com/linkedin/urls/detection/TestUriDetection.java index eb9ce95..3d1bbde 100644 --- a/src/test/java/com/linkedin/urls/detection/TestUriDetection.java +++ b/src/test/java/com/linkedin/urls/detection/TestUriDetection.java @@ -649,6 +649,42 @@ public void testBacktrackInvalidUsernamePassword() { runTest("http://hello:asdf.com", UrlDetectorOptions.Default, "asdf.com"); } + /* + * https://github.com/linkedin/URL-Detector/issues/12 + */ + @Test + public void testIssue12() { + runTest("http://user:pass@host.com host.com", UrlDetectorOptions.Default, "http://user:pass@host.com", "host.com"); + } + + /* + * https://github.com/linkedin/URL-Detector/issues/13 + */ + @Test + public void testIssue13() { + runTest("user@github.io/page", UrlDetectorOptions.Default, "user@github.io/page"); + runTest("name@gmail.com", UrlDetectorOptions.Default, "name@gmail.com"); + runTest("name.lastname@gmail.com", UrlDetectorOptions.Default, "name.lastname@gmail.com"); + runTest("gmail.com@gmail.com", UrlDetectorOptions.Default, "gmail.com@gmail.com"); + 
runTest("first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com", UrlDetectorOptions.Default, "first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com"); + } + + /* + * https://github.com/linkedin/URL-Detector/issues/15 + */ + @Test + public void testIssue15() { + runTest(".............:::::::::::;;;;;;;;;;;;;;;::...............................................:::::::::::::::::::::::::::::....................", UrlDetectorOptions.Default); + } + + /* + * https://github.com/linkedin/URL-Detector/issues/16 + */ + @Test + public void testIssue16() { + runTest("://VIVE MARINE LE PEN//:@.", UrlDetectorOptions.Default); + } + private void runTest(String text, UrlDetectorOptions options, String... expected) { //do the detection UrlDetector parser = new UrlDetector(text, options);