From 25f80080ee7fd0c2f163a35fcd83e2b0079e4af7 Mon Sep 17 00:00:00 2001 From: Paul Galbraith Date: Fri, 21 Sep 2018 19:51:16 -0400 Subject: [PATCH] fix for https://github.com/linkedin/URL-Detector/issues/13 --- .../com/linkedin/urls/detection/DomainNameReader.java | 10 +++++++++- .../java/com/linkedin/urls/detection/UrlDetector.java | 4 ++++ src/test/java/com/linkedin/urls/TestUrl.java | 3 ++- .../com/linkedin/urls/detection/TestUriDetection.java | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/linkedin/urls/detection/DomainNameReader.java b/src/main/java/com/linkedin/urls/detection/DomainNameReader.java index de1b97f..415dd82 100644 --- a/src/main/java/com/linkedin/urls/detection/DomainNameReader.java +++ b/src/main/java/com/linkedin/urls/detection/DomainNameReader.java @@ -102,7 +102,11 @@ public enum ReaderNextState { /** * Finished reading, next step should be to read the query string. */ - ReadQueryString + ReadQueryString, + /** + * This was actually not a domain at all. + */ + ReadUserPass } /** @@ -332,6 +336,10 @@ public ReaderNextState readDomainName() { } else if (curr == '#') { //continue by reading the fragment return checkDomainNameValid(ReaderNextState.ReadFragment, curr); + } else if (curr == '@') { + //this may not have been a domain after all, but rather a username/password instead + _reader.goBack(); + return ReaderNextState.ReadUserPass; } else if (CharUtils.isDot(curr) || (curr == '%' && _reader.canReadChars(2) && _reader.peek(2).equalsIgnoreCase(HEX_ENCODED_DOT))) { //if the current character is a dot or a urlEncodedDot diff --git a/src/main/java/com/linkedin/urls/detection/UrlDetector.java b/src/main/java/com/linkedin/urls/detection/UrlDetector.java index 5445d87..3275ca6 100644 --- a/src/main/java/com/linkedin/urls/detection/UrlDetector.java +++ b/src/main/java/com/linkedin/urls/detection/UrlDetector.java @@ -545,6 +545,10 @@ public void addCharacter(char character) { return readPort(); case ReadQueryString: return readQueryString(); + case ReadUserPass: + int host = _currentUrlMarker.indexOf(UrlPart.HOST); + _currentUrlMarker.unsetIndex(UrlPart.HOST); + return readUserPass(host); default: return readEnd(ReadEndState.InvalidUrl); } diff --git a/src/test/java/com/linkedin/urls/TestUrl.java b/src/test/java/com/linkedin/urls/TestUrl.java index a8570d2..1e5bc05 100644 --- a/src/test/java/com/linkedin/urls/TestUrl.java +++ b/src/test/java/com/linkedin/urls/TestUrl.java @@ -28,7 +28,8 @@ private Object[][] getUsernamePasswordUrls() { {"@www.google.com", "www.google.com", "/", "", ""}, {"lalal:@www.gogo.com", "www.gogo.com", "/", "lalal", ""}, {"nono:boo@[::1]", "[::1]", "/", "nono", "boo"}, - {"nono:boo@yahoo.com/@1234", "yahoo.com", "/@1234", "nono", "boo"} + {"nono:boo@yahoo.com/@1234", "yahoo.com", "/@1234", "nono", "boo"}, + {"big.big.boss@google.com", "google.com", "/", "big.big.boss", ""} }; } diff --git a/src/test/java/com/linkedin/urls/detection/TestUriDetection.java b/src/test/java/com/linkedin/urls/detection/TestUriDetection.java index 3d1bbde..a46a046 100644 --- a/src/test/java/com/linkedin/urls/detection/TestUriDetection.java +++ b/src/test/java/com/linkedin/urls/detection/TestUriDetection.java @@ -668,7 +668,7 @@ public void testIssue13() { runTest("gmail.com@gmail.com", UrlDetectorOptions.Default, "gmail.com@gmail.com"); runTest("first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com", UrlDetectorOptions.Default, "first.middle.reallyreallyreallyreallyreallyreallyreallyreallyreallyreallylonglastname@gmail.com"); } - + /* * https://github.com/linkedin/URL-Detector/issues/15 */