Skip to content

Commit

Permalink
Merge pull request erlang#9104 from richcarl/scan-errors
Browse files Browse the repository at this point in the history
Do not allow name characters immediately after a number
  • Loading branch information
bjorng authored Nov 29, 2024
2 parents 4a18df9 + 429dc57 commit 709aed9
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 41 deletions.
19 changes: 19 additions & 0 deletions lib/stdlib/src/erl_scan.erl
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,15 @@ string_quote(_) -> $". %"
(is_integer(C) andalso
(C >= $\000 andalso C =< $\s orelse C >= $\200 andalso C =< $\240))).
-define(DIGIT(C), (is_integer(C) andalso $0 =< C andalso C =< $9)).
%% True when C is a character that may continue an atom or variable name:
%% an ASCII letter, a digit, `_', `@', or an ISO Latin-1 letter.
%% The Latin-1 letters form two ranges that are alternatives of each other
%% (hence `orelse' between them), each with one non-letter symbol excluded:
%%   lowercase: $ß (223) .. $ÿ (255), except $÷ (247, division sign)
%%   uppercase: $À (192) .. $Þ (222), except $× (215, multiplication sign)
%% Usable in guards; evaluates to false for non-integer terms.
-define(NAMECHAR(C),
        (is_integer(C) andalso
         (C >= $a andalso C =< $z orelse
          C >= $A andalso C =< $Z orelse
          C =:= $_ orelse
          C >= $0 andalso C =< $9 orelse
          C =:= $@ orelse
          C >= $ß andalso C =< $ÿ andalso C =/= $÷ orelse
          C >= $À andalso C =< $Þ andalso C =/= $×))).
-define(CHAR(C), (is_integer(C) andalso 0 =< C andalso C < 16#110000)).
-define(UNICODE(C),
(is_integer(C) andalso
Expand Down Expand Up @@ -1806,6 +1815,8 @@ scan_number([$_]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
scan_number([$.,C|Cs], St, Line, Col, Toks, Ncs, Us) when ?DIGIT(C) ->
scan_fraction(Cs, St, Line, Col, Toks, [C,$.|Ncs], Us);
scan_number([$.,C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0);
scan_number([$.]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
Expand All @@ -1822,6 +1833,8 @@ scan_number([$#|Cs]=Cs0, St, Line, Col, Toks, Ncs0, Us) ->
%% Extremely unlikely to occur in practice.
scan_error({illegal,base}, Line, Col, Line, Col, Cs0)
end;
scan_number([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0);
scan_number([]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_number/6}};
scan_number(Cs, St, Line, Col, Toks, Ncs0, Us) ->
Expand Down Expand Up @@ -1861,6 +1874,8 @@ scan_based_int([$_,Next|Cs], St, Line, Col, Toks, B, [Prev|_]=Ncs, Bcs, _Us)
with_underscore);
scan_based_int([$_]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
scan_based_int([C|_]=Cs0, _St, Line, Col, _Toks, _B, Ncs, Bcs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,integer}, Line, Col, Line, incr_column(Col, length(Ncs) + length(Bcs)), Cs0);
scan_based_int([]=Cs, St, Line, Col, Toks, B, NCs, BCs, Us) ->
{more,{Cs,St,Col,Toks,Line,{B,NCs,BCs,Us},fun scan_based_int/6}};
scan_based_int(Cs, _St, Line, Col, _Toks, _B, [], Bcs, _Us) ->
Expand Down Expand Up @@ -1893,6 +1908,8 @@ scan_fraction([$_]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}};
scan_fraction([E|Cs], St, Line, Col, Toks, Ncs, Us) when E =:= $e; E =:= $E ->
scan_exponent_sign(Cs, St, Line, Col, Toks, [E|Ncs], Us);
scan_fraction([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0);
scan_fraction([]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_fraction/6}};
scan_fraction(Cs, St, Line, Col, Toks, Ncs, Us) ->
Expand All @@ -1919,6 +1936,8 @@ scan_exponent([$_,Next|Cs], St, Line, Col, Toks, [Prev|_]=Ncs, _) when
scan_exponent(Cs, St, Line, Col, Toks, [Next,$_|Ncs], with_underscore);
scan_exponent([$_]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}};
scan_exponent([C|_]=Cs0, _St, Line, Col, _Toks, Ncs, _Us) when ?NAMECHAR(C) ->
scan_error({illegal,float}, Line, Col, Line, incr_column(Col, length(Ncs)), Cs0);
scan_exponent([]=Cs, St, Line, Col, Toks, Ncs, Us) ->
{more,{Cs,St,Col,Toks,Line,{Ncs,Us},fun scan_exponent/6}};
scan_exponent(Cs, St, Line, Col, Toks, Ncs, Us) ->
Expand Down
96 changes: 55 additions & 41 deletions lib/stdlib/test/erl_scan_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -313,24 +313,37 @@ integers() ->
fun({S, I}) ->
test_string(S, [{integer, {1, 1}, I}])
end, UnderscoreSamples),
UnderscoreErrors =
NotIntegers =
["_123",
"__123"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
{ok, [{integer, _, _}|_], _} ->
error({unexpected_integer, S});
{ok, _, _} ->
ok
end
end, NotIntegers),
IntegerErrors =
["123_",
"123__",
"123_456_",
"123__456",
"_123",
"__123"],
"123_.456",
"123abc",
"12@"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
{ok, [{integer, _, _}], _} ->
error({unexpected_integer, S});
_ ->
ok
{error,{1,erl_scan,{illegal,integer}},_} ->
ok;
{error,Err,_} ->
error({unexpected_error, S, Err});
Succ ->
error({unexpected_success, S, Succ})
end
end, UnderscoreErrors),
test_string("_123", [{var,{1,1},'_123'}]),
test_string("123_", [{integer,{1,1},123},{var,{1,4},'_'}]),
end, IntegerErrors),
ok.

base_integers() ->
Expand All @@ -350,8 +363,6 @@ base_integers() ->
{error,{{1,1},erl_scan,{base,1000}},{1,6}} =
erl_scan:string("1_000#000", {1,1}, []),

test_string("12#bc", [{integer,{1,1},11},{atom,{1,5},c}]),

[begin
Str = BS ++ "#" ++ S,
E = 2 + length(BS),
Expand All @@ -360,12 +371,6 @@ base_integers() ->
end || {BS,S} <- [{"3","3"},{"15","f"},{"12","c"},
{"1_5","f"},{"1_2","c"}] ],

{ok,[{integer,1,239},{'@',1}],1} = erl_scan_string("16#ef@"),
{ok,[{integer,{1,1},239},{'@',{1,6}}],{1,7}} =
erl_scan_string("16#ef@", {1,1}, []),
{ok,[{integer,{1,1},14},{atom,{1,5},g@}],{1,7}} =
erl_scan_string("16#eg@", {1,1}, []),

UnderscoreSamples =
[{"16#1234_ABCD_EF56", 16#1234abcdef56},
{"2#0011_0101_0011", 2#001101010011},
Expand All @@ -376,25 +381,31 @@ base_integers() ->
fun({S, I}) ->
test_string(S, [{integer, {1, 1}, I}])
end, UnderscoreSamples),
UnderscoreErrors =
IntegerErrors =
["16_#123ABC",
"16#123_",
"16#_123",
"16#ABC_",
"16#_ABC",
"2#_0101",
"1__6#ABC",
"16#AB__CD"],
"16#AB__CD",
"16#eg",
"16#ef@",
"10_#",
"10#12a4",
"10#12A4"],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
{ok, [{integer, _, _}], _} ->
error({unexpected_integer, S});
_ ->
ok
{error,{1,erl_scan,{illegal,integer}},_} ->
ok;
{error,Err,_} ->
error({unexpected_error, S, Err});
Succ ->
error({unexpected_success, S, Succ})
end
end, UnderscoreErrors),
test_string("16#123_", [{integer,{1,1},291},{var,{1,7},'_'}]),
end, IntegerErrors),
test_string("_16#ABC", [{var,{1,1},'_16'},{'#',{1,4}},{var,{1,5},'ABC'}]),
ok.

Expand All @@ -405,7 +416,6 @@ floats() ->
test_string(FS, Ts)
end || FS <- ["1.0","001.17","3.31200","1.0e0","1.0E17",
"34.21E-18", "17.0E+14"]],
test_string("1.e2", [{integer,{1,1},1},{'.',{1,2}},{atom,{1,3},e2}]),

{error,{1,erl_scan,{illegal,float}},1} =
erl_scan:string("1.0e400"),
Expand All @@ -430,25 +440,29 @@ floats() ->
fun({S, I}) ->
test_string(S, [{float, {1, 1}, I}])
end, UnderscoreSamples),
UnderscoreErrors =
["123_.456",
"123._456",
"123.456_",
"123._",
"1._23e10",
FloatErrors =
["123.456_",
"1.23_e10",
"1.23e_10",
"1.23e10_"],
"1.23e10_",
"123.45_e6",
"123.45a12",
"123.45e23a12",
"1.e2",
"12._34",
"123.a4"
],
lists:foreach(
fun(S) ->
case erl_scan:string(S) of
{ok, [{float, _, _}], _} ->
error({unexpected_float, S});
_ ->
ok
{error,{1,erl_scan,{illegal,float}},_} ->
ok;
{error,Err,_} ->
error({unexpected_error, S, Err});
Succ ->
error({unexpected_success, S, Succ})
end
end, UnderscoreErrors),
test_string("123._", [{integer,{1,1},123},{'.',{1,4}},{var,{1,5},'_'}]),
test_string("1.23_e10", [{float,{1,1},1.23},{var,{1,5},'_e10'}]),
end, FloatErrors),
ok.

dots() ->
Expand Down

0 comments on commit 709aed9

Please sign in to comment.