Skip to content

Commit be45b8b

Browse files
deprecate utf8 for String
1 parent 8b1b350 commit be45b8b

24 files changed

+38
-103
lines changed

base/REPL.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,11 @@ An editor may have converted tabs to spaces at line """
323323

324324
function hist_getline(file)
325325
while !eof(file)
326-
line = utf8(readline(file))
326+
line = readline(file)
327327
isempty(line) && return line
328328
line[1] in "\r\n" || return line
329329
end
330-
return utf8("")
330+
return ""
331331
end
332332

333333
function hist_from_file(hp, file)

base/docs/helpdb/Base.jl

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,7 +1854,7 @@ Dict{String,Float64} with 2 entries:
18541854
"bar" => 42.0
18551855
"foo" => 0.0
18561856
1857-
julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
1857+
julia> b = Dict("baz" => 17, "bar" => 4711)
18581858
Dict{String,Int64} with 2 entries:
18591859
"bar" => 4711
18601860
"baz" => 17
@@ -2962,29 +2962,6 @@ Extract a named field from a `value` of composite type. The syntax `a.b` calls
29622962
"""
29632963
getfield
29642964

2965-
"""
2966-
utf8(::Array{UInt8,1})
2967-
2968-
Create a UTF-8 string from a byte array.
2969-
"""
2970-
utf8(::Vector{UInt8})
2971-
2972-
"""
2973-
utf8(::Ptr{UInt8}, [length])
2974-
2975-
Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy
2976-
is made; the ptr can be safely freed. If `length` is specified, the string does not have to
2977-
be 0-terminated.
2978-
"""
2979-
utf8(::Ptr{UInt8}, length::Int = 1)
2980-
2981-
"""
2982-
utf8(s)
2983-
2984-
Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
2985-
"""
2986-
utf8(s)
2987-
29882965
"""
29892966
hvcat(rows::Tuple{Vararg{Int}}, values...)
29902967

base/env.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ function access_env(onError::Function, str::AbstractString)
3434
var = cwstring(str)
3535
len = _getenvlen(var)
3636
if len == 0
37-
return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? utf8("") : onError(str)
37+
return Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND ? "" : onError(str)
3838
end
3939
val = zeros(UInt16,len)
4040
ret = ccall(:GetEnvironmentVariableW,stdcall,UInt32,(Ptr{UInt16},Ptr{UInt16},UInt32),var,val,len)

base/exports.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,6 @@ export
887887
ucfirst,
888888
unescape_string,
889889
uppercase,
890-
utf8,
891890
utf16,
892891
utf32,
893892
warn,

base/libc.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ function FormatMessage end
259259
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_MAX_WIDTH_MASK,
260260
C_NULL, e, 0, lpMsgBuf, 0, C_NULL)
261261
p = lpMsgBuf[1]
262-
len == 0 && return utf8("")
262+
len == 0 && return ""
263263
buf = Array(UInt16, len)
264264
unsafe_copy!(pointer(buf), p, len)
265265
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)

base/libgit2.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ function snapshot(repo::GitRepo)
465465
work = try
466466
with(GitIndex, repo) do idx
467467
if length(readdir(path(repo))) > 1
468-
add!(idx, utf8("."))
468+
add!(idx, ".")
469469
write!(idx)
470470
end
471471
write_tree!(idx)

base/precompile.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,6 @@ precompile(Base.UInt, (UInt,))
400400
precompile(Base.unsafe_copy!, (Array{Dict{Any, Any}, 1}, Int, Array{Dict{Any, Any}, 1}, Int, Int))
401401
precompile(Base.unsafe_copy!, (Ptr{Dict{Any, Any}}, Ptr{Dict{Any, Any}}, Int))
402402
precompile(Base.unshift!, (Array{Any,1}, Task))
403-
precompile(Base.utf8, (String,))
404403
precompile(Base.uv_error, (String, Bool))
405404
precompile(Base.uvfinalize, (Base.TTY,))
406405
precompile(Base.vcat, (Base.LineEdit.Prompt,))

base/regex.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ function matchall(re::Regex, str::String, overlap::Bool=false)
209209
matches
210210
end
211211

212-
matchall(re::Regex, str::Union{String,SubString}, overlap::Bool=false) =
213-
matchall(re, utf8(str), overlap)
212+
matchall(re::Regex, str::SubString, overlap::Bool=false) =
213+
matchall(re, String(str), overlap)
214214

215215
function search(str::Union{String,SubString}, re::Regex, idx::Integer)
216216
if idx > nextind(str,endof(str))

base/show.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1189,7 +1189,7 @@ Accept keyword args `c` for alternate single character marker.
11891189
"""
11901190
function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
11911191
N = length(s)
1192-
return join(setindex!([utf8(" ") for i=1:N],string(c),ceil(Int,N/2)))
1192+
return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
11931193
end
11941194

11951195
"""

base/strings/basic.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ String(s::Vector{UInt8}) =
3232
String(p::Ptr{UInt8}, [length::Integer])
3333
3434
Create a string from the address of a C (0-terminated) string encoded as UTF-8.
35-
A copy is made so the ptr can be safely freed. If `length` is specified, the string
36-
does not have to be 0-terminated.
35+
A copy is made so the pointer can be safely freed. If `length` is specified, the
36+
string does not have to be 0-terminated.
3737
"""
3838
function String(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
3939
p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))

base/unicode/utf8.jl

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ write(io::IO, s::String) = write(io, s.data)
230230

231231
## transcoding to UTF-8 ##
232232

233-
utf8(x) = convert(String, x)
234233
convert(::Type{String}, s::String) = s
235234

236235
function convert(::Type{String}, dat::Vector{UInt8})
@@ -350,10 +349,3 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
350349
end
351350
String(buf)
352351
end
353-
354-
utf8(p::Ptr{UInt8}) =
355-
utf8(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
356-
function utf8(p::Ptr{UInt8}, len::Integer)
357-
p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
358-
String(ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len))
359-
end

contrib/BBEditTextWrangler-julia.plist

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1177,7 +1177,6 @@
11771177
<string>using</string>
11781178
<string>utf16</string>
11791179
<string>utf32</string>
1180-
<string>utf8</string>
11811180
<string>values</string>
11821181
<string>var</string>
11831182
<string>varm</string>

doc/stdlib/collections.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ Given a dictionary ``D``, the syntax ``D[x]`` returns the value of key ``x`` (if
842842
"bar" => 42.0
843843
"foo" => 0.0
844844

845-
julia> b = Dict(utf8("baz") => 17, utf8("bar") => 4711)
845+
julia> b = Dict("baz" => 17, "bar" => 4711)
846846
Dict{String,Int64} with 2 entries:
847847
"bar" => 4711
848848
"baz" => 17

doc/stdlib/strings.rst

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -68,24 +68,6 @@
6868
6969
Convert a string to ``String`` type and check that it contains only ASCII data, otherwise throwing an ``ArgumentError`` indicating the position of the first non-ASCII byte.
7070

71-
.. function:: utf8(::Array{UInt8,1})
72-
73-
.. Docstring generated from Julia source
74-
75-
Create a UTF-8 string from a byte array.
76-
77-
.. function:: utf8(::Ptr{UInt8}, [length])
78-
79-
.. Docstring generated from Julia source
80-
81-
Create a UTF-8 string from the address of a C (0-terminated) string encoded in UTF-8. A copy is made; the ptr can be safely freed. If ``length`` is specified, the string does not have to be 0-terminated.
82-
83-
.. function:: utf8(s)
84-
85-
.. Docstring generated from Julia source
86-
87-
Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).
88-
8971
.. function:: @r_str -> Regex
9072

9173
.. Docstring generated from Julia source

test/base64.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ end
2424
rm(fname)
2525

2626
# Encode to string and decode
27-
@test utf8(base64decode(base64encode(inputText))) == inputText
27+
@test String(base64decode(base64encode(inputText))) == inputText
2828

2929
# Decode with max line chars = 76 and padding
3030
ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76))

test/dict.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ end
263263
for d in (Dict("\n" => "\n", "1" => "\n", "\n" => "2"),
264264
[string(i) => i for i = 1:30],
265265
[reshape(1:i^2,i,i) => reshape(1:i^2,i,i) for i = 1:24],
266-
[utf8(Char['α':'α'+i;]) => utf8(Char['α':'α'+i;]) for i = (1:10)*10],
266+
[String(Char['α':'α'+i;]) => String(Char['α':'α'+i;]) for i = (1:10)*10],
267267
Dict("key" => zeros(0, 0)))
268268
for cols in (12, 40, 80), rows in (2, 10, 24)
269269
# Ensure output is limited as requested

test/replcompletions.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ c, r, res = test_scomplete(s)
569569
withenv("PATH" => string(tempdir(), ":", dir)) do
570570
s = string("repl-completio")
571571
c,r = test_scomplete(s)
572-
@test [utf8("repl-completion")] == c
572+
@test ["repl-completion"] == c
573573
@test s[r] == "repl-completio"
574574
end
575575

test/strings/basic.jl

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -215,14 +215,14 @@ end
215215
# issue #11142
216216
s = "abcdefghij"
217217
sp = pointer(s)
218-
@test utf8(sp) == s
219-
@test utf8(sp,5) == "abcde"
220-
@test typeof(utf8(sp)) == String
218+
@test String(sp) == s
219+
@test String(sp,5) == "abcde"
220+
@test typeof(String(sp)) == String
221221
s = "abcde\uff\u2000\U1f596"
222222
sp = pointer(s)
223-
@test utf8(sp) == s
224-
@test utf8(sp,5) == "abcde"
225-
@test typeof(utf8(sp)) == String
223+
@test String(sp) == s
224+
@test String(sp,5) == "abcde"
225+
@test typeof(String(sp)) == String
226226

227227
@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
228228
@test isnull(tryparse(BigInt, "1234567890-"))
@@ -434,7 +434,7 @@ let s = "abcdef", u8 = "abcdef\uff", u16 = utf16(u8), u32 = utf32(u8),
434434
@test isvalid(u8)
435435
@test isvalid(u16)
436436
@test isvalid(u32)
437-
@test isvalid(String, u8)
437+
@test isvalid(String, u8)
438438
@test isvalid(UTF16String, u16)
439439
@test isvalid(UTF32String, u32)
440440
end
@@ -464,11 +464,9 @@ end
464464
# issue # 11464: uppercase/lowercase of UTF16String becomes a String
465465
str = "abcdef\uff\uffff\u10ffffABCDEF"
466466
@test typeof(uppercase("abcdef")) == String
467-
@test typeof(uppercase(utf8(str))) == String
468467
@test typeof(uppercase(utf16(str))) == UTF16String
469468
@test typeof(uppercase(utf32(str))) == UTF32String
470469
@test typeof(lowercase("ABCDEF")) == String
471-
@test typeof(lowercase(utf8(str))) == String
472470
@test typeof(lowercase(utf16(str))) == UTF16String
473471
@test typeof(lowercase(utf32(str))) == UTF32String
474472

@@ -481,16 +479,11 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32))
481479

482480
@test "a".*["b","c"] == ["ab","ac"]
483481
@test ["b","c"].*"a" == ["ba","ca"]
484-
@test utf8("a").*["b","c"] == ["ab","ac"]
485-
@test "a".*map(utf8,["b","c"]) == ["ab","ac"]
486482
@test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"]
487483

488-
# Make sure NULL pointer are handled consistently by
489-
# `String`, `ascii` and `utf8`
484+
# Make sure NULL pointers are handled consistently by String
490485
@test_throws ArgumentError String(Ptr{UInt8}(0))
491486
@test_throws ArgumentError String(Ptr{UInt8}(0), 10)
492-
@test_throws ArgumentError utf8(Ptr{UInt8}(0))
493-
@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)
494487

495488
# ascii works on ASCII strings and fails on non-ASCII strings
496489
@test ascii("Hello, world") == "Hello, world"

test/strings/types.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ slen_u8str2 = length(u8str2)
1313
@test len_u8str2 == 2 * len_u8str
1414
@test slen_u8str2 == 2 * slen_u8str
1515

16-
u8str2plain = utf8(u8str2)
16+
u8str2plain = String(u8str2)
1717

1818
for i1 = 1:length(u8str2)
1919
if !isvalid(u8str2, i1); continue; end
@@ -93,8 +93,7 @@ u = SubString(str, 1, 5)
9393
@test prevind(SubString("{var}",2,4),4) == 3
9494

9595
# issue #4183
96-
@test split(SubString(ascii("x"), 2, 0), "y") == AbstractString[""]
97-
@test split(SubString(utf8("x"), 2, 0), "y") == AbstractString[""]
96+
@test split(SubString("x", 2, 0), "y") == AbstractString[""]
9897

9998
# issue #6772
10099
@test float(SubString("10",1,1)) === 1.0
@@ -132,7 +131,7 @@ let s="lorem ipsum",
132131
end #let
133132

134133
#for isvalid(SubString{String})
135-
let s = utf8("Σx + βz - 2")
134+
let s = "Σx + βz - 2"
136135
for i in -1:length(s)+2
137136
ss=SubString(s,1,i)
138137
@test isvalid(ss,i)==isvalid(s,i)

test/unicode/checkstring.jl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,7 @@ try
8989
@test_throws UnicodeError Base.checkstring(UInt8[byt,0x80,0x80,0xc0])
9090
end
9191

92-
# Long encoding of 0x01
93-
@test_throws UnicodeError utf8(b"\xf0\x80\x80\x80")
94-
# Test ends of long encoded surrogates
95-
@test_throws UnicodeError utf8(b"\xf0\x8d\xa0\x80")
96-
@test_throws UnicodeError utf8(b"\xf0\x8d\xbf\xbf")
92+
# Long encodings
9793
@test_throws UnicodeError Base.checkstring(b"\xf0\x80\x80\x80")
9894
@test Base.checkstring(b"\xc0\x81"; accept_long_char=true) == (1,0x1,0,0,0)
9995
@test Base.checkstring(b"\xf0\x80\x80\x80"; accept_long_char=true) == (1,0x1,0,0,0)

test/unicode/utf16.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ u16 = utf16(u8)
66
@test sizeof(u16) == 18
77
@test length(u16.data) == 10 && u16.data[end] == 0
88
@test length(u16) == 5
9-
@test utf8(u16) == u8
9+
@test String(u16) == u8
1010
@test collect(u8) == collect(u16)
1111
@test u8 == utf16(u16.data[1:end-1]) == utf16(copy!(Array(UInt8, 18), 1, reinterpret(UInt8, u16.data), 1, 18))
1212
@test u8 == utf16(pointer(u16)) == utf16(convert(Ptr{Int16}, pointer(u16)))

test/unicode/utf32.jl

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ u32 = utf32(u8)
66
@test sizeof(u32) == 20
77
@test length(u32.data) == 6 && u32.data[end] == 0
88
@test length(u32) == 5
9-
@test utf8(u32) == u8
9+
@test String(u32) == u8
1010
@test collect(u8) == collect(u32)
1111
@test u8 == utf32(u32.data[1:end-1]) == utf32(copy!(Array(UInt8, 20), 1, reinterpret(UInt8, u32.data), 1, 20))
1212
@test u8 == utf32(pointer(u32)) == utf32(convert(Ptr{Int32}, pointer(u32)))
@@ -16,9 +16,9 @@ u32 = utf32(u8)
1616
function tstcvt(strUTF8::String, strUTF16::UTF16String, strUTF32::UTF32String)
1717
@test utf16(strUTF8) == strUTF16
1818
@test utf32(strUTF8) == strUTF32
19-
@test utf8(strUTF16) == strUTF8
19+
@test String(strUTF16) == strUTF8
2020
@test utf32(strUTF16) == strUTF32
21-
@test utf8(strUTF32) == strUTF8
21+
@test String(strUTF32) == strUTF8
2222
@test utf16(strUTF32) == strUTF16
2323
end
2424

@@ -49,7 +49,7 @@ str3_UTF32 = utf32(str3_UTF8)
4949
str4_UTF32 = utf32(str4_UTF8)
5050
strS_UTF32 = utf32(strS_UTF8)
5151

52-
@test utf8(strAscii) == strAscii
52+
@test String(strAscii) == strAscii
5353
@test utf16(strAscii) == strAscii
5454
@test utf32(strAscii) == strAscii
5555

@@ -62,13 +62,12 @@ tstcvt(str4_UTF8,str4_UTF16,str4_UTF32)
6262
# Test converting surrogate pairs
6363
@test utf16(strS_UTF8) == strC_UTF8
6464
@test utf32(strS_UTF8) == strC_UTF8
65-
@test utf8(strS_UTF16) == strC_UTF8
65+
@test String(strS_UTF16) == strC_UTF8
6666
@test utf32(strS_UTF16) == strC_UTF8
67-
@test utf8(strS_UTF32) == strC_UTF8
67+
@test String(strS_UTF32) == strC_UTF8
6868
@test utf16(strS_UTF32) == strC_UTF8
6969

7070
# Test converting overlong \0
71-
@test utf8(strZ) == strz_UTF8
7271
@test utf16(String(strZ)) == strz_UTF8
7372
@test utf32(String(strZ)) == strz_UTF8
7473

@@ -172,7 +171,7 @@ end
172171
# Wstring
173172
u8 = "\U10ffff\U1d565\U1d7f6\U00066\U2008a"
174173
w = wstring(u8)
175-
@test length(w) == 5 && utf8(w) == u8 && collect(u8) == collect(w)
174+
@test length(w) == 5 && String(w) == u8 && collect(u8) == collect(w)
176175
@test u8 == WString(w.data)
177176

178177
# 12268
@@ -211,7 +210,7 @@ end
211210

212211
# Test pointer() functions
213212
let str = ascii("this ")
214-
u8 = utf8(str)
213+
u8 = String(str)
215214
u16 = utf16(str)
216215
u32 = utf32(str)
217216
pa = pointer(str)

test/unicode/utf8.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
let ch = 0x10000
66
for hichar = 0xd800:0xdbff
77
for lochar = 0xdc00:0xdfff
8-
@test convert(String, utf8(Char[hichar, lochar]).data) == string(Char(ch))
8+
@test convert(String, String(Char[hichar, lochar]).data) == string(Char(ch))
99
ch += 1
1010
end
1111
end

0 commit comments

Comments
 (0)