-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Throw an error when collecting from iterators with inconsistent length #29458
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -517,8 +517,25 @@ julia> collect(Float64, 1:2:5) | |
""" | ||
collect(::Type{T}, itr) where {T} = _collect(T, itr, IteratorSize(itr)) | ||
|
||
_collect(::Type{T}, itr, isz::HasLength) where {T} = copyto!(Vector{T}(undef, Int(length(itr)::Integer)), itr) | ||
_collect(::Type{T}, itr, isz::HasShape) where {T} = copyto!(similar(Array{T}, axes(itr)), itr) | ||
function copyto_check_length!(dest::Array, src) | ||
len = length(dest) | ||
i = 0 | ||
for x in src | ||
i == len && | ||
throw(ErrorException("iterator returned more elements than its declared length")) | ||
i += 1 | ||
@inbounds dest[i] = x | ||
end | ||
if i < len | ||
throw(ErrorException("iterator returned fewer elements than its declared length")) | ||
end | ||
return dest | ||
end | ||
|
||
_collect(::Type{T}, itr, isz::HasLength) where {T} = | ||
copyto_check_length!(Vector{T}(undef, Int(length(itr)::Integer)), itr) | ||
_collect(::Type{T}, itr, isz::HasShape) where {T} = | ||
copyto_check_length!(similar(Array{T}, axes(itr)), itr) | ||
function _collect(::Type{T}, itr, isz::SizeUnknown) where T | ||
a = Vector{T}() | ||
for x in itr | ||
|
@@ -561,7 +578,7 @@ collect(A::AbstractArray) = _collect_indices(axes(A), A) | |
collect_similar(cont, itr) = _collect(cont, itr, IteratorEltype(itr), IteratorSize(itr)) | ||
|
||
_collect(cont, itr, ::HasEltype, isz::Union{HasLength,HasShape}) = | ||
copyto!(_similar_for(cont, eltype(itr), itr, isz), itr) | ||
copyto_check_length!(_similar_for(cont, eltype(itr), itr, isz), itr) | ||
|
||
function _collect(cont, itr, ::HasEltype, isz::SizeUnknown) | ||
a = _similar_for(cont, eltype(itr), itr, isz) | ||
|
@@ -618,6 +635,9 @@ function collect(itr::Generator) | |
else | ||
y = iterate(itr) | ||
if y === nothing | ||
if isa(isz, Union{HasLength, HasShape}) && length(itr) != 0 | ||
throw(ErrorException("iterator returned fewer elements than its declared length")) | ||
end | ||
return _array_for(et, itr.iter, isz) | ||
end | ||
v1, st = y | ||
|
@@ -667,6 +687,10 @@ function collect_to!(dest::AbstractArray{T}, itr, offs, st) where T | |
return collect_to!(new, itr, i+1, st) | ||
end | ||
end | ||
i-1 < length(dest) && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should use |
||
throw(ErrorException("iterator returned fewer elements than its declared length")) | ||
i-1 > length(dest) && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory I agree, I don't like this # After change
julia> x = Vector{Int}(undef, 1000);
julia> itr = 1:1000;
julia> @btime Base.collect_to!($x, $itr, 1, 0);
676.981 ns (0 allocations: 0 bytes)
julia> y = copy(x);
julia> @btime Base.collect_to!($x, $y, 1, 1);
847.618 ns (0 allocations: 0 bytes)
# Master
julia> x = Vector{Int}(undef, 1000);
julia> itr = 1:1000;
julia> @btime Base.collect_to!($x, $itr, 1, 0);
86.233 ns (0 allocations: 0 bytes)
julia> y = copy(x);
julia> @btime Base.collect_to!($x, $y, 1, 1);
90.980 ns (0 allocations: 0 bytes) Maybe the compiler could be improved, but for now I guess we can't do this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ouch, that's bad. |
||
throw(ErrorException("iterator returned more elements than its declared length")) | ||
return dest | ||
end | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2524,3 +2524,29 @@ Base.view(::T25958, args...) = args | |
@test t[end,end,1] == @view(t[end,end,1]) == @views t[end,end,1] | ||
@test t[end,end,end] == @view(t[end,end,end]) == @views t[end,end,end] | ||
end | ||
|
||
@testset "collect on iterator with incorrect length" begin | ||
# Iterator with declared length too large | ||
struct InvalidIter1 end | ||
Base.length(::InvalidIter1) = 2 | ||
Base.iterate(::InvalidIter1, i=1) = i > 1 ? nothing : (i, (i + 1)) | ||
|
||
@test_throws ErrorException collect(InvalidIter1()) | ||
@test_throws ErrorException collect(Any, InvalidIter1()) | ||
@test_throws ErrorException collect(Int, InvalidIter1()) | ||
@test_throws ErrorException [x for x in InvalidIter1()] | ||
# Should also throw ErrorException | ||
@test_broken length(Int[x for x in InvalidIter1()]) != 2 | ||
|
||
# Iterator with declared length too small | ||
struct InvalidIter2 end | ||
Base.length(::InvalidIter2) = 2 | ||
Base.iterate(::InvalidIter2, i=1) = i > 3 ? nothing : (i, (i + 1)) | ||
|
||
@test_throws ErrorException collect(InvalidIter2()) | ||
@test_throws ErrorException collect(Any, InvalidIter2()) | ||
@test_throws ErrorException collect(Int, InvalidIter2()) | ||
@test_throws ErrorException [x for x in InvalidIter2()] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we can test this, since it will do an invalid memory write. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mmm, yeah, so the only way to catch this would be to drop the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to add checks for some cases (when it can be efficient) than to have no checks at all, I'd say. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But are there reasons to care less about performance in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, does that mean it's OK for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it doesn't mean it's OK for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was actually an honest question: maybe we should remove the check from |
||
# Should also throw ErrorException | ||
@test_broken length(Int[x for x in InvalidIter2()]) != 2 | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like
copyto!
already does this check.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes but it doesn't do the one below, and it cannot be done with access to
i
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, then we should add a
_copyto_impl!
that takes astrict::Bool
argument, using the current code fromcopyto!
. Then it can be shared by both this code (passingtrue
for strict) andcopyto!
(passingfalse
).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can do that. Though note that the error message will be less specific (like the current one in
copyto!
), sincecopyto!
cannot assume that the problem comes from an invalid declared length. So I'm not sure it's really better.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, either way. This does need to use
eachindex
though.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not if we keep this method, as it only applies to
Array
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually it can also be called on
OffsetArray
and possibly on other arrays in corner cases. So I've taken the_copy_impl!
approach.